**Importing Libraries**

In [2]:
import pandas as pd
from transformers import BertTokenizer, BertModel
from gensim.models import Word2Vec
import torch
import numpy as np

**Loading Data**

In [5]:
# Load the dataset
file_path = r"C:\Users\sidhe\Downloads\cleaned_combined_dataset.xlsx"

data = pd.read_excel(file_path)

# Specify the columns for processing
job_description_col = 'job_description'
transcript_col = 'transcript'
resume_col = 'resume'

**Importing The Bert Model**

In [6]:
# Specify the columns for processing
job_description_col = 'job_description'
transcript_col = 'transcript'
resume_col = 'resume'

# Initialize BERT tokenizer and model
bert_model_name = 'bert-base-uncased'
tokenizer = BertTokenizer.from_pretrained(bert_model_name)
bert_model = BertModel.from_pretrained(bert_model_name)


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]



model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

**Function to get BERT embeddings and Process text columns with BERT embeddings**

In [7]:
# 
def get_bert_embeddings(text):
    inputs = tokenizer(text, return_tensors='pt', truncation=True, padding=True, max_length=512)
    with torch.no_grad():
        outputs = bert_model(**inputs)
    # Use the [CLS] token embedding as a summary of the text
    cls_embedding = outputs.last_hidden_state[:, 0, :].squeeze(0).numpy()
    return cls_embedding

# Process text columns with BERT embeddings
def process_with_bert(column_name):
    embeddings = []
    for text in data[column_name].fillna(''):
        embedding = get_bert_embeddings(text)
        embeddings.append(embedding)
    return embeddings

**getting the enbeddings**

In [8]:
print("Generating BERT embeddings for job descriptions...")
data[f'{job_description_col}_bert'] = process_with_bert(job_description_col)

print("Generating BERT embeddings for transcripts...")
data[f'{transcript_col}_bert'] = process_with_bert(transcript_col)

print("Generating BERT embeddings for resumes...")
data[f'{resume_col}_bert'] = process_with_bert(resume_col)

# Prepare for Word2Vec embeddings
print("Preparing for Word2Vec embeddings...")
text_data = data[[job_description_col, transcript_col, resume_col]].fillna('').values.flatten()
tokenized_data = [text.split() for text in text_data]

Generating BERT embeddings for job descriptions...
Generating BERT embeddings for transcripts...
Generating BERT embeddings for resumes...
Preparing for Word2Vec embeddings...


**Train Word2Vec model and function to get Word2Vec embeddings**

In [9]:

word2vec_model = Word2Vec(sentences=tokenized_data, vector_size=100, window=5, min_count=1, workers=4)

# Function to get Word2Vec embeddings
def get_word2vec_embeddings(text):
    words = text.split()
    embeddings = [word2vec_model.wv[word] for word in words if word in word2vec_model.wv]
    if embeddings:
        return np.mean(embeddings, axis=0)
    else:
        return np.zeros(word2vec_model.vector_size)

**Process text columns with Word2Vec embeddings**

In [10]:
#
def process_with_word2vec(column_name):
    embeddings = []
    for text in data[column_name].fillna(''):
        embedding = get_word2vec_embeddings(text)
        embeddings.append(embedding)
    return embeddings


**Generating Word2Vec embeddings**

In [12]:
print("Generating Word2Vec embeddings for job descriptions...")
data[f'{job_description_col}_word2vec'] = process_with_word2vec(job_description_col)

print("Generating Word2Vec embeddings for transcripts...")
data[f'{transcript_col}_word2vec'] = process_with_word2vec(transcript_col)

print("Generating Word2Vec embeddings for resumes...")
data[f'{resume_col}_word2vec'] = process_with_word2vec(resume_col)

# Save processed data
output_file = r'C:\Users\sidhe\Downloads\processed_dataset_with_embeddings.xlsx'
data.to_excel(output_file, index=False)
print(f"Processed data saved to {output_file}")

Generating Word2Vec embeddings for job descriptions...
Generating Word2Vec embeddings for transcripts...
Generating Word2Vec embeddings for resumes...
Processed data saved to C:\Users\sidhe\Downloads\processed_dataset_with_embeddings.xlsx


**Loading the Glove Model for embeddings (Based on research on internet)**

In [26]:
import pandas as pd
import numpy as np
from gensim.models.keyedvectors import KeyedVectors

# Load GloVe embeddings (assuming you have the pre-trained GloVe file, e.g., 'glove.6B.100d.txt')
def load_glove_model(glove_file):
    print("Loading GloVe model...")
    glove_model = {}
    with open(glove_file, 'r', encoding='utf-8') as f:
        for line in f:
            parts = line.split()
            word = parts[0]
            vector = np.array(parts[1:], dtype=float)
            glove_model[word] = vector
    return glove_model

# Function to get GloVe vector for a word (returns zero vector if word is not in the GloVe model)
def get_glove_vector(word, glove_model, vector_size=100):
    return glove_model.get(word, np.zeros(vector_size))

# Load your Excel data
csv_file = r"B:\OneDrive - Amity University\Desktop\Tower Research\cleaned_combined_dataset.xlsx"
df = pd.read_excel(csv_file) 

# Load GloVe embeddings (replace with path to your GloVe file)
glove_model = load_glove_model("B:\OneDrive - Amity University\Desktop\glove.6B\glove.6B.100d.txt")

# Select columns to apply GloVe embeddings
columns_to_embed = ['job_description', 'transcript', 'resume']  # Replace with the columns you want to embed

# Embed the columns
for column in columns_to_embed:
    print(f"Embedding column: {column}")
    df[column + '_embedding'] = df[column].apply(lambda x: np.mean([get_glove_vector(word, glove_model) for word in str(x).split()], axis=0))

# Check the first few rows of the dataframe
print(df.head())

# Save the dataframe with the embeddings to an Excel file
output_file = r'C:\Users\sidhe\Downloads\processed_dataset_with_embeddings_glove_2.xlsx'
with pd.ExcelWriter(output_file, engine='xlsxwriter') as writer:
    df.to_excel(writer, index=False, sheet_name='Embeddings')

print(f"File saved to {output_file}")


Loading GloVe model...
Embedding column: job_description
Embedding column: transcript
Embedding column: resume
          id          name               role  \
0  brenbr359   Brent Brown    Product Manager   
1  jameay305   James Ayala  Software Engineer   
2  scotri565  Scott Rivera      Data Engineer   
3  emilke232   Emily Kelly        UI Engineer   
4  ashlra638    Ashley Ray     Data Scientist   

                                          transcript  \
0  Product Manager Interview Transcript\n\nInterv...   
1  Software Engineer Interview Transcript\n\nInte...   
2  Here is a simulated interview for Scott Rivera...   
3  Interview Transcript: Emily Kelly for UI Engin...   
4  Data Scientist Interview Transcript\n\nCompany...   

                                              resume decision  \
0  Here's a sample resume for Brent Brown applyin...   select   
1  Here's a sample resume for James Ayala applyin...   select   
2  Here's a sample resume for Scott Rivera applyi...   reject 

**pre proccesing the csv of embeddings for futther work**

In [33]:
import pandas as pd
import numpy as np

# Load the CSV file
df = pd.read_excel('B:\OneDrive - Amity University\Desktop\Tower Research\processed_dataset_with_embeddings.xlsx')

# Example: Assuming the embeddings are in columns 6, 7, and 8 (adjust according to your actual column names)
embedding_columns = ['job_description_bert', 'transcript_bert', 'resume_bert','job_description_word2vec','transcript_word2vec','resume_word2vec']  # replace with your actual column names

# Function to convert the string of numbers into a list of floats
def convert_to_float(embedding_str):
    embedding_list = embedding_str.strip('[]').split()  # remove the brackets and split the numbers
    return [float(num) for num in embedding_list]  # convert each number to float

# Apply the function to the relevant columns
for col in embedding_columns:
    df[col] = df[col].apply(convert_to_float)

# Optionally, check the first few rows of the DataFrame to confirm
print(df.head())

# Save the modified DataFrame to a new CSV if needed
df.to_csv('modified_embeddings_bert_word2vec.csv', index=False)


          id          name               role  \
0  brenbr359   Brent Brown    Product Manager   
1  jameay305   James Ayala  Software Engineer   
2  scotri565  Scott Rivera      Data Engineer   
3  emilke232   Emily Kelly        UI Engineer   
4  ashlra638    Ashley Ray     Data Scientist   

                                          transcript  \
0  Product Manager Interview Transcript\n\nInterv...   
1  Software Engineer Interview Transcript\n\nInte...   
2  Here is a simulated interview for Scott Rivera...   
3  Interview Transcript: Emily Kelly for UI Engin...   
4  Data Scientist Interview Transcript\n\nCompany...   

                                              resume decision  \
0  Here's a sample resume for Brent Brown applyin...   select   
1  Here's a sample resume for James Ayala applyin...   select   
2  Here's a sample resume for Scott Rivera applyi...   reject   
3  Here's a sample resume for Emily Kelly:\n\nEmi...   select   
4  Here's a sample resume for Ashley Ray ap

**pre proccesing the glove embeddings for further work**

In [35]:
import pandas as pd
import numpy as np

# Load the CSV file
df = pd.read_excel('B:\OneDrive - Amity University\Desktop\Tower Research\processed_dataset_with_embeddings_glove_2.xlsx')

# Example: Assuming the embeddings are in columns 6, 7, and 8 (adjust according to your actual column names)
embedding_columns = ['job_description_embedding', 'transcript_embedding','resume_embedding']  # replace with your actual column names

# Function to convert the string of numbers into a list of floats
def convert_to_float(embedding_str):
    embedding_list = embedding_str.strip('[]').split()  # remove the brackets and split the numbers
    return [float(num) for num in embedding_list]  # convert each number to float

# Apply the function to the relevant columns
for col in embedding_columns:
    df[col] = df[col].apply(convert_to_float)

# Optionally, check the first few rows of the DataFrame to confirm
print(df.head())

# Save the modified DataFrame to a new CSV if needed
df.to_csv('modified_embeddings_glove.csv', index=False)


          id          name               role  \
0  brenbr359   Brent Brown    Product Manager   
1  jameay305   James Ayala  Software Engineer   
2  scotri565  Scott Rivera      Data Engineer   
3  emilke232   Emily Kelly        UI Engineer   
4  ashlra638    Ashley Ray     Data Scientist   

                                          transcript  \
0  Product Manager Interview Transcript\n\nInterv...   
1  Software Engineer Interview Transcript\n\nInte...   
2  Here is a simulated interview for Scott Rivera...   
3  Interview Transcript: Emily Kelly for UI Engin...   
4  Data Scientist Interview Transcript\n\nCompany...   

                                              resume decision  \
0  Here's a sample resume for Brent Brown applyin...   select   
1  Here's a sample resume for James Ayala applyin...   select   
2  Here's a sample resume for Scott Rivera applyi...   reject   
3  Here's a sample resume for Emily Kelly:\n\nEmi...   select   
4  Here's a sample resume for Ashley Ray ap

**training the 3 types of model embeddings in XG BOOST and ANN**

In [40]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import xgboost as xgb
from keras.models import Sequential
from keras.layers import Dense, Dropout
from sklearn.metrics import accuracy_score
import joblib
import ast

def process_embedding_string(embedding_str):
    """Convert string representation of embeddings to numpy array"""
    try:
        # Convert string representation of list to actual list
        return np.array(ast.literal_eval(embedding_str))
    except:
        return None

# Load BERT, Word2Vec, and GloVe embeddings CSV files
bert_word2vec_df = pd.read_csv(r'B:\OneDrive - Amity University\Desktop\Tower Research\Modify\modified_embeddings_bert_word2vec.csv')
glove_df = pd.read_csv(r'B:\OneDrive - Amity University\Desktop\Tower Research\Modify\modified_embeddings_glove.csv')

# Convert the 'decision' column into binary labels
decision_mapping = {'select': 1, 'Select': 1, 'selected': 1, 'rejected': 0, 'Reject': 0, 'reject': 0}
bert_word2vec_df['decision1'] = bert_word2vec_df['decision'].map(decision_mapping)
glove_df['decision1'] = glove_df['decision'].map(decision_mapping)

# Process embedding columns in both dataframes
embedding_columns_bert = bert_word2vec_df.columns.difference(['decision', 'decision1', 'id', 'name', 'role', 'transcript', 'resume', 'reason_for_decision', 'job_description', 'source_file', 'source_sheet'])
embedding_columns_glove = glove_df.columns.difference(['decision', 'decision1', 'id', 'name', 'role', 'transcript', 'resume', 'reason_for_decision', 'job_description', 'source_file', 'source_sheet'])

# Convert embedding strings to numpy arrays
for col in embedding_columns_bert:
    bert_word2vec_df[col] = bert_word2vec_df[col].apply(process_embedding_string)
for col in embedding_columns_glove:
    glove_df[col] = glove_df[col].apply(process_embedding_string)

# Create feature matrix by concatenating all embedding vectors
def create_feature_matrix(df, embedding_columns):
    features = []
    for _, row in df.iterrows():
        combined_embedding = np.concatenate([row[col] for col in embedding_columns if row[col] is not None])
        features.append(combined_embedding)
    return np.vstack(features)

# Create feature matrices
X_bert_word2vec = create_feature_matrix(bert_word2vec_df, embedding_columns_bert)
X_glove = create_feature_matrix(glove_df, embedding_columns_glove)

# Combine all features
X = np.hstack([X_bert_word2vec, X_glove])
y = bert_word2vec_df['decision1'].values

# Split into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train ANN Model
def train_ann(X_train, y_train, X_test, y_test):
    ann_model = Sequential()
    ann_model.add(Dense(128, input_dim=X_train.shape[1], activation='relu'))
    ann_model.add(Dropout(0.2))
    ann_model.add(Dense(64, activation='relu'))
    ann_model.add(Dropout(0.2))
    ann_model.add(Dense(1, activation='sigmoid'))

    ann_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    ann_model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test))

    y_pred_ann = (ann_model.predict(X_test) > 0.5).astype(int)
    ann_accuracy = accuracy_score(y_test, y_pred_ann)
    print(f'ANN Model Accuracy: {ann_accuracy:.4f}')
    return ann_model

# Train XGBoost Model
def train_xgboost(X_train, y_train, X_test, y_test):
    xgb_model = xgb.XGBClassifier(use_label_encoder=False, eval_metric='logloss')
    xgb_model.fit(X_train, y_train)
    
    y_pred_xgb = xgb_model.predict(X_test)
    xgb_accuracy = accuracy_score(y_test, y_pred_xgb)
    print(f'XGBoost Model Accuracy: {xgb_accuracy:.4f}')
    return xgb_model

# Train models
print("Training ANN model...")
ann_model = train_ann(X_train_scaled, y_train, X_test_scaled, y_test)

print("\nTraining XGBoost model...")
xgb_model = train_xgboost(X_train_scaled, y_train, X_test_scaled, y_test)

# Save models
ann_model.save('ann_model.h5')
xgb_model.save_model('xgb_model.json')
joblib.dump(ann_model, 'ann_model.pkl')

print("\nModels saved successfully as 'ann_model.h5', 'xgb_model.json', and 'ann_model.pkl'.")

Training ANN model...


Epoch 1/10


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
ANN Model Accuracy: 0.8123

Training XGBoost model...


Parameters: { "use_label_encoder" } are not used.



XGBoost Model Accuracy: 0.8092


  saving_api.save_model(



Models saved successfully as 'ann_model.h5', 'xgb_model.json', and 'ann_model.pkl'.


**Pre Proccessing the File for further work**

In [8]:
import pandas as pd
import numpy as np

# Load the Excel file
df = pd.read_excel(r'B:\OneDrive - Amity University\Desktop\Assignment-5\glove_word2vec_embss_bert_embedss_EDA_dataset.xlsx')


embedding_columns = ['job_description_bert', 'transcript_bert', 'resume_bert', 
                     'job_description_word2vec', 'transcript_word2vec', 'resume_word2vec', 
                     'job_description_embedding_glove', 'transcript_embedding_glove', 'resume_embedding_glove']

# Function to convert the string of numbers into a list of floats
def convert_to_float(embedding_str):
    if pd.isnull(embedding_str) or embedding_str == '':
        return []  # or np.nan depending on your preferred handling
    try:
        embedding_list = embedding_str.strip('[]').split()  # remove the brackets and split the numbers
        return [float(num) for num in embedding_list]  # convert each number to float
    except ValueError:
        return []  # or np.nan if conversion fails

# Apply the function to the relevant columns
for col in embedding_columns:
    df[col] = df[col].apply(convert_to_float)

# Optionally, check the first few rows of the DataFrame to confirm
print(df.head())

# Save the modified DataFrame to a new Excel file
df.to_excel(r'pre_glove_word2vec_embss_bert_embedss_EDA_dataset.xlsx', index=False)


          id          name               role  \
0  brenbr359   Brent Brown    Product Manager   
1  jameay305   James Ayala  Software Engineer   
2  scotri565  Scott Rivera      Data Engineer   
3  emilke232   Emily Kelly        UI Engineer   
4  ashlra638    Ashley Ray     Data Scientist   

                                          transcript  \
0  Product Manager Interview Transcript\n\nInterv...   
1  Software Engineer Interview Transcript\n\nInte...   
2  Here is a simulated interview for Scott Rivera...   
3  Interview Transcript: Emily Kelly for UI Engin...   
4  Data Scientist Interview Transcript\n\nCompany...   

                                              resume decision  \
0  Here's a sample resume for Brent Brown applyin...   select   
1  Here's a sample resume for James Ayala applyin...   select   
2  Here's a sample resume for Scott Rivera applyi...   reject   
3  Here's a sample resume for Emily Kelly:\n\nEmi...   select   
4  Here's a sample resume for Ashley Ray ap

**Training XG BOOST and ANN on EDA FEATURES with BERT,GLOVE AND WORD2VEC EMBEDSS**

In [12]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import xgboost as xgb
from keras.models import Sequential
from keras.layers import Dense, Dropout
from sklearn.metrics import accuracy_score
import joblib
import ast

# Path to your dataset
final_path_file = r'B:\OneDrive - Amity University\Desktop\Assignment-5\pre_glove_word2vec_embss_bert_embedss_EDA_dataset.xlsx'
combine_df_final = pd.read_excel(final_path_file)

# Mapping for decision encoding
decision_mapping_final = {'select': 1, 'Select': 1, 'selected': 1, 'rejected': 0, 'Reject': 0, 'reject': 0}
combine_df_final['decision_encoded'] = combine_df_final['decision'].map(decision_mapping_final)

# Embedding and other feature columns
features = [
    'job_description_bert', 'transcript_bert', 'resume_bert', 'job_description_word2vec', 'transcript_word2vec', 
    'resume_word2vec', 'job_description_embedding_glove', 'transcript_embedding_glove', 'resume_embedding_glove',
    'resume_word_count', 'resume_char_count', 'resume_avg_word_length', 'resume_sentence_count',
    'resume_uppercase_ratio', 'resume_technical_keyword_count', 'resume_positive_keyword_count',
    'resume_negative_keyword_count', 'resume_unique_word_ratio', 'transcript_word_count',
    'transcript_char_count', 'transcript_avg_word_length', 'transcript_sentence_count',
    'transcript_uppercase_ratio', 'transcript_positive_keyword_count', 'transcript_negative_keyword_count',
    'transcript_unique_word_ratio', 'job_role_in_resume', 'resume_job_keyword_overlap',
    'transcript_job_keyword_overlap', 'role_popularity', 'decision_reason_encoded',
    'resume_job_similarity', 'transcript_job_similarity', 'transcript_resume_similarity'
]
target = 'decision_encoded'  

# Convert string representations of lists into actual lists
def convert_to_list(value):
    try:
        # Safely evaluate the string into a list
        return ast.literal_eval(value)
    except (ValueError, SyntaxError):
        # If conversion fails, return an empty list (or you can handle this differently)
        return []

# Apply the conversion function to embedding columns
embedding_columns = [
    'job_description_bert', 'transcript_bert', 'resume_bert',
    'job_description_word2vec', 'transcript_word2vec', 'resume_word2vec',
    'job_description_embedding_glove', 'transcript_embedding_glove', 'resume_embedding_glove'
]

for col in embedding_columns:
    combine_df_final[col] = combine_df_final[col].apply(convert_to_list)

# Flatten embedding columns to a single numeric value (e.g., by taking the mean of the list)
def flatten_embedding_column(df, column_name):
    df[column_name] = df[column_name].apply(lambda x: np.mean(x) if isinstance(x, list) else 0)

# Apply the flatten function to all embedding columns
for col in embedding_columns:
    flatten_embedding_column(combine_df_final, col)

# Now, prepare the features and target
X = combine_df_final[features]
y = combine_df_final[target]

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train ANN Model
def train_ann(X_train, y_train, X_test, y_test):
    ann_model = Sequential()
    ann_model.add(Dense(128, input_dim=X_train.shape[1], activation='relu'))
    ann_model.add(Dropout(0.2))
    ann_model.add(Dense(64, activation='relu'))
    ann_model.add(Dropout(0.2))
    ann_model.add(Dense(1, activation='sigmoid'))

    ann_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    ann_model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test))

    y_pred_ann = (ann_model.predict(X_test) > 0.5).astype(int)
    ann_accuracy = accuracy_score(y_test, y_pred_ann)
    print(f'ANN Model Accuracy: {ann_accuracy:.4f}')
    return ann_model

# Train XGBoost Model
def train_xgboost(X_train, y_train, X_test, y_test):
    xgb_model = xgb.XGBClassifier(use_label_encoder=False, eval_metric='logloss')
    xgb_model.fit(X_train, y_train)
    
    y_pred_xgb = xgb_model.predict(X_test)
    xgb_accuracy = accuracy_score(y_test, y_pred_xgb)
    print(f'XGBoost Model Accuracy: {xgb_accuracy:.4f}')
    return xgb_model

# Train models
print("Training ANN model...")
ann_model = train_ann(X_train_scaled, y_train, X_test_scaled, y_test)

print("\nTraining XGBoost model...")
xgb_model = train_xgboost(X_train_scaled, y_train, X_test_scaled, y_test)

# Save models
ann_model.save('ann_model_final.h5')  # Keras model in .h5 format
xgb_model.save_model('xgb_model_final.json')  # XGBoost model in .json format

# Save the scaler (optional)
joblib.dump(scaler, 'scaler.pkl')

# Saving the ANN model with joblib for later loading (optional)
joblib.dump(ann_model, 'ann_model_final.pkl')


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  T = new_sum / new_sample_count
  new_unnormalized_variance -= correction**2 / new_sample_count


Training ANN model...


Epoch 1/10


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
ANN Model Accuracy: 0.5308

Training XGBoost model...


Parameters: { "use_label_encoder" } are not used.



XGBoost Model Accuracy: 0.8492


  saving_api.save_model(


['ann_model_final.pkl']