# Get the predictions

In [17]:
import pandas as pd

In [28]:
import ast
import copy

from lightgbm import LGBMClassifier
from sklearn.discriminant_analysis import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from xgboost import XGBClassifier

# test_results.csv is expected to provide the columns:
# feature_type, model, with_hypertuning, best_params, accuracy, sensitivity,
# specificity, precision, f1, mcc, index
test_results = pd.read_csv('test_results.csv')

# One default-configured estimator per candidate, keyed by its class name so it
# can be looked up from the 'model' column of test_results.
models = {
    estimator_class.__name__: estimator_class()
    for estimator_class in (LogisticRegression, SVC, XGBClassifier, LGBMClassifier)
}

def _flag_is_true(flag):
    """Return True when *flag* is the boolean True or the text 'True'.

    pandas turns a CSV column holding only True/False into booleans, while a
    column with any other entry stays text, so both spellings are accepted.
    """
    return str(flag).strip().lower() == 'true'


def _build_best_model(result_row):
    """Return a new, unfitted estimator configured from one results row.

    Args:
        result_row: A row of ``test_results`` providing the 'model',
            'with_hypertuning' and 'best_params' entries.

    Returns:
        An independent copy of the matching ``models`` entry, with the stored
        parameters applied when the row is flagged as hypertuned.

    Raises:
        KeyError: If the row names a model that is not in ``models``.
    """
    # Copy the template instead of reusing it: when both winners name the same
    # model type they would otherwise share one object, and the second
    # set_params()/fit() would silently overwrite the first.
    estimator = copy.deepcopy(models[result_row['model']])
    if _flag_is_true(result_row['with_hypertuning']):
        # best_params is parsed as a Python literal and unpacked as keyword
        # arguments, so it must hold a dict written as text.
        estimator.set_params(**ast.literal_eval(result_row['best_params']))
    return estimator


# Select the most accurate row among the first 40 rows (without feature
# selection) and among the remaining rows (with feature selection).
# idxmax() returns an index label; it is used positionally with iloc here and
# further down, which lines up as long as test_results keeps the default
# 0..n-1 index produced by read_csv.
best_model_without_feature_selection_id = test_results.iloc[:40]['accuracy'].idxmax()
best_model_with_feature_selection_id = test_results.iloc[40:]['accuracy'].idxmax()

_best_row_without_feature_selection = test_results.iloc[best_model_without_feature_selection_id]
_best_row_with_feature_selection = test_results.iloc[best_model_with_feature_selection_id]

# Raw hypertuning flags of the two winning rows.
best_model_without_feature_selection_hyperparams = _best_row_without_feature_selection['with_hypertuning']
best_model_with_feature_selection_hyperparams = _best_row_with_feature_selection['with_hypertuning']

# Build one independent estimator per winner, applying tuned parameters if any.
best_model_without_feature_selection = _build_best_model(_best_row_without_feature_selection)
best_model_with_feature_selection = _build_best_model(_best_row_with_feature_selection)

# Feature-type names of the two winning rows; they select which CSV files to read.
feature_type_without_selection = test_results.iloc[best_model_without_feature_selection_id]["feature_type"]
feature_type_with_selection = test_results.iloc[best_model_with_feature_selection_id]["feature_type"]

# Training data (TR_ files): processed set for the model without feature
# selection, feature-engineered set for the model with it.
X_train_for_without_feature_selection = pd.read_csv(
    f'./processed_dataset/TR_{feature_type_without_selection}.csv'
)
X_train_for_with_feature_selection = pd.read_csv(
    f'./feature_engineered_dataset/TR_{feature_type_with_selection}.csv'
)

# Test data (TS_ files), taken from the same two directories.
X_test_for_without_feature_selection = pd.read_csv(
    f'./processed_dataset/TS_{feature_type_without_selection}.csv'
)
X_test_for_with_feature_selection = pd.read_csv(
    f'./feature_engineered_dataset/TS_{feature_type_with_selection}.csv'
)

# Separate the target column from the feature columns; 'id' and 'label' are
# both excluded from the features.
y_train_without_feature = X_train_for_without_feature_selection['label']
X_train_without_feature = X_train_for_without_feature_selection.drop(columns=['id', 'label'])

y_test_without_feature = X_test_for_without_feature_selection['label']
X_test_without_feature = X_test_for_without_feature_selection.drop(columns=['id', 'label'])

# Standardise both splits with statistics fitted on the training split only.
scaler = StandardScaler()
scaler.fit(X_train_without_feature)
X_train_without_feature = scaler.transform(X_train_without_feature)
X_test_without_feature = scaler.transform(X_test_without_feature)

# Train the model without feature selection, then predict the test split.
y_pred = best_model_without_feature_selection.fit(
    X_train_without_feature, y_train_without_feature
).predict(X_test_without_feature)

# Keep the ids, the predictions and the labels side by side.
results_without_feature_selection = pd.DataFrame({
    'id': X_test_for_without_feature_selection['id'],
    'predictions_without_feature_selection': y_pred,
    'label': y_test_without_feature,
})


# Separate the target column from the feature columns; 'id' and 'label' are
# both excluded from the features.
y_train_with_feature = X_train_for_with_feature_selection['label']
X_train_with_feature = X_train_for_with_feature_selection.drop(columns=['id', 'label'])

y_test_with_feature = X_test_for_with_feature_selection['label']
X_test_with_feature = X_test_for_with_feature_selection.drop(columns=['id', 'label'])

# A fresh scaler, fitted on this training split only and applied to both splits.
scaler = StandardScaler()
scaler.fit(X_train_with_feature)
X_train_with_feature = scaler.transform(X_train_with_feature)
X_test_with_feature = scaler.transform(X_test_with_feature)

# Train the model with feature selection, then predict the test split.
y_pred = best_model_with_feature_selection.fit(
    X_train_with_feature, y_train_with_feature
).predict(X_test_with_feature)

# Keep the ids, the predictions and the labels side by side.
results_with_feature_selection = pd.DataFrame({
    'id': X_test_for_with_feature_selection['id'],
    'predictions_with_feature_selection': y_pred,
    'label': y_test_with_feature,
})

# Join the two prediction tables on the columns they share.
results = results_without_feature_selection.merge(
    results_with_feature_selection,
    on=['id', 'label'],
)

# Rearrange the columns: id first, both prediction columns next, label last.
column_order = [
    'id',
    'predictions_without_feature_selection',
    'predictions_with_feature_selection',
    'label',
]
results = results[column_order]

results.to_csv('predictions.csv', index=False)
