# **Possibly relevant libraries**

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
import math
import warnings
from sklearn.preprocessing import LabelEncoder, StandardScaler, MinMaxScaler, RobustScaler, OneHotEncoder
from sklearn.model_selection import train_test_split, StratifiedKFold, cross_val_score, RepeatedStratifiedKFold, RandomizedSearchCV, GridSearchCV
from sklearn.linear_model import LinearRegression, LogisticRegression, LassoCV, RidgeCV
from sklearn.metrics import mean_squared_error, f1_score, classification_report, accuracy_score, recall_score, precision_score, confusion_matrix, roc_auc_score, ConfusionMatrixDisplay
from sklearn.ensemble import RandomForestRegressor, AdaBoostClassifier, GradientBoostingClassifier, RandomForestClassifier, BaggingClassifier
from sklearn.feature_selection import RFE
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.impute import SimpleImputer
from sklearn.svm import SVC
from xgboost import XGBClassifier
# Notebook-wide setup: fix NumPy's global seed, render floats with three
# decimals, show every DataFrame column, and silence library warnings.
np.random.seed(42)
pd.set_option("display.float_format", "{:.3f}".format)
pd.set_option("display.max_columns", None)
warnings.filterwarnings("ignore")

# **Loading Data**

In this specific template, we are attempting to predict the classification for the test dataset. Adapt the code according to your needs.

In [None]:
# Template placeholders: every right-hand side below is intentionally left
# blank and must be replaced with your own data before this cell can run
# (as written, these assignments are not valid Python).
X_train = #train_dataset: features the models are fitted on
X_val = #val_dataset: features used to score each fitted model
X_test = #test_dataset: features to predict for export (its .index is used as the id column)
y_train = #y_train_dataset: labels matching X_train
y_val = #y_val_split: labels matching X_val

### Training and evaluation of Multiple Models at once ###

In [None]:
# Candidate classifiers, keyed by the display name used in the log output
# and in the exported file names.
models = {
    "Logistic Regression": LogisticRegression(max_iter=200),
    "Random Forest": RandomForestClassifier(random_state=42),
    "Decision Tree": DecisionTreeClassifier(random_state=42),
    "Bagging": BaggingClassifier(random_state=42),
    'KNeighborsClassifier': KNeighborsClassifier(),
    "GBM": GradientBoostingClassifier(random_state=42),
    "XGB": XGBClassifier(random_state=42),
    "Neural Network": MLPClassifier(random_state=42),
}

# Maps each model name to its fitted estimator and validation metrics.
results = {}
for name, model in models.items():
    print(f"Training {name}...")
    model.fit(X_train, y_train)
    y_pred = model.predict(X_val)
    # Adapt the metrics you want to use for evaluation
    results[name] = {
        'model': model,
        'f1_macro': f1_score(y_val, y_pred, average='macro'),
        'accuracy': accuracy_score(y_val, y_pred),
    }

# Report once, after every model has been trained. Printing inside the
# training loop would repeat the whole cumulative table after each model.
print("\nModel Evaluation Results:")
for model_name, metrics in results.items():
    print(f"{model_name}: F1 macro = {metrics['f1_macro']:.4f}, Accuracy = {metrics['accuracy']:.4f}")

### Exporting Predictions (Useful for Kaggle Submissions) ###

In [None]:
# Predict the test set with every fitted model, keyed by model name.
predictions = {}
for name, model in models.items():
    print(f"Predicting with {name}...")
    predictions[name] = model.predict(X_test)  # Predict on the test set

# Write one CSV per model: an 'id' column taken from X_test's index and a
# 'prediction' column holding each prediction wrapped in a one-element list.
# NOTE(review): assumes X_test is a pandas object exposing .index — confirm.
for name, preds in predictions.items():
    # Convert NumPy scalars to native Python values before wrapping them.
    # A list of NumPy scalars is stringified via repr(), which on NumPy >= 2
    # yields e.g. "[np.int64(1)]" in the CSV instead of "[1]".
    preds = [[pred] for pred in np.asarray(preds).tolist()]
    predictions_df = pd.DataFrame({
        'id': X_test.index,
        'prediction': preds,
    })

    output_path = f'{name}_predictions.csv'
    predictions_df.to_csv(output_path, index=False)
    print(f"Predictions for {name} saved to '{output_path}'")