A2) Use a cross-validation technique (RandomizedSearchCV()) to tune the
hyperparameters of your models.

In [3]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score

# ---- Data -----------------------------------------------------------------
# Read the Excel workbook into a DataFrame.
file_path = "/content/drive/MyDrive/Colab Notebooks/ML_PROJECT/DWI_with_Labels.xlsx"
df = pd.read_excel(file_path)

# Every column except the last is treated as a feature; the last column is
# the prediction target.
X, y = df.iloc[:, :-1].values, df.iloc[:, -1].values

# Hold out 20% of the rows for testing; the fixed seed keeps the split stable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# ---- Hyperparameter search --------------------------------------------------
# Candidate values the randomized search samples from.
param_dist = {
    'n_estimators': [100, 200, 300, 500],
    'max_depth': [None, 10, 20, 30, 50],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'bootstrap': [True, False]
}

# Base estimator whose hyperparameters are tuned.
rf = RandomForestClassifier(random_state=42)

# 20 sampled configurations, each scored with 5-fold cross-validation on the
# training split only; n_jobs=-1 runs the fits in parallel.
random_search = RandomizedSearchCV(
    rf,
    param_dist,
    n_iter=20,
    cv=5,
    n_jobs=-1,
    verbose=2,
    random_state=42,
).fit(X_train, y_train)  # fit() returns the fitted search object itself

# Best configuration found by the search, refit on the training split.
best_model = random_search.best_estimator_

print("Best Hyperparameters:", random_search.best_params_)

# ---- Evaluation -------------------------------------------------------------
# Predictions on both splits so train and test accuracy can be compared.
train_preds, test_preds = best_model.predict(X_train), best_model.predict(X_test)

print("\nTrain Accuracy:", accuracy_score(y_train, train_preds))
print("Test Accuracy:", accuracy_score(y_test, test_preds))
print("\nClassification Report:\n", classification_report(y_test, test_preds))

Fitting 5 folds for each of 20 candidates, totalling 100 fits
Best Hyperparameters: {'n_estimators': 300, 'min_samples_split': 5, 'min_samples_leaf': 4, 'max_depth': 50, 'bootstrap': False}

Train Accuracy: 1.0
Test Accuracy: 0.7823529411764706

Classification Report:
               precision    recall  f1-score   support

           0       0.75      0.88      0.81        91
           1       0.83      0.67      0.74        79

    accuracy                           0.78       170
   macro avg       0.79      0.78      0.78       170
weighted avg       0.79      0.78      0.78       170



A3)

In [9]:
# Install the pinned NumPy / CatBoost versions in a single step.
# The earlier unpinned `--upgrade` pass was immediately overridden by the
# pinned reinstall, so only the pinned install is kept.
# `%pip` (instead of `!pip`) installs into the environment of the running kernel.
# NOTE(review): if NumPy was already imported in this session, the runtime
# probably needs a restart before the newly installed version is picked up.
%pip install --quiet numpy==1.24.4 catboost==1.2


[31mERROR: Operation cancelled by user[0m[31m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m89.9/89.9 kB[0m [31m6.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.0/62.0 kB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m102.5/102.5 kB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.3/17.3 MB[0m [31m68.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m98.5/98.5 MB[0m [31m8.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.1/13.1 MB[0m [31m108.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m47.1/47.1 kB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.6/8.6 MB[0m [31m120.3 MB/s[0m eta [36m0:00:00

In [11]:
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, classification_report
from xgboost import XGBClassifier
import warnings
# Silence library warnings so the per-model reports below stay readable.
# NOTE(review): this is a blanket filter for the rest of the session; narrow
# it to specific warning categories if warnings turn out to matter.
warnings.filterwarnings("ignore")

# Seed for every estimator that draws random numbers, so the comparison table
# is reproducible from run to run (same value as the split / search seed).
RANDOM_STATE = 42

# Models to compare, all with default hyperparameters unless stated.
# SVC (without probability estimates) and GaussianNB are left unseeded.
classifiers = {
    "SVM": SVC(),
    "Decision Tree": DecisionTreeClassifier(random_state=RANDOM_STATE),
    "Random Forest": RandomForestClassifier(random_state=RANDOM_STATE),
    "AdaBoost": AdaBoostClassifier(random_state=RANDOM_STATE),
    "XGBoost": XGBClassifier(
        verbosity=0, use_label_encoder=False, random_state=RANDOM_STATE
    ),
    "Naive Bayes": GaussianNB(),
    "MLP": MLPClassifier(max_iter=300, random_state=RANDOM_STATE),
}

# One summary row (model name, train accuracy, test accuracy) per classifier.
results = []

for name, clf in classifiers.items():
    clf.fit(X_train, y_train)

    # Predict once per split and reuse the result for both the accuracy
    # figures and the classification report.
    clf_train_pred = clf.predict(X_train)
    clf_test_pred = clf.predict(X_test)

    train_acc = accuracy_score(y_train, clf_train_pred)
    test_acc = accuracy_score(y_test, clf_test_pred)
    results.append({
        "Model": name,
        "Train Accuracy": round(train_acc, 4),
        "Test Accuracy": round(test_acc, 4)
    })
    print(f"\n=== {name} ===")
    print("Classification Report:\n", classification_report(y_test, clf_test_pred))

# Build the table once from the collected rows.
results_df = pd.DataFrame(results)
print("\n\n=== Summary Table ===")
print(results_df)



=== SVM ===
Classification Report:
               precision    recall  f1-score   support

           0       0.80      0.82      0.81        91
           1       0.79      0.76      0.77        79

    accuracy                           0.79       170
   macro avg       0.79      0.79      0.79       170
weighted avg       0.79      0.79      0.79       170


=== Decision Tree ===
Classification Report:
               precision    recall  f1-score   support

           0       0.68      0.70      0.69        91
           1       0.64      0.62      0.63        79

    accuracy                           0.66       170
   macro avg       0.66      0.66      0.66       170
weighted avg       0.66      0.66      0.66       170


=== Random Forest ===
Classification Report:
               precision    recall  f1-score   support

           0       0.77      0.90      0.83        91
           1       0.86      0.68      0.76        79

    accuracy                           0.80       1