In [None]:
# Importing the libraries
import numpy as np
import pandas as pd

In [None]:
# Load the Social Network Ads CSV into a DataFrame.
csv_path = '/content/Social_Network_Ads.csv'
dataset = pd.read_csv(csv_path)

In [None]:
# Preview the first five rows of the raw data.
dataset.head(n=5)

Unnamed: 0,User ID,Gender,Age,EstimatedSalary,Purchased
0,15624510,Male,19,19000,0
1,15810944,Male,35,20000,0
2,15668575,Female,26,43000,0
3,15603246,Female,27,57000,0
4,15804002,Male,19,76000,0


In [None]:
# Preview the last five rows of the raw data.
dataset.tail(n=5)

Unnamed: 0,User ID,Gender,Age,EstimatedSalary,Purchased
395,15691863,Female,46,41000,1
396,15706071,Male,51,23000,1
397,15654296,Female,50,20000,1
398,15755018,Male,36,33000,0
399,15594041,Female,49,36000,1


In [None]:
# One-hot encode the categorical columns as integer 0/1 indicators.
# drop_first=True drops the first level of each category, so Gender is
# represented by the single column Gender_Male.
dataset = pd.get_dummies(data=dataset, drop_first=True, dtype=int)

In [None]:
# Display the encoded frame to confirm Gender was replaced by Gender_Male.
dataset

Unnamed: 0,User ID,Age,EstimatedSalary,Purchased,Gender_Male
0,15624510,19,19000,0,1
1,15810944,35,20000,0,1
2,15668575,26,43000,0,0
3,15603246,27,57000,0,0
4,15804002,19,76000,0,1
...,...,...,...,...,...
395,15691863,46,41000,1,0
396,15706071,51,23000,1,1
397,15654296,50,20000,1,0
398,15755018,36,33000,0,1


In [None]:
# Feature matrix: age, salary and the encoded gender indicator.
feature_columns = ['Age', 'EstimatedSalary', 'Gender_Male']
X = dataset[feature_columns]

# Target vector: the Purchased column.
target_column = 'Purchased'
y = dataset[target_column]

In [None]:
from sklearn.preprocessing import StandardScaler

# Standardise every feature column and replace X with the scaled array.
# NOTE(review): the scaler is fitted on all rows before cross-validation, so
# the scaling statistics used in each CV fold include that fold's held-out
# rows. Consider moving the scaler into a Pipeline that is fitted per fold.
sc = StandardScaler()
sc.fit(X)
X = sc.transform(X)

In [None]:
from sklearn.svm import SVC

In [None]:
# Reference for the valid `scoring` parameter values:
# https://scikit-learn.org/stable/modules/model_evaluation.html#scoring-parameter

In [None]:
from sklearn.model_selection import GridSearchCV, cross_val_predict

# Hyperparameter grid for the SVC (kept small for speed).
# `gamma` is ignored by the linear kernel, so it is only varied for 'rbf'.
# A single flat grid would fit every linear candidate twice and produce
# duplicated, tied rows in cv_results_.
param_grid = [
    {
        'kernel': ['linear'],
        'C': [0.1, 1, 10],
        'class_weight': [None, 'balanced'],
    },
    {
        'kernel': ['rbf'],
        'C': [0.1, 1, 10],
        'gamma': ['scale', 'auto'],
        'class_weight': [None, 'balanced'],
    },
]

# Candidates are ranked by weighted F1. refit=True retrains the best candidate
# on all of X, y and exposes it as grid.best_estimator_.
grid = GridSearchCV(
    SVC(),
    param_grid,
    refit=True,
    verbose=3,
    n_jobs=-1,
    scoring='f1_weighted',
)

# Fitting the model for grid search
grid.fit(X, y)

Fitting 5 folds for each of 24 candidates, totalling 120 fits


In [None]:
# Out-of-fold predictions for every row (used in place of a separate X_test):
# the tuned estimator is re-fitted on 4 folds and predicts the 5th, in turn.
# NOTE(review): the hyperparameters were selected on this same data, so the
# scores computed from these predictions are likely optimistic.
y_pred = cross_val_predict(
    estimator=grid.best_estimator_,
    X=X,
    y=y,
    cv=5,
)

In [None]:
# Evaluation of the cross-validated SVC predictions
from sklearn.metrics import confusion_matrix, classification_report, f1_score

cm = confusion_matrix(y, y_pred)
clf_report = classification_report(y, y_pred)

# average='weighted' gives the support-weighted F1, not the macro average,
# so the value is stored under an accurate name.
f1_weighted = f1_score(y, y_pred, average='weighted')

# Alias kept for backward compatibility: the reporting cell reads `f1_macro`.
f1_macro = f1_weighted

In [None]:
print("Best Parameters:", grid.best_params_)
# `f1_macro` is computed with average='weighted', so label it as weighted F1
# rather than macro F1 to keep the printed report accurate.
print("The f1_weighted:", f1_macro)
print("The confusion Matrix:\n", cm)
print("The report:\n", clf_report)

Best Parameters: {'C': 1, 'class_weight': 'balanced', 'gamma': 'scale', 'kernel': 'rbf'}
The f1_macro: 0.9058866666666667
The confusion Matrix:
 [[231  26]
 [ 12 131]]
The report:
               precision    recall  f1-score   support

           0       0.95      0.90      0.92       257
           1       0.83      0.92      0.87       143

    accuracy                           0.91       400
   macro avg       0.89      0.91      0.90       400
weighted avg       0.91      0.91      0.91       400



In [None]:
# Save Best Model (the refitted SVC chosen by the grid search)
import pickle

filename = "SVC_CV_best_model.sav"

# The context manager guarantees the file is flushed and closed after the
# dump; a bare open() would leave the handle open.
# NOTE(review): the StandardScaler `sc` is not persisted here. Anyone loading
# this model must apply the same scaling to inputs before calling predict.
with open(filename, 'wb') as model_file:
    pickle.dump(grid.best_estimator_, model_file)

In [None]:
# Tabulate the SVC grid-search results and preview the first five rows.
results_df = pd.DataFrame(data=grid.cv_results_)
results_df.head(n=5)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_class_weight,param_gamma,param_kernel,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.007663,0.003809,0.007455,0.003087,0.1,,scale,linear,"{'C': 0.1, 'class_weight': None, 'gamma': 'sca...",0.559115,0.949546,0.909305,0.803125,0.708631,0.785944,0.141214,23
1,0.007752,0.001092,0.007065,0.000683,0.1,,scale,rbf,"{'C': 0.1, 'class_weight': None, 'gamma': 'sca...",0.842857,0.937229,0.911364,0.803125,0.887896,0.876494,0.048061,11
2,0.005312,0.000768,0.005411,0.000275,0.1,,auto,linear,"{'C': 0.1, 'class_weight': None, 'gamma': 'aut...",0.559115,0.949546,0.909305,0.803125,0.708631,0.785944,0.141214,23
3,0.007571,0.00147,0.006259,0.00027,0.1,,auto,rbf,"{'C': 0.1, 'class_weight': None, 'gamma': 'aut...",0.842857,0.937229,0.911364,0.803125,0.887896,0.876494,0.048061,11
4,0.0062,0.000727,0.005319,0.000847,0.1,balanced,scale,linear,"{'C': 0.1, 'class_weight': 'balanced', 'gamma'...",0.690376,0.926,0.925,0.764618,0.703383,0.801875,0.104007,13


In [None]:
# -------------------------------
# Random Forest Classifier with GridSearchCV
# -------------------------------
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, classification_report, f1_score, accuracy_score

# Parameter grid for RF (tune a few important hyperparameters)
param_grid_rf = {
    'n_estimators': [50, 100, 200],      # number of trees
    'max_depth': [None, 5, 10, 20],       # tree depth
    'min_samples_split': [2, 5, 10],      # min samples to split
    'min_samples_leaf': [1, 2, 5],        # min samples per leaf
    # Whether each tree is trained on a bootstrap sample of the rows.
    # This does not address class imbalance.
    'bootstrap': [True, False]
}

# random_state pins the forest's randomness so the search is reproducible.
# Candidates are ranked by weighted F1 and the best one is refitted on all data.
grid_rf = GridSearchCV(RandomForestClassifier(random_state=42),
                       param_grid_rf,
                       refit=True,
                       verbose=3,
                       n_jobs=-1,
                       scoring='f1_weighted')

grid_rf.fit(X, y)

# Cross-validated (out-of-fold) predictions from the tuned forest.
# NOTE(review): hyperparameters were tuned on this same data, so the scores
# below are likely optimistic.
y_pred_rf = cross_val_predict(grid_rf.best_estimator_, X, y, cv=5)

# Evaluation
print("\n=== Random Forest Results ===")
print("Best Parameters:", grid_rf.best_params_)
print("F1 Weighted:", f1_score(y, y_pred_rf, average='weighted'))
print("Accuracy:", accuracy_score(y, y_pred_rf))
print("Confusion Matrix:\n", confusion_matrix(y, y_pred_rf))
print("Classification Report:\n", classification_report(y, y_pred_rf))

# Save Best RF Model. The context manager closes the file after the dump
# instead of leaking the handle.
import pickle
with open("RF_CV_best_model.sav", 'wb') as rf_model_file:
    pickle.dump(grid_rf.best_estimator_, rf_model_file)

# CV Results table (Random Forest)
results_rf_df = pd.DataFrame(grid_rf.cv_results_)
results_rf_df.head()

Fitting 5 folds for each of 216 candidates, totalling 1080 fits

=== Random Forest Results ===
Best Parameters: {'bootstrap': True, 'max_depth': 5, 'min_samples_leaf': 5, 'min_samples_split': 2, 'n_estimators': 50}
F1 Weighted: 0.908201531551432
Accuracy: 0.9075
Confusion Matrix:
 [[233  24]
 [ 13 130]]
Classification Report:
               precision    recall  f1-score   support

           0       0.95      0.91      0.93       257
           1       0.84      0.91      0.88       143

    accuracy                           0.91       400
   macro avg       0.90      0.91      0.90       400
weighted avg       0.91      0.91      0.91       400



Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_bootstrap,param_max_depth,param_min_samples_leaf,param_min_samples_split,param_n_estimators,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.148835,0.002623,0.013251,0.002608,True,,1,2,50,"{'bootstrap': True, 'max_depth': None, 'min_sa...",0.783072,0.937229,0.872729,0.840551,0.81427,0.84957,0.052858,169
1,0.30438,0.016648,0.018453,0.000294,True,,1,2,100,"{'bootstrap': True, 'max_depth': None, 'min_sa...",0.783072,0.937229,0.872729,0.840551,0.80237,0.84719,0.054632,174
2,0.595609,0.020405,0.031819,0.000893,True,,1,2,200,"{'bootstrap': True, 'max_depth': None, 'min_sa...",0.783072,0.925,0.884822,0.840551,0.827074,0.852104,0.048805,163
3,0.149607,0.004711,0.011849,0.000216,True,,1,5,50,"{'bootstrap': True, 'max_depth': None, 'min_sa...",0.798551,0.925,0.898183,0.828357,0.851777,0.860374,0.045885,136
4,0.451106,0.061812,0.029816,0.008388,True,,1,5,100,"{'bootstrap': True, 'max_depth': None, 'min_sa...",0.767183,0.925,0.898183,0.828357,0.83976,0.851697,0.055433,165


In [None]:
# Save Best Model (Random Forest)
import pickle

filename = "/content/RF_CV_best_model.sav"

# The file is named for the Random Forest, so dump the RF search's best
# estimator (`grid_rf`). `grid` is the SVC search, and pickling it here would
# store an SVC under the RF filename.
# The context manager also ensures the file handle is closed after writing.
with open(filename, 'wb') as rf_model_file:
    pickle.dump(grid_rf.best_estimator_, rf_model_file)

In [None]:
# CV results table built from `grid` (the SVC search).
# NOTE(review): this repeats the SVC table even though it follows the Random
# Forest cells. If the RF results were intended, use grid_rf.cv_results_.
results_df = pd.DataFrame(data=grid.cv_results_)
results_df.head(n=5)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_class_weight,param_gamma,param_kernel,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.007663,0.003809,0.007455,0.003087,0.1,,scale,linear,"{'C': 0.1, 'class_weight': None, 'gamma': 'sca...",0.559115,0.949546,0.909305,0.803125,0.708631,0.785944,0.141214,23
1,0.007752,0.001092,0.007065,0.000683,0.1,,scale,rbf,"{'C': 0.1, 'class_weight': None, 'gamma': 'sca...",0.842857,0.937229,0.911364,0.803125,0.887896,0.876494,0.048061,11
2,0.005312,0.000768,0.005411,0.000275,0.1,,auto,linear,"{'C': 0.1, 'class_weight': None, 'gamma': 'aut...",0.559115,0.949546,0.909305,0.803125,0.708631,0.785944,0.141214,23
3,0.007571,0.00147,0.006259,0.00027,0.1,,auto,rbf,"{'C': 0.1, 'class_weight': None, 'gamma': 'aut...",0.842857,0.937229,0.911364,0.803125,0.887896,0.876494,0.048061,11
4,0.0062,0.000727,0.005319,0.000847,0.1,balanced,scale,linear,"{'C': 0.1, 'class_weight': 'balanced', 'gamma'...",0.690376,0.926,0.925,0.764618,0.703383,0.801875,0.104007,13
