In [1]:
%%time
import pandas as pd
from sklearn.model_selection import train_test_split
import pickle

CPU times: total: 266 ms
Wall time: 1.34 s


In [2]:
%%time
# Load the dataset. The path is relative to the notebook's working directory.
file_path = 'dataset.csv'
df = pd.read_csv(file_path)

# Name of the label column, spelled exactly as the CSV header spells it (the
# recorded run of this cell succeeded with this spelling). It is defined once so
# the feature and target selections below cannot drift apart.
TARGET_COLUMN = 'calss'
if TARGET_COLUMN not in df.columns:
    # Fail here with a readable message instead of deep inside DataFrame.drop.
    raise KeyError(f"Target column {TARGET_COLUMN!r} not found in {file_path!r}")

# frac=1 keeps every row and only shuffles them. train_test_split shuffles
# again by default; this step is kept so the resulting partition stays exactly
# the one the recorded results were produced with.
sampled_df = df.sample(frac=1, random_state=42)

# Separate features and target column from the sampled data
X_sampled = sampled_df.drop(columns=[TARGET_COLUMN])
y_sampled = sampled_df[TARGET_COLUMN]

# Perform train-test split (80:20 ratio) on the sampled data
X_train, X_test, y_train, y_test = train_test_split(
    X_sampled, y_sampled, test_size=0.2, random_state=42)

# Sanity checks: no row was lost or duplicated, and features and labels line up.
assert len(X_train) + len(X_test) == len(X_sampled)
assert len(X_train) == len(y_train) and len(X_test) == len(y_test)

# Check the shape of the sampled and split data
print(f"Shape of X_train_sampled: {X_train.shape}")
print(f"Shape of X_test_sampled: {X_test.shape}")
print(f"Shape of y_train_sampled: {y_train.shape}")
print(f"Shape of y_test_sampled: {y_test.shape}")


Shape of X_train_sampled: (62555, 79)
Shape of X_test_sampled: (15639, 79)
Shape of y_train_sampled: (62555,)
Shape of y_test_sampled: (15639,)
CPU times: total: 547 ms
Wall time: 682 ms


In [3]:
%%time
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import  RandomForestClassifier, BaggingClassifier, AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.neural_network import MLPClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis
from sklearn.kernel_approximation import RBFSampler

CPU times: total: 31.2 ms
Wall time: 204 ms


In [4]:
%%time
# Standardize the features.
# The scaler is fitted on the training split only; both splits are then
# transformed with that single fitted scaler.
scaler = StandardScaler().fit(X_train)
X_train_scaled, X_test_scaled = (
    scaler.transform(split) for split in (X_train, X_test)
)



CPU times: total: 78.1 ms
Wall time: 155 ms


In [5]:
%%time
# Search grid for the bagging wrapper.
# Ensemble sizes to try (passed as BaggingClassifier's n_estimators).
n_estimators_values = [5, 10, 15]
# Seeds to try (passed as BaggingClassifier's random_state).
random_state_values = [42, 61, 91]

CPU times: total: 0 ns
Wall time: 0 ns


In [6]:
%%time
# Base estimators to be wrapped in a bagging ensemble, keyed by the short name
# used when results are stored and reported.
models = dict(
    MLP=MLPClassifier(max_iter=1000, random_state=42),
)

CPU times: total: 0 ns
Wall time: 0 ns


In [7]:
%%time
# Result containers, filled once per entry in `models` and keyed by model name.
best_models = {}
best_hyperparameters = {}
best_metrix = {}

# NOTE(review): every candidate is scored on the test split and the winner is
# picked by that same score, so the reported metrics are optimistically biased.
# A separate validation split (or cross-validation on the training data) would
# give an honest estimate. `random_state` is also searched like a hyperparameter,
# which tunes the seed rather than the model.

# Iterate over each model type
for model_name, model in models.items():
    best_accuracy = 0.0
    best_model = None
    best_hyperparameter = {}
    best_precision = 0.0
    best_recall = 0.0
    best_f1_score = 0.0

    # Iterate over hyperparameter values
    for n_estimators in n_estimators_values:
        for random_state in random_state_values:
            # Initialize Bagging Classifier with current hyperparameters
            bagging_model = BaggingClassifier(estimator=model, n_estimators=n_estimators, random_state=random_state)

            # Fit model and make predictions
            bagging_model.fit(X_train_scaled, y_train)
            y_pred = bagging_model.predict(X_test_scaled)

            # Calculate metrics. zero_division is passed to all three
            # class-averaged scores so a class that is never predicted scores
            # 0.0 silently everywhere (f1 previously fell back to the warning
            # default while precision/recall did not).
            accuracy = accuracy_score(y_test, y_pred)
            precision = precision_score(y_test, y_pred, average='weighted', zero_division=0.0)
            recall = recall_score(y_test, y_pred, average='weighted', zero_division=0.0)
            f1 = f1_score(y_test, y_pred, average='weighted', zero_division=0.0)

            # Update best model if accuracy improves. The `is None` guard makes
            # sure the first candidate is always recorded, so `best_model` can
            # never stay None even if every candidate scores 0.0 accuracy.
            if best_model is None or accuracy > best_accuracy:
                best_accuracy = accuracy
                best_model = bagging_model
                best_hyperparameter = {'n_estimators': n_estimators, 'random_state': random_state}
                best_precision = precision
                best_recall = recall
                best_f1_score = f1
            print(f"Done model : {model},n_estimators: {n_estimators},random_state : {random_state}, accuracy : {accuracy}")

    # Store best model, hyperparameters, and metrics for current classifier type
    best_models[model_name] = best_model
    best_hyperparameters[model_name] = best_hyperparameter
    best_metrix[model_name] = {'Accuracy': best_accuracy, 'Precision': best_precision, 'Recall': best_recall, 'F1-score': best_f1_score}



Done model : MLPClassifier(max_iter=1000, random_state=42),n_estimators: 5,random_state : 42, accuracy : 0.8045271436792634
Done model : MLPClassifier(max_iter=1000, random_state=42),n_estimators: 5,random_state : 61, accuracy : 0.8058699405332822
Done model : MLPClassifier(max_iter=1000, random_state=42),n_estimators: 5,random_state : 91, accuracy : 0.8032482895325788
Done model : MLPClassifier(max_iter=1000, random_state=42),n_estimators: 10,random_state : 42, accuracy : 0.8070209092652983
Done model : MLPClassifier(max_iter=1000, random_state=42),n_estimators: 10,random_state : 61, accuracy : 0.8067011957286272
Done model : MLPClassifier(max_iter=1000, random_state=42),n_estimators: 10,random_state : 91, accuracy : 0.806637253021293
Done model : MLPClassifier(max_iter=1000, random_state=42),n_estimators: 15,random_state : 42, accuracy : 0.8086834196559882
Done model : MLPClassifier(max_iter=1000, random_state=42),n_estimators: 15,random_state : 61, accuracy : 0.8091310186073278
Done

In [8]:
%%time
# Report, for each model type, the winning hyperparameters followed by its
# metrics, one metric per line, with a blank line between model types.
for name in best_metrix:
    print(f"Best {name} Model:")
    print(f"Hyperparameters: {best_hyperparameters[name]}")
    for metric_label, metric_value in best_metrix[name].items():
        print(f"{metric_label}: {metric_value}")
    print()

Best MLP Model:
Hyperparameters: {'n_estimators': 15, 'random_state': 61}
Accuracy: 0.8091310186073278
Precision: 0.8101232459288811
Recall: 0.8091310186073278
F1-score: 0.8078110578627358

CPU times: total: 0 ns
Wall time: 1 ms


In [10]:
# Persist the best bagged MLP found by the search.
# The model is looked up by name in `best_models` instead of through the
# `best_model` loop variable, which only holds the winner of whichever model
# type the search loop visited last.
# NOTE: the model was fitted on StandardScaler-transformed features and the
# fitted `scaler` is not stored in this file; inputs must be scaled the same
# way before calling predict on the reloaded model.
with open('mlp.pkl', 'wb') as file:
    pickle.dump(best_models['MLP'], file)