In [2]:
from google.colab import files
uploaded=files.upload()

Saving data.csv to data.csv


In [3]:
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.preprocessing import StandardScaler
import pandas as pd

# Load dataset
df = pd.read_csv("data.csv")  # Update path if needed
X = df.drop('target', axis=1)
y = df['target']

# Preprocessing
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split data
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Model and param grid
rf = RandomForestClassifier(random_state=42)
param_dist = {
    'n_estimators': [50, 100, 150, 200],
    'max_depth': [None, 5, 10, 20],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'bootstrap': [True, False]
}

# Randomized search
search = RandomizedSearchCV(rf, param_distributions=param_dist, n_iter=10, cv=5, random_state=42, n_jobs=-1)
search.fit(X_train, y_train)

print("Best Parameters:", search.best_params_)
print("\nClassification Report:")
print(classification_report(y_test, search.predict(X_test)))


Best Parameters: {'n_estimators': 100, 'min_samples_split': 10, 'min_samples_leaf': 1, 'max_depth': 5, 'bootstrap': True}

Classification Report:
              precision    recall  f1-score   support

           0       0.83      0.91      0.87        33
           1       0.83      0.71      0.77        21

    accuracy                           0.83        54
   macro avg       0.83      0.81      0.82        54
weighted avg       0.83      0.83      0.83        54



In [5]:
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
from xgboost import XGBClassifier
import warnings

warnings.filterwarnings('ignore')

models = {
    "SVM": SVC(),
    "Decision Tree": DecisionTreeClassifier(),
    "Random Forest": RandomForestClassifier(),
    "AdaBoost": AdaBoostClassifier(),
    "Naive Bayes": GaussianNB(),
    "MLP": MLPClassifier(max_iter=1000),
    "XGBoost": XGBClassifier(use_label_encoder=False, eval_metric='logloss'),

}

results = []

for name, model in models.items():
    model.fit(X_train, y_train)
    train_score = model.score(X_train, y_train)
    test_score = model.score(X_test, y_test)
    results.append((name, train_score, test_score))

# Tabulate results
print(f"{'Model':<15} {'Train Accuracy':<15} {'Test Accuracy'}")
for r in results:
    print(f"{r[0]:<15} {r[1]:<15.2f} {r[2]:.2f}")


Model           Train Accuracy  Test Accuracy
SVM             0.91            0.89
Decision Tree   1.00            0.69
Random Forest   1.00            0.85
AdaBoost        0.87            0.87
Naive Bayes     0.85            0.91
MLP             1.00            0.83
XGBoost         1.00            0.81
