In [3]:
import numpy as np
import pandas as pd
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler, Normalizer
from sklearn.metrics import accuracy_score, classification_report
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline

data_path = '/kaggle/input/classification'
X_train = np.load(f'{data_path}/X_train.npy')
X_test = np.load(f'{data_path}/X_test.npy')
y_train = np.load(f'{data_path}/y_train.npy')
y_test = np.load(f'{data_path}/y_test.npy')

y_train = y_train.ravel()
y_test = y_test.ravel()

In [4]:
pipe_rf = Pipeline([
    ('clf', RandomForestClassifier(random_state=42))
])

param_grid_rf = {
    'clf__n_estimators': [100, 200, 300],
    'clf__max_depth': [None, 10, 20, 30],
    'clf__min_samples_split': [2, 5, 10],
    'clf__min_samples_leaf': [1, 2, 4]
}


In [5]:
pipe_gb = Pipeline([
    ('clf', GradientBoostingClassifier(random_state=42))
])

param_grid_gb = {
    'clf__n_estimators': [100, 200, 300, 400],
    'clf__learning_rate': [0.01, 0.05, 0.1],
    'clf__max_depth': [3, 5, 7, 9],
    'clf__subsample': [0.8, 0.9, 1.0]
}

In [6]:
pipe_svc = Pipeline([
    ('clf', SVC(random_state=42))
])

param_grid_svc = {
    'clf__C': [0.1, 1, 3, 5 ,10],
    'clf__kernel': ['poly'],
    'clf__gamma': ['scale'],
    'clf__degree': [2, 3, 4, 5]
}

In [8]:
pipelines = {
    'Random Forest Classifier': (pipe_rf, param_grid_rf),
    'Gradient Boosting Classifier': (pipe_gb, param_grid_gb),
    'Support Vector Classifier': (pipe_svc, param_grid_svc),
}

# Fit models and find best parameters
best_models = {}
for model_name, (pipeline, param_grid) in pipelines.items():
    print(f"Training {model_name}...")
    grid_search = GridSearchCV(pipeline, param_grid, cv=5, n_jobs=-1, scoring='accuracy')
    grid_search.fit(X_train, y_train)
    
    best_models[model_name] = grid_search.best_estimator_
    
    print(f"Best parameters: {grid_search.best_params_}")
    print(f"Training accuracy: {grid_search.best_score_:.3f}")
    print("")

# Evaluate on test set
print("Evaluating on test set...")
for model_name, model in best_models.items():
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"{model_name} - Test accuracy: {accuracy:.3f}")

Training Random Forest Classifier...
Best parameters: {'clf__max_depth': None, 'clf__min_samples_leaf': 1, 'clf__min_samples_split': 5, 'clf__n_estimators': 100}
Training accuracy: 0.766

Training Gradient Boosting Classifier...
Best parameters: {'clf__learning_rate': 0.05, 'clf__max_depth': 7, 'clf__n_estimators': 400, 'clf__subsample': 0.9}
Training accuracy: 0.764

Training Support Vector Classifier...
Best parameters: {'clf__C': 5, 'clf__degree': 3, 'clf__gamma': 'scale', 'clf__kernel': 'poly'}
Training accuracy: 0.738

Evaluating on test set...
Random Forest Classifier - Test accuracy: 0.792
Gradient Boosting Classifier - Test accuracy: 0.800
Support Vector Classifier - Test accuracy: 0.952
