<a href="https://colab.research.google.com/github/NINJAHATTORI004/csi/blob/main/task6.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [7]:
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV, KFold
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
import pandas as pd
import numpy as np

# Load dataset from provided CSV
data = pd.read_csv('wine.csv')
X = data.drop('Wine', axis=1)
y = data['Wine']

# Split data - using larger test size since dataset is small
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Dynamic cross-validation based on sample size
n_splits = min(5, len(X_train))  # Ensure folds don't exceed samples
cv_strategy = KFold(n_splits=n_splits, shuffle=True, random_state=42)

# Initialize models
models = {
    'Random Forest': RandomForestClassifier(random_state=42),
    'SVM': SVC(random_state=42)
}

# Evaluate base models
results = {}
for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    results[name] = {
        'Accuracy': accuracy_score(y_test, y_pred),
        'Precision': precision_score(y_test, y_pred, average='weighted'),
        'Recall': recall_score(y_test, y_pred, average='weighted'),
        'F1': f1_score(y_test, y_pred, average='weighted')
    }

# Hyperparameter tuning with GridSearchCV (Random Forest)
param_grid_rf = {
    'n_estimators': [50, 100],
    'max_depth': [None, 5, 10],
    'min_samples_split': [2, 5]
}

grid_search = GridSearchCV(
    estimator=RandomForestClassifier(random_state=42),
    param_grid=param_grid_rf,
    scoring='f1_weighted',
    cv=cv_strategy,  # Use dynamic CV strategy
    n_jobs=-1
)
grid_search.fit(X_train, y_train)
best_rf = grid_search.best_estimator_

# Hyperparameter tuning with RandomizedSearchCV (SVM)
param_dist_svm = {
    'C': np.logspace(-2, 2, 5),
    'gamma': np.logspace(-2, 2, 5),
    'kernel': ['linear', 'rbf']
}

random_search = RandomizedSearchCV(
    estimator=SVC(random_state=42),
    param_distributions=param_dist_svm,
    n_iter=10,  # Reduced iterations for small dataset
    scoring='f1_weighted',
    cv=cv_strategy,  # Use dynamic CV strategy
    n_jobs=-1,
    random_state=42
)
random_search.fit(X_train, y_train)
best_svm = random_search.best_estimator_

# Evaluate tuned models
tuned_models = {
    'Tuned Random Forest': best_rf,
    'Tuned SVM': best_svm
}

for name, model in tuned_models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    results[name] = {
        'Accuracy': accuracy_score(y_test, y_pred),
        'Precision': precision_score(y_test, y_pred, average='weighted'),
        'Recall': recall_score(y_test, y_pred, average='weighted'),
        'F1': f1_score(y_test, y_pred, average='weighted')
    }

# Compare results
results_df = pd.DataFrame(results).T
print(results_df.sort_values(by='F1', ascending=False))

# Select best model
best_model_name = results_df['F1'].idxmax()
best_model = tuned_models.get(best_model_name) or models.get(best_model_name)
print(f"\nBest model: {best_model_name}")
print(f"Best parameters: {best_model.get_params()}")


                     Accuracy  Precision    Recall        F1
Random Forest        1.000000   1.000000  1.000000  1.000000
Tuned Random Forest  1.000000   1.000000  1.000000  1.000000
Tuned SVM            1.000000   1.000000  1.000000  1.000000
SVM                  0.759259   0.753704  0.759259  0.723484

Best model: Random Forest
Best parameters: {'bootstrap': True, 'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': None, 'max_features': 'sqrt', 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'monotonic_cst': None, 'n_estimators': 100, 'n_jobs': None, 'oob_score': False, 'random_state': 42, 'verbose': 0, 'warm_start': False}
