<a href="https://colab.research.google.com/github/MohitTiwari-07/Contents/blob/main/Assignment_6.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**First, import the required modules and classes:**

In [20]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

## Load the Wine dataset

In [2]:

# Load the Wine dataset and split it into the feature matrix and the labels.
wine = datasets.load_wine()
X, y = wine.data, wine.target

In [3]:
import pandas as pd
from sklearn import datasets


# Load the Wine dataset and keep features and labels under the usual names.
wine = datasets.load_wine()
X, y = wine.data, wine.target

# Tabular view: one column per feature name plus the class label as 'target'.
df = pd.DataFrame(data=X, columns=wine.feature_names).assign(target=y)

# Preview the first 5 rows.
print(df.head())


   alcohol  malic_acid   ash  alcalinity_of_ash  magnesium  total_phenols  \
0    14.23        1.71  2.43               15.6      127.0           2.80   
1    13.20        1.78  2.14               11.2      100.0           2.65   
2    13.16        2.36  2.67               18.6      101.0           2.80   
3    14.37        1.95  2.50               16.8      113.0           3.85   
4    13.24        2.59  2.87               21.0      118.0           2.80   

   flavanoids  nonflavanoid_phenols  proanthocyanins  color_intensity   hue  \
0        3.06                  0.28             2.29             5.64  1.04   
1        2.76                  0.26             1.28             4.38  1.05   
2        3.24                  0.30             2.81             5.68  1.03   
3        3.49                  0.24             2.18             7.80  0.86   
4        2.69                  0.39             1.82             4.32  1.04   

   od280/od315_of_diluted_wines  proline  target  
0          

## Split the dataset into training and testing sets

In [21]:
# Hold out 20% of the samples for testing; the seed is fixed at 24.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=24,
)


## Create a baseline SVM classifier

In [5]:
# Reference model: an SVC with default hyperparameters, trained on the training split.
baseline_svm = SVC().fit(X_train, y_train)

# Predictions on the held-out test split, reused by the evaluation cells.
y_pred = baseline_svm.predict(X_test)

## Evaluate the baseline model

In [6]:
# Fraction of test samples the baseline SVM labelled correctly.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Baseline SVM Accuracy: {accuracy:.2f}")

Baseline SVM Accuracy: 0.78


## GridSearchCV in Scikit-Learn


First, import the GridSearchCV class from scikit-learn’s model_selection module:

In [22]:
from sklearn.model_selection import GridSearchCV

## Define the hyperparameter grid

In [8]:
# Candidate values for the grid search, keyed by SVC hyperparameter name.
param_grid = dict(
    C=[0.1, 1, 10],
    kernel=['linear', 'rbf', 'poly'],
    gamma=[0.1, 1, 'scale', 'auto'],
)

## Create the GridSearchCV object

In [23]:

# Search over param_grid with 5-fold cross-validation, using the baseline SVC as the estimator.
grid_search = GridSearchCV(
    estimator=baseline_svm,
    param_grid=param_grid,
    cv=5,
)




### Fit the model with the grid of hyperparameters

In [25]:

# Run the search on the training split only; the test split stays untouched.
grid_search.fit(X=X_train, y=y_train)

### Evaluate the best model

In [10]:
# Winning estimator and the hyperparameter combination that produced it.
best_model = grid_search.best_estimator_
best_params = grid_search.best_params_

# Score the tuned SVM on the held-out test split.
y_pred_best = best_model.predict(X_test)
accuracy_best = accuracy_score(y_true=y_test, y_pred=y_pred_best)

print(f"Best SVM Accuracy: {accuracy_best:.2f}")
print(f"Best Hyperparameters: {best_params}")

Best SVM Accuracy: 0.94
Best Hyperparameters: {'C': 0.1, 'gamma': 0.1, 'kernel': 'poly'}


### Evaluate baseline SVM using multiple metrics

In [11]:
from sklearn.metrics import precision_score, recall_score, f1_score, classification_report


# Macro-averaged scores for the baseline SVM predictions (y_pred).
macro_avg = {'average': 'macro'}
precision = precision_score(y_test, y_pred, **macro_avg)
recall = recall_score(y_test, y_pred, **macro_avg)
f1 = f1_score(y_test, y_pred, **macro_avg)

# `accuracy` comes from the earlier baseline evaluation cell.
print("\nBaseline SVM Metrics:")
print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1 Score: {f1:.2f}")



Baseline SVM Metrics:
Accuracy: 0.78
Precision: 0.76
Recall: 0.77
F1 Score: 0.76


In [12]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import RandomizedSearchCV

# Second reference model: a Random Forest with default settings and a fixed seed.
baseline_rf = RandomForestClassifier(random_state=42).fit(X_train, y_train)
y_pred_rf = baseline_rf.predict(X_test)

# Test-set accuracy of the untuned forest.
acc_rf = accuracy_score(y_true=y_test, y_pred=y_pred_rf)
print(f"\nBaseline Random Forest Accuracy: {acc_rf:.2f}")



Baseline Random Forest Accuracy: 0.97


In [16]:
# Model zoo for the tuning loop. Each entry provides:
#   'model'  - the unfitted estimator to tune
#   'params' - the hyperparameter values to explore
#   'search' - 'grid' for GridSearchCV, anything else for RandomizedSearchCV
models = {
    'SVC': {
        'model': SVC(),
        'params': {
            'C': [0.1, 1, 10],
            'kernel': ['linear', 'rbf'],
            'gamma': ['scale', 'auto']
        },
        'search': 'grid'  # use GridSearchCV
    },
    'RandomForest': {
        # Seeded like the baseline Random Forest so repeated runs of the
        # notebook tune and score the same forest.
        'model': RandomForestClassifier(random_state=42),
        'params': {
            'n_estimators': [50, 100, 150],
            'max_depth': [None, 10, 20],
            'min_samples_split': [2, 5]
        },
        'search': 'random'  # use RandomizedSearchCV
    },

}


## Evaluation

In [17]:
# Tune every entry in `models`, score each winner on the held-out test split,
# and collect one summary record per model in `results`.
#
# Loop-local names (tuned_model, y_pred_tuned, f1_tuned) are deliberately
# distinct from `best_model`, `y_pred` and `f1`, which earlier cells define
# for the grid-searched SVM and the baseline SVM. Reusing those names here
# would make a later re-run of the baseline cells report the wrong model.
results = []

for name, m in models.items():
    print(f"\n🔍 Tuning and training: {name}")

    # Choose the search strategy requested by the model's config entry.
    if m['search'] == 'grid':
        search = GridSearchCV(m['model'], m['params'], cv=5, scoring='accuracy')
    else:
        search = RandomizedSearchCV(m['model'], m['params'], n_iter=5, cv=5, scoring='accuracy', random_state=42)

    # Hyperparameters are selected on the training split only.
    search.fit(X_train, y_train)
    tuned_model = search.best_estimator_
    y_pred_tuned = tuned_model.predict(X_test)

    # Test-split metrics; precision, recall and F1 are macro-averaged.
    acc = accuracy_score(y_test, y_pred_tuned)
    prec = precision_score(y_test, y_pred_tuned, average='macro')
    rec = recall_score(y_test, y_pred_tuned, average='macro')
    f1_tuned = f1_score(y_test, y_pred_tuned, average='macro')

    results.append({
        'Model': name,
        'Accuracy': acc,
        'Precision': prec,
        'Recall': rec,
        'F1-score': f1_tuned,
        'Best Parameters': search.best_params_
    })

    print(f"Best Params: {search.best_params_}")
    print(f"Accuracy: {acc:.2f}, Precision: {prec:.2f}, Recall: {rec:.2f}, F1: {f1_tuned:.2f}")




🔍 Tuning and training: SVC
Best Params: {'C': 0.1, 'gamma': 'scale', 'kernel': 'linear'}
Accuracy: 0.92, Precision: 0.91, Recall: 0.93, F1: 0.92

🔍 Tuning and training: RandomForest
Best Params: {'n_estimators': 50, 'min_samples_split': 2, 'max_depth': None}
Accuracy: 0.97, Precision: 0.96, Recall: 0.98, F1: 0.97


### Convert results to DataFrame

In [26]:

# One row per tuned model, ordered from highest to lowest F1-score.
results_df = pd.DataFrame(results)
results_df = results_df.sort_values(by='F1-score', ascending=False)



### Display the comparison table

In [27]:


# Heading and index-free comparison table, each on its own line.
print(
    "\n📊 Final Model Comparison:",
    results_df.to_string(index=False),
    sep="\n",
)


📊 Final Model Comparison:
       Model  Accuracy  Precision   Recall  F1-score                                                 Best Parameters
RandomForest  0.972222   0.962963 0.976190  0.968046 {'n_estimators': 50, 'min_samples_split': 2, 'max_depth': None}
         SVC  0.916667   0.913512 0.928571  0.919546                {'C': 0.1, 'gamma': 'scale', 'kernel': 'linear'}
