# Sample Solution for Lab 2

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import f1_score
import tensorflow as tf
from tensorflow import keras
import numpy as np

from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.base import BaseEstimator, TransformerMixin

# Load the Wine dataset from the UCI repository, supplying the column
# names explicitly through `names`.
column_names = [
    "Class",
    "Alcohol",
    "Malicacid",
    "Ash",
    "Alcalinity of ash",
    "Magnesium",
    "Total_phenols",
    "Flavanoids",
    "Nonflavanoid_phenols",
    "Proanthocyanins",
    "Color_intensity",
    "Hue",
    "0D280_0D315_of_diluted_wines",
    "Proline",
]
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data"
data = pd.read_csv(url, names=column_names)

In [None]:
# Bare expression: in a notebook cell this displays the loaded DataFrame.
data

Unnamed: 0,Class,Alcohol,Malicacid,Ash,Alcalinity of ash,Magnesium,Total_phenols,Flavanoids,Nonflavanoid_phenols,Proanthocyanins,Color_intensity,Hue,0D280_0D315_of_diluted_wines,Proline
0,1,14.23,1.71,2.43,15.6,127,2.80,3.06,0.28,2.29,5.64,1.04,3.92,1065
1,1,13.20,1.78,2.14,11.2,100,2.65,2.76,0.26,1.28,4.38,1.05,3.40,1050
2,1,13.16,2.36,2.67,18.6,101,2.80,3.24,0.30,2.81,5.68,1.03,3.17,1185
3,1,14.37,1.95,2.50,16.8,113,3.85,3.49,0.24,2.18,7.80,0.86,3.45,1480
4,1,13.24,2.59,2.87,21.0,118,2.80,2.69,0.39,1.82,4.32,1.04,2.93,735
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
173,3,13.71,5.65,2.45,20.5,95,1.68,0.61,0.52,1.06,7.70,0.64,1.74,740
174,3,13.40,3.91,2.48,23.0,102,1.80,0.75,0.43,1.41,7.30,0.70,1.56,750
175,3,13.27,4.28,2.26,20.0,120,1.59,0.69,0.43,1.35,10.20,0.59,1.56,835
176,3,13.17,2.59,2.37,20.0,120,1.65,0.68,0.53,1.46,9.30,0.60,1.62,840


# Split the data into train and test sets using stratified sampling, then one-hot encode the class labels

In [None]:
# Separate the feature columns from the target column.
X = data.drop("Class", axis=1).copy()
y = data["Class"].copy()

# Hold out 10% of the rows as a test set, stratified on the class label.
# A fixed random_state makes the split itself repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1, stratify=y, random_state=42
)


In [None]:
# One-hot encode the class labels.
# - The Series is converted to a NumPy column vector first: indexing a pandas
#   Series with `[:, np.newaxis]` is deprecated and fails on newer pandas.
# - The encoder is left at its default (sparse) output and densified with
#   `.toarray()`, which avoids the `sparse=` / `sparse_output=` keyword that
#   differs between scikit-learn versions.
encoder = OneHotEncoder()
y_train_encoded = encoder.fit_transform(y_train.to_numpy().reshape(-1, 1)).toarray()
y_test_encoded = encoder.transform(y_test.to_numpy().reshape(-1, 1)).toarray()

  y_train_encoded = encoder.fit_transform(y_train[:, np.newaxis])
  y_test_encoded = encoder.transform(y_test[:, np.newaxis])


In [None]:
# Data preprocessing pipeline (added as the first step of the final pipeline later).
# SimpleImputer is included only to illustrate the step: the dataset has no
# missing values, so it changes nothing here; swap in a better imputer if needed.
# MinMaxScaler then rescales the features.
preprocessor = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', MinMaxScaler()),
])

# Define your Keras model as a class that has a `fit` method, so that you can change the model dynamically and also add it to the sklearn pipeline

If you check the sklearn documentation online (https://scikit-learn.org/stable/modules/generated/sklearn.pipeline.Pipeline.html), you will find this statement: "Pipeline of transforms with a final estimator. Sequentially apply a list of transforms and a final estimator. Intermediate steps of the pipeline must be ‘transforms’, that is, they must implement fit and transform methods. The final estimator only needs to implement fit. The transformers in the pipeline can be cached using memory argument." This is why our Keras wrapper only needs a `fit` method (plus `predict`, so that we can call `pipeline.predict`) to serve as the final step of the pipeline.

In [None]:
class KerasClassifier(BaseEstimator, TransformerMixin):
    """Minimal sklearn-style wrapper so a Keras model can end a Pipeline.

    A fresh model is built on every call to ``fit`` by invoking
    ``model_func(**kwargs)``, so the architecture can be changed by passing
    different keyword arguments.

    Parameters
    ----------
    model_func : callable
        Factory that returns a compiled Keras model.
    epochs, batch_size, verbose : int, keyword-only
        Forwarded to the Keras model's ``fit``. The defaults (10, 32, 0)
        are the values that were previously hard-coded.
    **kwargs
        Forwarded unchanged to ``model_func`` when the model is built.

    NOTE(review): no ``transform`` method is defined even though the class
    inherits ``TransformerMixin``; the base classes are kept as they were.
    """

    def __init__(self, model_func, *, epochs=10, batch_size=32, verbose=0, **kwargs):
        self.model_func = model_func
        self.epochs = epochs
        self.batch_size = batch_size
        self.verbose = verbose
        self.kwargs = kwargs
        # Stays None until fit() builds and trains the model.
        self.model_ = None

    def fit(self, X, y):
        """Build a new model from ``model_func`` and train it on ``(X, y)``.

        ``y`` must be in the format the model's loss expects (one-hot rows
        for the ``categorical_crossentropy`` model used in this notebook).
        Returns ``self``.
        """
        self.model_ = self.model_func(**self.kwargs)
        self.model_.fit(
            X,
            y,
            epochs=self.epochs,
            batch_size=self.batch_size,
            verbose=self.verbose,
        )
        return self

    def predict(self, X):
        """Return the index of the highest-scoring class for each row of ``X``.

        Raises
        ------
        sklearn.exceptions.NotFittedError
            If called before ``fit``.
        """
        if self.model_ is None:
            # Local import keeps this block self-contained. NotFittedError
            # subclasses AttributeError, which is what the unguarded
            # `None.predict` call raised before.
            from sklearn.exceptions import NotFittedError

            raise NotFittedError(
                "This KerasClassifier instance is not fitted yet; "
                "call fit() before predict()."
            )
        # The argmax here converts the per-class scores to label format
        return np.argmax(self.model_.predict(X), axis=1)

# Neural Network Model with Keras
def create_model(layers=1, neurons=32, learning_rate=0.001, num_classes=3):
    """Build and compile a fully connected softmax classifier.

    Parameters
    ----------
    layers : int
        Number of hidden Dense layers (ReLU activation).
    neurons : int
        Units in each hidden layer.
    learning_rate : float
        Learning rate passed to the Adam optimizer.
    num_classes : int
        Units in the softmax output layer. Defaults to 3, the number of
        classes assumed for the wine dataset.

    Returns
    -------
    A compiled ``keras.Sequential`` model using categorical cross-entropy
    loss and tracking accuracy.
    """
    model = keras.Sequential()
    for _ in range(layers):
        model.add(keras.layers.Dense(neurons, activation='relu'))
    model.add(keras.layers.Dense(num_classes, activation='softmax'))
    model.compile(optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

# Define your loops to do hyperparameter tuning using stratified k-fold

In [None]:
# Hyperparameter tuning: exhaustive grid search over depth, width and
# learning rate, scored by the mean macro-F1 across stratified folds.
layer_grid = [1, 2, 3]
neuron_grid = [32, 64, 128]
lr_grid = [0.001, 0.01, 0.1]

best_f1 = 0
best_params = None

# Using StratifiedKFold for cross-validation
skf = StratifiedKFold(n_splits=10)

# Flatten the three grids into one list of combinations, in the same order
# as nested loops (layers outermost, learning rate innermost).
param_grid = [
    (n_layers, n_neurons, rate)
    for n_layers in layer_grid
    for n_neurons in neuron_grid
    for rate in lr_grid
]

for layers, neurons, lr in param_grid:
    f1_scores = []
    for train_idx, val_idx in skf.split(X_train, y_train):
        pipeline = Pipeline([
            ('preprocessor', preprocessor),
            ('classifier', KerasClassifier(create_model, layers=layers, neurons=neurons, learning_rate=lr)),
        ])

        # Train on this fold's training rows, then predict its validation rows.
        pipeline.fit(X_train.iloc[train_idx], y_train_encoded[train_idx])
        fold_predictions = pipeline.predict(X_train.iloc[val_idx])

        # The validation targets are one-hot rows; convert them to label format.
        fold_labels = np.argmax(y_train_encoded[val_idx], axis=1)

        f1_scores.append(f1_score(fold_labels, fold_predictions, average='macro'))

    # Average F1 score for the current hyperparameters
    avg_f1 = np.mean(f1_scores)
    is_new_best = avg_f1 > best_f1
    if is_new_best:
        best_f1 = avg_f1
        best_params = {'layers': layers, 'neurons': neurons, 'learning_rate': lr}

    print(f"Layers: {layers}, Neurons: {neurons}, Learning rate: {lr}, Avg F1 Score: {avg_f1}")

print(f"Best F1 Score: {best_f1} with parameters {best_params}")









Layers: 1, Neurons: 32, Learning rate: 0.001, Avg F1 Score: 0.7756232656232656
Layers: 1, Neurons: 32, Learning rate: 0.01, Avg F1 Score: 0.9704325304325303
Layers: 1, Neurons: 32, Learning rate: 0.1, Avg F1 Score: 0.9769164169164168
Layers: 1, Neurons: 64, Learning rate: 0.001, Avg F1 Score: 0.873130573130573
Layers: 1, Neurons: 64, Learning rate: 0.01, Avg F1 Score: 0.9648381248381247
Layers: 1, Neurons: 64, Learning rate: 0.1, Avg F1 Score: 0.963967883967884
Layers: 1, Neurons: 128, Learning rate: 0.001, Avg F1 Score: 0.9348024198024198
Layers: 1, Neurons: 128, Learning rate: 0.01, Avg F1 Score: 0.9709904909904908
Layers: 1, Neurons: 128, Learning rate: 0.1, Avg F1 Score: 0.9709904909904911
Layers: 2, Neurons: 32, Learning rate: 0.001, Avg F1 Score: 0.8226060976060975
Layers: 2, Neurons: 32, Learning rate: 0.01, Avg F1 Score: 0.9769164169164168
Layers: 2, Neurons: 32, Learning rate: 0.1, Avg F1 Score: 0.9576042476042476
Layers: 2, Neurons: 64, Learning rate: 0.001, Avg F1 Score: 0.9

# Train once more with the best parameters on the whole training set

In [None]:
# Training with the best parameters

# Use the configuration selected by the grid search instead of a hard-coded
# one. `best_params` is still None when no configuration scored above the
# initial best_f1 of 0; in that case fall back to the previous fixed choice.
final_params = best_params or {'layers': 2, 'neurons': 64, 'learning_rate': 0.01}

pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('classifier', KerasClassifier(create_model, **final_params)),
])

# Fit on the whole training set
pipeline.fit(X_train, y_train_encoded)
# Predict on the test data
y_pred = pipeline.predict(X_test)
# Convert y_test from one-hot encoded to label format
new_y_test = np.argmax(y_test_encoded, axis=1)
# Last expression of the cell: macro-averaged F1 on the held-out test set
f1_score(new_y_test, y_pred, average='macro')



0.9474747474747475