# Classification Challenge - using DNN and PyTorch
## Ryhan


In [6]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

Let's first load the dataset and take a look at some random samples.

In [7]:
# Load the training dataset
data = pd.read_csv('wine.csv')
data.sample(10)


Unnamed: 0,Alcohol,Malic_acid,Ash,Alcalinity,Magnesium,Phenols,Flavanoids,Nonflavanoids,Proanthocyanins,Color_intensity,Hue,OD280_315_of_diluted_wines,Proline,WineVariety
14,14.38,1.87,2.38,12.0,102,3.3,3.64,0.29,2.96,7.5,1.2,3.0,1547,0
36,13.28,1.64,2.84,15.5,110,2.6,2.68,0.34,1.36,4.6,1.09,2.78,880,0
112,11.76,2.68,2.92,20.0,103,1.75,2.03,0.6,1.05,3.8,1.23,2.5,607,1
152,13.11,1.9,2.75,25.5,116,2.2,1.28,0.26,1.56,7.1,0.61,1.33,425,2
40,13.56,1.71,2.31,16.2,117,3.15,3.29,0.34,2.34,6.13,0.95,3.38,795,0
135,12.6,2.46,2.2,18.5,94,1.62,0.66,0.63,0.94,7.1,0.73,1.58,695,2
124,11.87,4.31,2.39,21.0,82,2.86,3.03,0.21,2.91,2.8,0.75,3.64,380,1
75,11.66,1.88,1.92,16.0,97,1.61,1.57,0.34,1.15,3.8,1.23,2.14,428,1
138,13.49,3.59,2.19,19.5,88,1.62,0.48,0.58,0.88,5.7,0.81,1.82,580,2
61,12.64,1.36,2.02,16.8,100,2.02,1.41,0.53,0.62,5.75,0.98,1.59,450,1


### The dataset consists of 12 numeric features and a classification label with the following classes:

 

    0 (variety A)\
   
    1 (variety B)
    
    2 (variety C)

Our goal is to train a classification model that achieves an overall Recall metric of over 0.95 (95%).

In [2]:
# Check for missing values
print("Missing values for each feature:")
print(data.isnull().sum())

Missing values for each feature:
Alcohol                       0
Malic_acid                    0
Ash                           0
Alcalinity                    0
Magnesium                     0
Phenols                       0
Flavanoids                    0
Nonflavanoids                 0
Proanthocyanins               0
Color_intensity               0
Hue                           0
OD280_315_of_diluted_wines    0
Proline                       0
WineVariety                   0
dtype: int64


In [23]:
print(data.columns)

Index(['Alcohol', 'Malic_acid', 'Ash', 'Alcalinity', 'Magnesium', 'Phenols',
       'Flavanoids', 'Nonflavanoids', 'Proanthocyanins', 'Color_intensity',
       'Hue', 'OD280_315_of_diluted_wines', 'Proline', 'WineVariety'],
      dtype='object')


In [32]:
print(data.WineVariety)

0      0
1      0
2      0
3      0
4      0
      ..
173    2
174    2
175    2
176    2
177    2
Name: WineVariety, Length: 178, dtype: int64


In [34]:
print(data.dtypes)

Alcohol                       float64
Malic_acid                    float64
Ash                           float64
Alcalinity                    float64
Magnesium                       int64
Phenols                       float64
Flavanoids                    float64
Nonflavanoids                 float64
Proanthocyanins               float64
Color_intensity               float64
Hue                           float64
OD280_315_of_diluted_wines    float64
Proline                         int64
WineVariety                     int64
dtype: object


In [5]:
# Check the distribution of the target variable
print("\nDistribution of target variable:")
print(data['WineVariety'].value_counts())


Distribution of target variable:
1    71
0    59
2    48
Name: WineVariety, dtype: int64


# Train and evaluate a model



In [9]:
# Separate the features and target variable
X = data.iloc[:, :-1].values
y = data.iloc[:, -1].values

In [10]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [26]:
# Standardize the data
#scaler = StandardScaler()
#X_train = scaler.fit_transform(X_train)
#X_test = scaler.transform(X_test)

In [27]:
# Check the type of X_train and y_train
if isinstance(X_train, (pd.DataFrame, pd.Series)):
    X_train = X_train.values
if isinstance(y_train, (pd.DataFrame, pd.Series)):
    y_train = y_train.values

# Convert data to PyTorch tensors
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.long)

# Adjust labels if needed
if y_train.max().item() >= len(set(y_train.numpy())):
    y_train -= 1  # Ensuring labels start from 0


  X_train = torch.tensor(X_train, dtype=torch.float32)
  y_train = torch.tensor(y_train, dtype=torch.long)


### DNN Model

In [40]:
class WineClassifier(nn.Module):
    def __init__(self, input_size):
        super(WineClassifier, self).__init__()
        self.fc1 = nn.Linear(input_size, 10)
        self.fc2 = nn.Linear(10, 10)
        self.fc3 = nn.Linear(10, 3)  # 3 classes

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        return self.fc3(x)


### Model Training

Now, let's preprocess the data and train a classification model.

In [65]:
# Instantiate the model, loss function, and optimizer
model = WineClassifier(X_train.shape[1])
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
epochs = 100

for epoch in range(epochs):
    
    # Forward pass
    outputs = model(X_train)
    loss = criterion(outputs, y_train)

    # Compute accuracy
    _, predicted = torch.max(outputs.data, 1)
    accuracy = (predicted == y_train).float().mean().item() * 100
    
    # Backward pass and optimization
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

#     if (epoch + 1) % 10 == 0:
    print(f"Epoch {epoch + 1}/{epochs}, Loss: {loss.item():.4f}, Accuracy: {accuracy:.2f}%")


Epoch 1/100, Loss: 1.0944, Accuracy: 15.49%
Epoch 2/100, Loss: 1.0921, Accuracy: 16.90%
Epoch 3/100, Loss: 1.0897, Accuracy: 18.31%
Epoch 4/100, Loss: 1.0874, Accuracy: 19.01%
Epoch 5/100, Loss: 1.0851, Accuracy: 19.72%
Epoch 6/100, Loss: 1.0829, Accuracy: 21.83%
Epoch 7/100, Loss: 1.0806, Accuracy: 23.24%
Epoch 8/100, Loss: 1.0783, Accuracy: 25.35%
Epoch 9/100, Loss: 1.0760, Accuracy: 26.06%
Epoch 10/100, Loss: 1.0737, Accuracy: 28.17%
Epoch 11/100, Loss: 1.0714, Accuracy: 30.28%
Epoch 12/100, Loss: 1.0691, Accuracy: 31.69%
Epoch 13/100, Loss: 1.0668, Accuracy: 32.39%
Epoch 14/100, Loss: 1.0645, Accuracy: 34.51%
Epoch 15/100, Loss: 1.0622, Accuracy: 37.32%
Epoch 16/100, Loss: 1.0599, Accuracy: 39.44%
Epoch 17/100, Loss: 1.0575, Accuracy: 39.44%
Epoch 18/100, Loss: 1.0551, Accuracy: 40.14%
Epoch 19/100, Loss: 1.0526, Accuracy: 41.55%
Epoch 20/100, Loss: 1.0502, Accuracy: 42.96%
Epoch 21/100, Loss: 1.0476, Accuracy: 42.25%
Epoch 22/100, Loss: 1.0451, Accuracy: 44.37%
Epoch 23/100, Loss:

### Evaluate

In [53]:
with torch.no_grad():
    y_pred = model(X_test)
    y_pred_classes = y_pred.argmax(dim=1)
    accuracy = (y_pred_classes == y_test).float().mean()
    print(f"Accuracy: {accuracy:.4f}")


Accuracy: 0.9167
