#### Load Packages

In [1]:
import os

import numpy as np
import pandas as pd
import pickle

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

import torch
import torch.nn as nn

#### Setup

In [2]:
# Project root. Defaults to the original checkout location, but can be
# overridden with the IRIS_MLOPS_ROOT environment variable so the notebook can
# run on another machine without editing this cell.
PROJECT_ROOT = os.environ.get(
    "IRIS_MLOPS_ROOT", "/Users/kuldeepsharma/github/mlops/iris-mlops"
)

# Directory constants keep a trailing "/" because file paths are built from
# them by plain string concatenation (e.g. MODEL_PATH + 'iris_model.pkl').
DATA_DIR = PROJECT_ROOT.rstrip("/") + "/data/"
RAW_DATA_PATH = DATA_DIR + "raw_data.csv"
PROCESSED_DATA_PATH = DATA_DIR + "processed_data.csv"
MODEL_PATH = PROJECT_ROOT.rstrip("/") + "/models/"

#### Extract Data

In [3]:
# Read the raw Iris CSV into a DataFrame and preview its first three rows.
data = pd.read_csv(filepath_or_buffer=RAW_DATA_PATH)
data.head(n=3)

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,52,6.4,3.2,4.5,1.5,Iris-versicolor
1,116,6.4,3.2,5.3,2.3,Iris-virginica
2,23,4.6,3.6,1.0,0.2,Iris-setosa


#### Transform Data

In [4]:
### Remove 'Id' Column
# `columns=` already selects the axis, so no separate `axis` argument is needed.
data = data.drop(columns=["Id"])

### Rename Columns
# Rename by source column name instead of by position, so a reordered CSV
# cannot silently mislabel the features. errors="raise" fails fast when an
# expected source column is missing.
# NOTE(review): "lenght" is a misspelling of "length". It is kept unchanged
# because these names become the header of the processed CSV; confirm that no
# consumer depends on them before correcting the spelling.
column_renames = {
    "SepalLengthCm": "sepal_lenght",
    "SepalWidthCm": "sepal_width",
    "PetalLengthCm": "petal_lenght",
    "PetalWidthCm": "petal_width",
    "Species": "target",
}
data = data.rename(columns=column_renames, errors="raise")
# Keep exactly the four features plus the target, in a fixed order.
data = data[list(column_renames.values())]

### Encode Labels
species_to_id = {"Iris-setosa": 0, "Iris-versicolor": 1, "Iris-virginica": 2}
encoded_target = data["target"].map(species_to_id)
# Series.map turns labels that are absent from the mapping into NaN; surface
# that here instead of letting it fail later during tensor conversion.
if encoded_target.isna().any():
    unknown_labels = sorted(
        data.loc[encoded_target.isna(), "target"].astype(str).unique()
    )
    raise ValueError(f"Unmapped species labels in raw data: {unknown_labels}")
data = data.assign(target=encoded_target.astype("int64"))

### Drop Duplicates
data = data.drop_duplicates()

data.head(3)

Unnamed: 0,sepal_lenght,sepal_width,petal_lenght,petal_width,target
0,6.4,3.2,4.5,1.5,1
1,6.4,3.2,5.3,2.3,2
2,4.6,3.6,1.0,0.2,0


#### Load Data

In [5]:
# Persist the transformed dataset; the DataFrame index is not written.
data.to_csv(path_or_buf=PROCESSED_DATA_PATH, index=False)

#### Prepare Data

In [6]:
### Split data in train, val and test
# 70% of the rows go to training; the remaining 30% is halved into validation
# and test. Both splits are stratified on the label and use the same seed.
X = data.drop(columns=['target']).values
y = data['target'].values

X_train, X_temp, y_train, y_temp = train_test_split(
    X, y, test_size=0.3, stratify=y, random_state=3
)
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, stratify=y_temp, random_state=3
)

### Convert to tensors
# Features become float tensors, labels become long (class-index) tensors.
X_train, X_val, X_test = (
    torch.FloatTensor(part) for part in (X_train, X_val, X_test)
)
y_train, y_val, y_test = (
    torch.LongTensor(part) for part in (y_train, y_val, y_test)
)

#### Model & Parameters

In [7]:
class SimpleNeuralNetwork(nn.Module):
    """Feed-forward classifier: input_dim -> 128 -> 64 -> output_dim.

    ReLU is applied after the first two linear layers; the last layer returns
    raw, unnormalised scores (no softmax is applied here).
    """

    def __init__(self, input_dim, output_dim):
        """Create the three linear layers and the shared ReLU activation.

        Args:
            input_dim: Number of input features per sample.
            output_dim: Number of output scores per sample.
        """
        super().__init__()
        # Attribute names are kept stable: they define the state_dict keys.
        self.input_layer = nn.Linear(input_dim, 128)
        self.hidden_layer1 = nn.Linear(128, 64)
        self.output_layer = nn.Linear(64, output_dim)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return the output-layer scores for the batch ``x``."""
        first_activation = self.relu(self.input_layer(x))
        second_activation = self.relu(self.hidden_layer1(first_activation))
        return self.output_layer(second_activation)
    

### Parameters
# Seed PyTorch's global RNG before the model is constructed so that the random
# weight initialisation is the same on every Restart & Run All.
torch.manual_seed(3)

num_epochs = 100
input_dim  = 4   # four feature columns remain once 'target' is removed
output_dim = 3   # labels are encoded as 0, 1, 2
model = SimpleNeuralNetwork(input_dim, output_dim)

# Creating our optimizer and loss function
learning_rate = 0.01
# CrossEntropyLoss takes raw scores, which is what the model's forward returns.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

#### Train & Validate

In [8]:
def train_n_validate_model(model, optimizer, criterion,
    X_train, y_train, X_val, y_val, num_epochs):
    """Train ``model`` with full-batch updates and report metrics every 10 epochs.

    Each epoch performs a single optimizer step on the whole training set and
    then evaluates the model on the validation set. Nothing is returned; the
    model and optimizer are updated in place and metrics are printed.

    Args:
        model: Module called as ``model(X)``; its output is reduced with a max
            over dimension 1 to obtain predicted class indices.
        optimizer: Optimizer bound to ``model``'s parameters.
        criterion: Callable ``criterion(output, target)`` returning a scalar loss.
        X_train: Training inputs passed to ``model`` in one batch.
        y_train: Training targets, compared element-wise with the predictions.
        X_val: Validation inputs.
        y_val: Validation targets.
        num_epochs: Number of epochs (optimizer steps) to run.
    """
    for epoch in range(num_epochs):
        # ---- training step ----
        model.train()
        optimizer.zero_grad()

        output_train = model(X_train)
        loss_train = criterion(output_train, y_train)
        loss_train.backward()
        optimizer.step()

        # Training metrics come from the forward pass made *before* this
        # epoch's parameter update.
        loss_train = loss_train.item()
        _, predicted_train = torch.max(output_train, 1)
        correct_predictions_train = (predicted_train == y_train).sum().item()
        total_train_samples = y_train.size(0)

        # ---- validation step ----
        model.eval()
        # No gradients are needed for evaluation; disabling autograd avoids
        # building a graph that would never be used.
        with torch.no_grad():
            output_val = model(X_val)
            loss_val = criterion(output_val, y_val).item()
        _, predicted_val = torch.max(output_val, 1)
        correct_predictions_val = (predicted_val == y_val).sum().item()
        total_val_samples = y_val.size(0)

        if (epoch + 1) % 10 == 0:
            print(f"Epoch {epoch+1}/{num_epochs}")
            print(f"Train Loss: {loss_train:.4f}")
            print(f"Train Accuracy: {correct_predictions_train/total_train_samples:.4f}")
            print(f"Val Loss: {loss_val:.4f}")
            print(f"Val Accuracy: {correct_predictions_val/total_val_samples:.4f}")

# Run the training/validation loop on the tensors prepared above.
train_n_validate_model(
    model=model,
    optimizer=optimizer,
    criterion=criterion,
    X_train=X_train,
    y_train=y_train,
    X_val=X_val,
    y_val=y_val,
    num_epochs=num_epochs,
)

Epoch 10/100
Train Loss: 0.4426
Train Accuracy: 0.8795
Val Loss: 0.3795
Val Accuracy: 0.7222
Epoch 20/100
Train Loss: 0.2020
Train Accuracy: 0.9518
Val Loss: 0.1524
Val Accuracy: 0.9444
Epoch 30/100
Train Loss: 0.0782
Train Accuracy: 1.0000
Val Loss: 0.0513
Val Accuracy: 1.0000
Epoch 40/100
Train Loss: 0.0506
Train Accuracy: 1.0000
Val Loss: 0.0240
Val Accuracy: 1.0000
Epoch 50/100
Train Loss: 0.0384
Train Accuracy: 0.9759
Val Loss: 0.0115
Val Accuracy: 1.0000
Epoch 60/100
Train Loss: 0.0334
Train Accuracy: 0.9880
Val Loss: 0.0155
Val Accuracy: 1.0000
Epoch 70/100
Train Loss: 0.0303
Train Accuracy: 0.9880
Val Loss: 0.0105
Val Accuracy: 1.0000
Epoch 80/100
Train Loss: 0.0276
Train Accuracy: 0.9880
Val Loss: 0.0092
Val Accuracy: 1.0000
Epoch 90/100
Train Loss: 0.0254
Train Accuracy: 0.9880
Val Loss: 0.0080
Val Accuracy: 1.0000
Epoch 100/100
Train Loss: 0.0234
Train Accuracy: 1.0000
Val Loss: 0.0069
Val Accuracy: 1.0000


#### Test Model

In [9]:
def test_model(model, X_test, y_test):
    
    model.eval()

    output_test = model(X_test)
    _, predicted_test = torch.max(output_test, 1)
    correct_predictions_test = (predicted_test == y_test).sum().item()
    total_test_samples = y_test.size(0) * 1.0

    print(f"Test Accuracy: {correct_predictions_test/total_test_samples:.4f}")

# Report accuracy on the held-out test split.
test_model(model=model, X_test=X_test, y_test=y_test)

Test Accuracy: 0.9444


#### Save Model

In [10]:
### Save the model using pickle
# The whole module object is pickled (not only its state_dict).
model_file_path = MODEL_PATH + 'iris_model.pkl'
with open(model_file_path, 'wb') as model_file:
    pickle.dump(model, model_file)

#### Load Model

In [11]:
# SECURITY: pickle.load can execute arbitrary code while deserialising; only
# load model files produced by this notebook or another trusted source.
# Unpickling also needs the SimpleNeuralNetwork class to be defined in the
# current session, because pickle stores classes by reference.
with open(MODEL_PATH + 'iris_model.pkl', 'rb') as f:
    model = pickle.load(f)

# Inference only: make eval mode explicit and skip autograd bookkeeping.
model.eval()

# One sample with the four feature values, shaped as a batch of size 1.
sample = torch.tensor([[4.6, 3.6, 1.0, 0.2]])

with torch.no_grad():
    output = model(sample)
_, prediction = torch.max(output, 1)
print(f"Prediction: {prediction.item()}")

Prediction: 0
