In [1]:
import pandas as pd
from IPython.core.pylabtools import figsize
from sklearn.model_selection import train_test_split
import torch
from torch.utils.data import DataLoader,Dataset
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
import numpy as np

KeyboardInterrupt: 

In [None]:
# Pick the best available accelerator: Apple-silicon MPS first (the original
# target of this notebook), then CUDA, falling back to the CPU otherwise.
if torch.backends.mps.is_available():
    device = "mps"
elif torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
device

In [None]:
# Fix PyTorch's global RNG seed so runs of this notebook are repeatable.
torch.manual_seed(seed=42)


In [None]:
# Load the Fashion-MNIST training CSV from the notebook's working directory.
df = pd.read_csv(filepath_or_buffer='./fashion-mnist_train.csv')

In [None]:
# Display (rows, columns) of the loaded frame as a quick sanity check.
df.shape

In [None]:
# Preview the first 16 rows as a 4x4 grid of 28x28 images, each titled with
# the value found in column 0 of that row.
fig,axes = plt.subplots(nrows=4,ncols=4,figsize=(10,10))
fig.suptitle("16 images")
for row,panel in enumerate(axes.flat):
    label = df.iloc[row,0]
    pixels = df.iloc[row,1:].values.reshape(28,28)
    panel.imshow(pixels)
    panel.set_title(f"label {label}")
    panel.axis('off')

plt.tight_layout(rect=(0,0,1,1))
plt.show()

In [None]:
# Column 0 becomes the target vector; every remaining column is a feature.
y = df.iloc[:,0].values
x = df.iloc[:,1:].values

In [None]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split identical across runs.
xtrain,xtest,ytrain,ytest = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Scale both splits by 1/255 (presumably 8-bit pixel intensities -> [0, 1]).
# NOTE(review): this cell overwrites its own inputs, so running it twice
# divides by 255 a second time; re-run the split cell first if needed.
xtrain,xtest = xtrain/255.0,xtest/255.0
xtrain.shape

In [None]:
class customdataset(Dataset):
    """In-memory dataset pairing a float32 feature tensor with int64 labels."""

    def __init__(self,features,labels):
        """Convert ``features`` and ``labels`` to tensors once, up front."""
        feature_tensor = torch.tensor(features,dtype=torch.float32)
        label_tensor = torch.tensor(labels,dtype=torch.long)
        self.features = feature_tensor
        self.labels = label_tensor

    def __len__(self):
        """Return the number of samples (length of the feature tensor)."""
        sample_count = len(self.features)
        return sample_count

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair stored at ``idx``."""
        sample = self.features[idx]
        target = self.labels[idx]
        return sample,target


In [None]:
# Wrap the training split in the tensor-backed dataset and show its size.
traindataset = customdataset(features=xtrain,labels=ytrain)
len(traindataset)

In [None]:
# Inspect the first (features, label) pair to confirm dtypes and layout.
traindataset[0]

In [None]:
# Wrap the held-out split in the tensor-backed dataset and show its size.
testdataset = customdataset(features=xtest,labels=ytest)
len(testdataset)

In [None]:
# Mini-batch loaders: the training data is reshuffled every epoch, while the
# evaluation data keeps its original order.
trainloader = DataLoader(
    dataset=traindataset,
    batch_size=32,
    shuffle=True,
    pin_memory=True,
)
testloader = DataLoader(
    dataset=testdataset,
    batch_size=32,
    shuffle=False,
    pin_memory=True,
)

In [None]:
# class MyNN(nn.Module):
#     def __init__(self,numfeatures):
#         super().__init__()
#         self.model = nn.Sequential(nn.Linear(numfeatures,128),nn.ReLU(),nn.Linear(128,64),nn.ReLU(),nn.Linear(64,10))
#
#     def forward(self,x):
#         return self.model(x)


In [None]:
# epochs = 100
# lr = 0.1

In [None]:
# model = MyNN(xtrain.shape[1]).to(device)
# criterion = nn.CrossEntropyLoss()
# optimizer = optim.SGD(model.parameters(),lr=lr)

In [None]:
# for epoch in range(epochs):
#     for batchfeatures,batchlabels in trainloader:
#         batchfeatures = batchfeatures.to(device)
#         batchlabels = batchlabels.to(device)
#         output = model(batchfeatures).to(device)
#         loss = criterion(output,batchlabels)
#         optimizer.zero_grad()
#         loss.backward()
#         optimizer.step()
#     print(f"epoch {epoch+1}, loss: {loss.item():.4f}")


In [None]:
# model.eval()

In [None]:
# total =0
# correct = 0
# with torch.no_grad():
#     for batchfeatures,batchlabels in trainloader:
#         batchfeatures = batchfeatures.to(device)
#         batchlabels = batchlabels.to(device)
#         output = model(batchfeatures).to(device)
#         _, predicted = torch.max(output.data,1)
#         total += batchlabels.shape[0]
#         correct = correct + predicted.eq(batchlabels).sum().item()
# print(correct/total)

In [None]:
# test acc = 88
# train acc = 97
# gap - 9 percent
# applying different techniques to reduce overfitting

# Dropout -> randomly turns off neurons during training so the network generalizes better;
# applied after the ReLU activation function

# Batch normalization -> normalizes the inputs to the hidden layers over every mini-batch, to stabilize training from one layer to another;
# applied after every linear layer, before the activation function

In [None]:
class MyNN(nn.Module):
    """Two-hidden-layer MLP (128 -> 64 units) producing 10 output logits.

    Each hidden stage is Linear -> BatchNorm1d -> ReLU -> Dropout(p=0.3).
    """

    def __init__(self,numfeatures):
        """Build the stack for inputs with ``numfeatures`` columns."""
        super().__init__()
        stages = []
        in_width = numfeatures
        for out_width in (128,64):
            stages += [
                nn.Linear(in_width,out_width),
                nn.BatchNorm1d(out_width),
                nn.ReLU(),
                nn.Dropout(p=0.3),
            ]
            in_width = out_width
        # Final projection to the 10 output logits.
        stages.append(nn.Linear(in_width,10))
        self.model = nn.Sequential(*stages)

    def forward(self,x):
        """Return the raw logits for batch ``x``."""
        logits = self.model(x)
        return logits

# Regularization (weight decay) -> adds a penalty on large weight values to the loss, so that the model learns smaller, more stable weights

In [None]:
# Learning rate for this run. The earlier cell that assigned `lr` is commented
# out, so it is defined here (same value, 0.1) to keep this cell runnable on a
# fresh kernel instead of failing with a NameError.
lr = 0.1
model = MyNN(xtrain.shape[1]).to(device)
criterion = nn.CrossEntropyLoss()
# weight_decay applies an L2 penalty to the weights as regularization.
optimizer = optim.SGD(model.parameters(),lr=lr,weight_decay=1e-4)

In [None]:
#test acc - 89
#train acc - 94
# gap - 5 percent

In [None]:
class NewNN(nn.Module):
    """Configurable MLP: N identical hidden stages followed by a linear head.

    Each hidden stage is Linear -> BatchNorm1d -> ReLU -> Dropout.

    Args:
        input_dim: number of input features per sample.
        output_dim: number of output logits.
        neurons_per_layer: width of every hidden layer.
        num_hidden_layers: how many hidden stages to stack (0 gives a single
            linear layer from ``input_dim`` to ``output_dim``).
        dropout: drop probability passed to every ``nn.Dropout``.
    """

    def __init__(self,input_dim,output_dim,neurons_per_layer,num_hidden_layers,dropout):
        super().__init__()
        layers = []
        # Width of whatever feeds the next layer; starts at the raw input size.
        in_features = input_dim
        for _ in range(num_hidden_layers):
            layers.append(nn.Linear(in_features,neurons_per_layer))
            layers.append(nn.BatchNorm1d(neurons_per_layer))
            layers.append(nn.ReLU())
            layers.append(nn.Dropout(dropout))
            in_features = neurons_per_layer
        # Use the tracked width rather than neurons_per_layer so the head is
        # still shaped correctly when there are no hidden layers.
        layers.append(nn.Linear(in_features,output_dim))
        self.model = nn.Sequential(*layers)

    def forward(self,x):
        """Return the raw logits for batch ``x``."""
        return self.model(x)

In [None]:
def objective(trial):
    """Optuna objective: train a ``NewNN`` with sampled settings and score it.

    Samples the architecture (depth, width, dropout), the optimizer and its
    settings, the batch size and the epoch count from ``trial``, trains on
    ``traindataset`` and evaluates on ``testdataset``.

    Args:
        trial: the Optuna trial used to draw hyper-parameter suggestions.

    Returns:
        float: fraction of correctly classified samples in ``testdataset``.
    """
    # Search space (suggestion order kept stable so seeded samplers repeat).
    num_hidden_layers = trial.suggest_int("num_hidden_layers",1,5)
    neurons_per_layer = trial.suggest_int("neurons_per_layer",8,128,step=8)
    epochs = trial.suggest_int("epochs",10,50,step=10)
    learning_rate = trial.suggest_float("learning_rate",1e-5,1e-1,log=True)
    dropout = trial.suggest_float("dropout",0.1,0.5,step=0.1)
    batch_size = trial.suggest_categorical("batch_size",[16,32,64,128])
    optimizer_name = trial.suggest_categorical("optimizer",["SGD","Adam","RMSprop"])
    weight_decay = trial.suggest_float("weight_decay",1e-5,1e-3,log=True)

    trainloader = DataLoader(traindataset,batch_size=batch_size,shuffle=True,pin_memory=True)
    testloader = DataLoader(testdataset,batch_size=batch_size,shuffle=False,pin_memory=True)

    input_dim =784
    output_dim = 10
    model = NewNN(input_dim,output_dim,neurons_per_layer,num_hidden_layers,dropout)
    model.to(device)

    criterion = nn.CrossEntropyLoss()
    # Keep the sampled name and the optimizer object in separate variables.
    if optimizer_name == "SGD":
        optimizer = optim.SGD(model.parameters(),lr=learning_rate,weight_decay=weight_decay)
    elif optimizer_name == "Adam":
        optimizer = optim.Adam(model.parameters(),lr=learning_rate,weight_decay=weight_decay)
    else:
        optimizer = optim.RMSprop(model.parameters(),lr=learning_rate,weight_decay=weight_decay)

    # Training: dropout and batch-norm must be in training mode.
    model.train()
    for epoch in range(epochs):
        for batchfeatures,batchlabels in trainloader:
            batchfeatures = batchfeatures.to(device)
            batchlabels = batchlabels.to(device)
            output = model(batchfeatures)
            loss = criterion(output,batchlabels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

    # Evaluation: score on the held-out loader, not the data the model was
    # fitted on; otherwise the search rewards memorising the training set.
    model.eval()

    total = 0
    correct = 0
    with torch.no_grad():
        for batchfeatures,batchlabels in testloader:
            batchfeatures = batchfeatures.to(device)
            batchlabels = batchlabels.to(device)
            output = model(batchfeatures)
            predicted = output.argmax(dim=1)
            total += batchlabels.shape[0]
            correct += predicted.eq(batchlabels).sum().item()
    acc = correct/total
    return acc


In [None]:
import optuna

# Seed the sampler so the sequence of suggested hyper-parameters is repeatable
# between runs; TPE is the sampler create_study uses by default.
sampler = optuna.samplers.TPESampler(seed=42)
study = optuna.create_study(direction="maximize",sampler=sampler)
study.optimize(objective,n_trials=10)

In [None]:
# Show the best hyper-parameter set found and the objective value it achieved.
study.best_params,study.best_value