In [27]:
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim
from sklearn.model_selection import train_test_split

In [36]:
df = pd.read_csv('/content/fashion-mnist_train.csv')

In [19]:
print(torch.cuda.is_available())

True


In [13]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [9]:
torch.manual_seed(42)

<torch._C.Generator at 0x7ca3e0089bd0>

In [10]:
df.head()

Unnamed: 0,label,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,pixel784
0,2,0,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,9,0,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,6,0,0,0,0,0,0,0,5,0,...,0.0,0.0,0.0,30.0,43.0,0.0,0.0,0.0,0.0,0.0
3,0,0,0,0,1,2,0,0,0,0,...,3.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
4,3,0,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [38]:
df.shape

(60000, 785)

In [39]:
x = df.iloc[:,1:]
y = df.iloc[:,0]

In [40]:
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.2,random_state=42)

In [41]:
x_train = x_train/255.0
x_test = x_test/255.0

In [43]:
x_train.shape[1]

784

In [45]:
#create coustomdataset class
class Custumdataset(Dataset):
  def __init__(self,features,labels):
    self.features = torch.tensor(features.values , dtype = torch.float32).reshape(-1,1,28,28)
    self.labels = torch.tensor(labels.values, dtype =torch.long)

  def __len__(self):
    return len(self.features)

  def __getitem__(self,idx):
    return self.features[idx],self.labels[idx]

In [46]:
test_dataset = Custumdataset(x_test,y_test)

In [47]:
train_dataset = Custumdataset(x_train,y_train)

In [48]:
#nn class
class NeuralNetwork(nn.Module):
  def __init__(self,input_dim):
    super().__init__()
    self.features = nn.Sequential(
        nn.Conv2d(input_dim, 32,kernel_size=3,padding="same"),
        nn.ReLU(),
        nn.BatchNorm2d(32),
        nn.MaxPool2d(kernel_size = 2, stride = 2),

        nn.Conv2d(32,64,kernel_size=3,padding="same"),
        nn.ReLU(),
        nn.BatchNorm2d(64),
        nn.MaxPool2d(kernel_size = 2, stride = 2)
    )
    self.classifier = nn.Sequential(
        nn.Flatten(),
        nn.Linear(3136,128),
        nn.ReLU(),
        nn.Dropout(0.3),

        nn.Linear(128,64),
        nn.ReLU(),
        nn.Dropout(0.3),

        nn.Linear(64,10)
        )


  def forward(self,x):
    x = self.features(x)
    x = self.classifier(x)
    return x

In [49]:
learning_rate = 0.01
epochs = 100
batches = 32

In [50]:
test_loader = DataLoader(test_dataset,batch_size=batches,shuffle=False,pin_memory = True)
train_loader = DataLoader(train_dataset,batch_size=batches,shuffle=True,pin_memory = True)

In [51]:
#creating a model
model = NeuralNetwork(1)
model = model.to(device)

#loss function
loss = nn.CrossEntropyLoss()

#optimizer
optimizer = torch.optim.SGD(model.parameters(),lr=learning_rate )

In [52]:
#training loop
for epoch in range(epochs) :
  epoch_loss = 0.0
  for batch_fertures,batch_labels in train_loader:

    #moving to gpu
    batch_fertures = batch_fertures.to(device)
    batch_labels = batch_labels.to(device)

    #forward pass
    y_pred = model(batch_fertures)

    #loss calculation
    loss_value = loss(y_pred,batch_labels)

    #making gradindients zero
    optimizer.zero_grad()

    #backward pass
    loss_value.backward()

    #updating the weights and bias
    optimizer.step()
    epoch_loss += loss_value.item()
  avg = epoch_loss/len(train_loader)
  print(f"epochs = {epoch+1} and loss = {avg}")

epochs = 1 and loss = 0.5835688702066739
epochs = 2 and loss = 0.34612032180527846
epochs = 3 and loss = 0.29759026967237395
epochs = 4 and loss = 0.2597041491319736
epochs = 5 and loss = 0.23712102209279934
epochs = 6 and loss = 0.21549501223737993
epochs = 7 and loss = 0.20094882647693157
epochs = 8 and loss = 0.18546761492018898
epochs = 9 and loss = 0.17030290497032305
epochs = 10 and loss = 0.1579861388473461
epochs = 11 and loss = 0.14675212792617578
epochs = 12 and loss = 0.1363543469250823
epochs = 13 and loss = 0.1304138589266998
epochs = 14 and loss = 0.11806185886822641
epochs = 15 and loss = 0.11061738199243942
epochs = 16 and loss = 0.1043562748352997
epochs = 17 and loss = 0.09610603086006207
epochs = 18 and loss = 0.09114721358070771
epochs = 19 and loss = 0.08602721942670177
epochs = 20 and loss = 0.07933689145821457
epochs = 21 and loss = 0.07617865304703203
epochs = 22 and loss = 0.07047208880190738
epochs = 23 and loss = 0.06474517304968322
epochs = 24 and loss = 0.0

In [53]:
#evaluting on train data

total = 0
correct = 0

for batch_features,batch_labels in train_loader:
  #moving the parameter on gpu
  batch_features = batch_features.to(device)
  batch_labels = batch_labels.to(device)

  #forward pass
  y_pred = model(batch_features)

  #finding values
  _,predicted = torch.max(y_pred,1)
  total += batch_labels.size(0)
  correct += (predicted == batch_labels).sum().item()

accuracy = correct/total
print(f"accuracy = {accuracy}")

accuracy = 0.99825


In [54]:
#evaluating on test data
total = 0
correct = 0

for batch_features, batch_labels in test_loader:
  batch_features = batch_features.to(device)
  batch_labels = batch_labels.to(device)

  y_pred=model(batch_features)

  _,predicted = torch.max(y_pred,1)
  total +=batch_labels.size(0)
  correct +=(predicted == batch_labels).sum().item()

accuracy = correct/total
print(f"accuracy = {accuracy}")

accuracy = 0.9156666666666666


## Check for NaNs in data

### Subtask:
Inspect `x_train` and `x_test` for any NaN values that might be causing the loss to become NaN.


**Reasoning**:
To identify if NaN values are present in `x_train` and `x_test`, I will use the `.isna().sum()` method on each DataFrame. This will provide a count of NaN values per column, helping to diagnose potential issues causing loss to become NaN.



In [None]:
print("NaN values in x_train:\n", x_train.isna().sum().sum())
print("NaN values in x_test:\n", x_test.isna().sum().sum())

NaN values in x_train:
 658
NaN values in x_test:
 0


**Reasoning**:
Since `x_train` contains NaN values, I will fill them with 0 to ensure the data is clean and suitable for model training. This will help prevent issues like the NaN loss observed during training.



In [None]:
x_train = x_train.fillna(0)
print("NaN values in x_train after filling:", x_train.isna().sum().sum())

NaN values in x_train after filling: 0


## Re-evaluate model training

### Subtask:
Retrain the model after handling NaN values and observe if the loss stabilizes.


**Reasoning**:
Since `x_train` has been cleaned, I need to re-instantiate the `Custumdataset` for both training and testing datasets using the updated `x_train` and `x_test` to ensure the datasets reflect the changes.



In [None]:
train_dataset = Custumdataset(x_train,y_train)
test_dataset = Custumdataset(x_test,y_test)
print("Custumdataset re-instantiated for training and testing.")

Custumdataset re-instantiated for training and testing.


**Reasoning**:
After re-instantiating the datasets, I will re-create the DataLoaders, re-initialize the model, loss function, and optimizer, and then rerun the training and evaluation loops to observe the impact of NaN handling on the model's performance and loss stability.



In [None]:
train_loader = DataLoader(train_dataset,batch_size =32,shuffle=True,pin_memory = True)
test_loader = DataLoader(test_dataset,batch_size=32,shuffle=False,pin_memory = True)
print("DataLoaders re-created.")

#model intialization
model = NeuralNetwork(x_train.shape[1])
model = model.to(device)
print("Model re-initialized and moved to device.")

#loss function
loss = nn.CrossEntropyLoss()
print("Loss function re-initialized.")

#optimizer
optimizer = optim.Adam(model.parameters(),lr=learning_rate)
print("Optimizer re-initialized.")

#training loop
print("Starting retraining...")
for epoch in range(epochs):
  batch_loss = 0.0
  for batch_features, batch_labels in train_loader:

    #moving features and lables on gpu before forwardpass
    batch_features = batch_features.to(device)
    batch_labels = batch_labels.to(device)

    #forward pass
    y_pred= model(batch_features)

    #loss calculation
    loss_value = loss(y_pred,batch_labels)
    # Removed NaN check as NaNs should be handled now

    #making the grad values zero n=before backward pass
    optimizer.zero_grad()

    #backward
    loss_value.backward()

    #updating the gradient values on weight and bais
    optimizer.step()
    batch_loss += loss_value.item()

  avg_loss = batch_loss/len(train_loader)
  if (epoch + 1) % 10 == 0 or epoch == 0:
    print(f"epoch = {epoch+1} and loss = {avg_loss:.4f}")
print("Retraining complete.")

#setting model into evaluation mode
model.eval()
print("Model set to evaluation mode.")

#evaluating performance
total = 0
correct = 0
with torch.no_grad(): # Disable gradient calculation for evaluation
    for batch_features, batch_labels in test_loader:

      #moving features and lables on gpu before forwardpass
      batch_features = batch_features.to(device)
      batch_labels = batch_labels.to(device)

      output = model(batch_features)

      _,predicted = torch.max(output,1)
      total += batch_labels.size(0)
      correct += (predicted == batch_labels).sum().item()

accuracy = correct/total
print(f"accuracy = {accuracy:.4f}")


DataLoaders re-created.
Model re-initialized and moved to device.
Loss function re-initialized.
Optimizer re-initialized.
Starting retraining...


RuntimeError: Given groups=1, weight of size [32, 784, 3, 3], expected input[32, 1, 28, 28] to have 784 channels, but got 1 channels instead

In [None]:
x_train.shape[:]

(5682, 784)