In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from sklearn.preprocessing import StandardScaler

In [3]:
torch.manual_seed(23)

<torch._C.Generator at 0x7ebf9dcd9930>

# Task
Explain the error in the provided Python code for loading a Kaggle dataset, fix the error, and load the "fashion-mnist" dataset from Kaggle using the Kaggle API.

## Install kaggle api

### Subtask:
Install the `kaggle` library in the Colab environment.


**Reasoning**:
The subtask is to install the `kaggle` library. I will use `pip install` in a code block to achieve this.



In [6]:
!pip install kaggle



## Upload kaggle api credentials

### Subtask:
Instruct the user on how to get their `kaggle.json` file and upload it to Colab.


**Reasoning**:
I need to provide instructions to the user on how to obtain and upload their Kaggle API credentials (`kaggle.json`) to Colab, as per the subtask instructions. This requires explaining the process of generating the token on Kaggle and then uploading it in the Colab environment.



## Download the dataset

### Subtask:
Use the Kaggle API to download the Fashion MNIST dataset.


**Reasoning**:
Create the .kaggle directory, move the kaggle.json file into it, set the correct permissions, and then download the dataset using the Kaggle API.



In [9]:
import os
import shutil

# Create the .kaggle directory
if not os.path.exists('/root/.kaggle'):
    os.makedirs('/root/.kaggle')

# Move the kaggle.json file
# Assumes kaggle.json is in the current directory after upload
if os.path.exists('kaggle.json'):
    shutil.move('kaggle.json', '/root/.kaggle/kaggle.json')
else:
    print("kaggle.json not found. Please upload it first.")

# Set permissions for kaggle.json
os.chmod('/root/.kaggle/kaggle.json', 0o600)

# Download the dataset using Kaggle API
!kaggle datasets download -d zalando-research/fashionmnist

Dataset URL: https://www.kaggle.com/datasets/zalando-research/fashionmnist
License(s): other
Downloading fashionmnist.zip to /content
  0% 0.00/68.8M [00:00<?, ?B/s]
100% 68.8M/68.8M [00:00<00:00, 1.37GB/s]


**Reasoning**:
The dataset has been downloaded as a zip file. Now, unzip the downloaded file to access the CSV files.



In [10]:
import zipfile

# Unzip the downloaded file
with zipfile.ZipFile('fashionmnist.zip', 'r') as zip_ref:
    zip_ref.extractall('.')

# List the extracted files to confirm
!ls

fashion-mnist_test.csv	 sample_data		 train-images-idx3-ubyte
fashion-mnist_train.csv  t10k-images-idx3-ubyte  train-labels-idx1-ubyte
fashionmnist.zip	 t10k-labels-idx1-ubyte


## Load the dataset

### Subtask:
Read the downloaded `fashion-mnist_train.csv` file into a pandas DataFrame.


**Reasoning**:
Read the `fashion-mnist_train.csv` file into a pandas DataFrame and display the first few rows.



In [11]:
df = pd.read_csv('fashion-mnist_train.csv')
display(df.head())

Unnamed: 0,label,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,pixel784
0,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,6,0,0,0,0,0,0,0,5,0,...,0,0,0,30,43,0,0,0,0,0
3,0,0,0,0,1,2,0,0,0,0,...,3,0,0,0,0,1,0,0,0,0
4,3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [12]:
df.shape

(60000, 785)

In [13]:
#train test split
x=df.iloc[:, 1:].values
y=df.iloc[:, 0].values
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

In [14]:
x_train=x_train/255.0
x_test=x_test/255.0

In [15]:
class CustomDataset(Dataset):
    def __init__(self, features, labels):
        self.features = torch.tensor(features, dtype=torch.float32)
        self.labels = torch.tensor(labels, dtype=torch.long)

    def __len__(self):
      return len(self.features)

    def __getitem__(self, idx):
      return self.features[idx], self.labels[idx]


In [16]:
train_dataset=CustomDataset(x_train, y_train)
test_dataset=CustomDataset(x_test, y_test)

In [17]:
train_loader=DataLoader(train_dataset, batch_size=32, shuffle=True) #pin_memory=true
test_loader=DataLoader(test_dataset, batch_size=32, shuffle=False)  #pin_memory=true

In [18]:
len(train_loader)

1500

In [30]:
class Myfashion(nn.Module):

  def __init__(self, num_features):
    super().__init__()
    self.model=nn.Sequential(
        nn.Linear(num_features, 128),
        nn.ReLU(),
        nn.Linear(128, 64),
        nn.ReLU(),
        nn.Linear(64, 10)
    )


  def forward(self, x):
    return self.model(x)

In [32]:
Learning_rate=0.001
epochs =100

In [33]:
#initiate the model
model=Myfashion(x_train.shape[1])
#loss fuction
criterion=nn.CrossEntropyLoss()
#optimizer
optimizer=optim.SGD(model.parameters(), lr=Learning_rate)

In [36]:
#training loop
for epoch in range(epochs):
  total_epoch=0

  for batch_feature, batch_labels in train_loader:
    optimizer.zero_grad()
    output=model(batch_feature)
    loss=criterion(output, batch_labels)
    loss.backward()
    optimizer.step()

    total_epoch_loss= total_epoch+loss.item()
    avg_loss=total_epoch_loss/len(train_loader)

  print(f'Epoch {epoch+1}/{epochs}, Loss: {avg_loss:.4f}')

Epoch 1/100, Loss: 0.0006
Epoch 2/100, Loss: 0.0006
Epoch 3/100, Loss: 0.0004
Epoch 4/100, Loss: 0.0004
Epoch 5/100, Loss: 0.0004
Epoch 6/100, Loss: 0.0005
Epoch 7/100, Loss: 0.0002
Epoch 8/100, Loss: 0.0002
Epoch 9/100, Loss: 0.0002
Epoch 10/100, Loss: 0.0003
Epoch 11/100, Loss: 0.0003
Epoch 12/100, Loss: 0.0004
Epoch 13/100, Loss: 0.0003
Epoch 14/100, Loss: 0.0005
Epoch 15/100, Loss: 0.0004
Epoch 16/100, Loss: 0.0004
Epoch 17/100, Loss: 0.0004
Epoch 18/100, Loss: 0.0003
Epoch 19/100, Loss: 0.0001
Epoch 20/100, Loss: 0.0002
Epoch 21/100, Loss: 0.0003
Epoch 22/100, Loss: 0.0003
Epoch 23/100, Loss: 0.0002
Epoch 24/100, Loss: 0.0003
Epoch 25/100, Loss: 0.0005
Epoch 26/100, Loss: 0.0003
Epoch 27/100, Loss: 0.0004
Epoch 28/100, Loss: 0.0001
Epoch 29/100, Loss: 0.0004
Epoch 30/100, Loss: 0.0002
Epoch 31/100, Loss: 0.0002
Epoch 32/100, Loss: 0.0002
Epoch 33/100, Loss: 0.0003
Epoch 34/100, Loss: 0.0004
Epoch 35/100, Loss: 0.0006
Epoch 36/100, Loss: 0.0001
Epoch 37/100, Loss: 0.0003
Epoch 38/1

In [37]:
model.eval()

Myfashion(
  (model): Sequential(
    (0): Linear(in_features=784, out_features=128, bias=True)
    (1): ReLU()
    (2): Linear(in_features=128, out_features=64, bias=True)
    (3): ReLU()
    (4): Linear(in_features=64, out_features=10, bias=True)
  )
)

In [39]:
#evaluation code
total=0
correct=0

with torch.no_grad():
  for batch_feature, batch_labels in test_loader:
    output=model(batch_feature)

    _, predicted=torch.max(output.data, 1)
    total+=batch_labels.size(0)
    correct+=(predicted==batch_labels).sum().item()

accuracy=correct/total
print(accuracy)

0.8611666666666666
