In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

from torch.utils.data import Dataset, DataLoader
from torchvision.datasets import ImageFolder

import timm
import wandb
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import sys
from tqdm.notebook import tqdm

In [2]:
device = torch.device('cuda:0' if torch.cuda.is_available() else "cpu")

## Dataset and DataLoader

In [3]:
import torch
from torch.utils.data import Dataset, random_split

class DigitRecognizerDataset2(Dataset):
    def __init__(self, data, label, validation_split=0.2, transform=None):
        self.data = data
        self.label = label
        self.transform = transform

        # Split dataset into training and validation
        num_samples = len(self.data)
        num_validation = int(validation_split * num_samples)
        num_training = num_samples - num_validation

        self.train_data, self.val_data = random_split(self.data, [num_training, num_validation])

    def __len__(self):
        return len(self.train_data)

    def __getitem__(self, index):
        # Get features (X) and target (y) for the given index
        X = self.train_data.iloc[index, :0]  # Exclude the target column
        y = self.train_data.loc[index, self.label]

        # Convert to PyTorch tensors
        X_tensor = torch.tensor(X.values, dtype=torch.float32)
        y_tensor = torch.tensor(y, dtype=torch.float32)

        if self.transform:
            X_tensor = self.transform(X_tensor)

        return X_tensor, y_tensor

    def get_validation_data(self):
        # Return validation data for evaluation
        return self.val_data


In [12]:
class DigitRecognizerDataset(Dataset):
    def __init__(self, data, target):
        self.data = data
        self.target = target
        
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, idx):
        return self.data[idx], self.target[idx]

In [13]:
df = pd.read_csv('../input/digit-recognizer/train.csv')
test_df = pd.read_csv('../input/digit-recognizer/test.csv')

target_column = 'label'

In [14]:
X = df.drop(target_column, axis=1)
y = df[target_column]

In [15]:
from sklearn.model_selection import train_test_split

train_X, val_X, train_y, val_y = train_test_split(X, y, random_state=42)

train_X = torch.tensor(train_X.values, dtype=torch.float)
val_X = torch.tensor(val_X.values, dtype=torch.float)
train_y = torch.tensor(train_y.values, dtype=torch.float)
val_y = torch.tensor(val_y.values, dtype=torch.float)

train_dataset = DigitRecognizerDataset(train_X, train_y)
valid_dataset = DigitRecognizerDataset(val_X, val_y)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(valid_dataset, batch_size=64, shuffle=True)

In [16]:
for ex, label in train_dataset:
    break

In [17]:
ex.shape, label.shape

(torch.Size([784]), torch.Size([]))

In [19]:
label

tensor(7.)

## Model

In [20]:
class DigitRecognizer(nn.Module):
    def __init__(self, input_size, hidden_size1, hidden_size2, classes):
        super(DigitRecognizer, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size1)
        self.fc2 = nn.Linear(hidden_size1, hidden_size2)
        self.fc3 = nn.Linear(hidden_size2, classes)
        
    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x

In [22]:
model = DigitRecognizer(len(df.columns), 512, 512, 1)
print(str(model)[:500])

DigitRecognizer(
  (fc1): Linear(in_features=785, out_features=512, bias=True)
  (fc2): Linear(in_features=512, out_features=512, bias=True)
  (fc3): Linear(in_features=512, out_features=1, bias=True)
)


In [None]:
example = model(ex)
example.shape