In [34]:
import torch
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
import pandas as pd
import torch.nn as nn
import torch.optim as optim


In [35]:
# Seed PyTorch's global generator so weight initialisation, dropout masks and
# DataLoader shuffling repeat across "Restart & Run All". Some GPU kernels are
# still non-deterministic, so bit-exact repeatability on CUDA is not guaranteed.
SEED = 42
torch.manual_seed(SEED)

# Prefer the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device: ", device)


Using device:  cuda


In [36]:
# Load the CSV (one row per image: a label column followed by pixel columns),
# preview the first rows as plain text, then show the frame's dimensions.
df = pd.read_csv("fashion-mnist.csv")
print(df.head(n=5))
df.shape


   label  pixel1  pixel2  pixel3  pixel4  pixel5  pixel6  pixel7  pixel8  \
0      0       0       0       0       0       0       0       0       9   
1      1       0       0       0       0       0       0       0       0   
2      2       0       0       0       0       0       0      14      53   
3      2       0       0       0       0       0       0       0       0   
4      3       0       0       0       0       0       0       0       0   

   pixel9  ...  pixel775  pixel776  pixel777  pixel778  pixel779  pixel780  \
0       8  ...       103        87        56         0         0         0   
1       0  ...        34         0         0         0         0         0   
2      99  ...         0         0         0         0        63        53   
3       0  ...       137       126       140         0       133       224   
4       0  ...         0         0         0         0         0         0   

   pixel781  pixel782  pixel783  pixel784  
0         0         0         

(10000, 785)

In [37]:
# Hold out 20% of the rows for evaluation. Column 0 is the label; every
# remaining column is a pixel feature. The fixed random_state keeps the split
# identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    df.iloc[:, 1:],
    df.iloc[:, 0],
    random_state=42,
    test_size=0.2,
)
X_test.shape


(2000, 784)

In [38]:
# Rescale pixel intensities by 255 (maps to [0, 1] assuming 8-bit pixel values).
# NOTE: the names are rebound in place, so running this cell a second time
# divides by 255 again -- re-run the split cell first if you need to repeat it.
X_train, X_test = X_train / 255.0, X_test / 255.0


In [39]:
class MyCustomDataset(Dataset):
    """In-memory dataset pairing feature rows with their integer labels.

    Both inputs must expose a ``.values`` array (e.g. pandas objects); they are
    converted to tensors once, at construction time.
    """

    def __init__(self, features, labels):
        """Convert `features` to a float32 tensor and `labels` to an int64 tensor."""
        feature_array = features.values
        label_array = labels.values
        self.features = torch.tensor(feature_array, dtype=torch.float32)
        self.labels = torch.tensor(label_array, dtype=torch.long)

    def __len__(self):
        """Number of samples, taken from the first dimension of the feature tensor."""
        return self.features.shape[0]

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair stored at `index`."""
        sample = self.features[index]
        target = self.labels[index]
        return sample, target


In [40]:
# Wrap both splits as tensor-backed datasets, then show the training-set size.
train_dataset, test_dataset = (
    MyCustomDataset(X_train, y_train),
    MyCustomDataset(X_test, y_test),
)
len(train_dataset)


8000

In [41]:
class MyNN(nn.Module):
    """Fully connected classifier with a configurable stack of hidden blocks.

    Each hidden block is Linear -> BatchNorm1d -> ReLU -> Dropout. A final
    Linear layer maps to `output_dim` raw scores (no softmax is applied).

    Args:
        input_dim: number of input features per sample.
        output_dim: number of output scores per sample.
        num_hidden_layers: how many hidden blocks to stack; 0 yields a single
            Linear layer from `input_dim` to `output_dim`.
        neurons_per_layer: width of every hidden block.
        dr: dropout probability used in every hidden block.
    """

    def __init__(self, input_dim, output_dim, num_hidden_layers, neurons_per_layer, dr):
        super().__init__()

        layers = []
        # Track the width feeding the next layer without rebinding the argument.
        in_features = input_dim

        for _ in range(num_hidden_layers):
            layers += [
                nn.Linear(in_features, neurons_per_layer),
                nn.BatchNorm1d(neurons_per_layer),
                nn.ReLU(),
                nn.Dropout(p=dr),
            ]
            in_features = neurons_per_layer

        # Size the head from the running width rather than `neurons_per_layer`,
        # so the network is still shape-consistent when there are no hidden blocks.
        layers.append(nn.Linear(in_features, output_dim))

        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Return the raw class scores produced by the layer stack for `x`."""
        return self.model(x)


In [48]:
def objective(trial):
    """Optuna objective: train one MyNN configuration and return its accuracy.

    Samples the architecture (depth, width, dropout) and the training setup
    (epochs, learning rate, batch size, optimizer, weight decay) from `trial`,
    trains on the module-level `train_dataset`, and scores the result on the
    module-level `test_dataset`.

    Args:
        trial: Optuna trial used to sample the hyperparameters.

    Returns:
        float: fraction of `test_dataset` samples classified correctly.

    NOTE(review): the value returned here is computed on `test_dataset`, and the
    study picks hyperparameters by maximising it, so the best score is an
    optimistic estimate. Consider carving a validation split out of the
    training data for the search and keeping the test split for a final check.
    """
    num_hidden_layers = trial.suggest_int("num_hidden_layers", 1, 5)
    neurons_per_layer = trial.suggest_int("neurons_per_layer", 8, 128, step=8)
    epochs = trial.suggest_int("epochs", 10, 50, step=10)
    learning_rate = trial.suggest_float("learning_rate", 1e-5, 1e-1, log=True)
    dropout_rate = trial.suggest_float("dropout_rate", 0.1, 0.5, step=0.1)
    batch_size = trial.suggest_categorical("batch_size", [16, 32, 64, 128])
    optimizer_name = trial.suggest_categorical("optimizer", ["Adam", "SGD", "RMSprop"])
    weight_decay = trial.suggest_float("weight_decay", 1e-5, 1e-3, log=True)

    input_dim = 784
    output_dim = 10

    model = MyNN(
        input_dim, output_dim, num_hidden_layers, neurons_per_layer, dr=dropout_rate
    )
    model = model.to(device)

    # Page-locked host memory only helps host-to-GPU copies; requesting it on a
    # CPU-only machine just triggers a DataLoader warning.
    use_cuda = device.type == "cuda"

    train_loader = DataLoader(
        train_dataset, batch_size=batch_size, shuffle=True, pin_memory=use_cuda
    )
    test_loader = DataLoader(
        test_dataset, batch_size=batch_size, shuffle=False, pin_memory=use_cuda
    )

    criterion = nn.CrossEntropyLoss()

    if optimizer_name == "Adam":
        optimizer = optim.Adam(
            model.parameters(), lr=learning_rate, weight_decay=weight_decay
        )
    elif optimizer_name == "SGD":
        optimizer = optim.SGD(
            model.parameters(), lr=learning_rate, weight_decay=weight_decay
        )
    else:
        optimizer = optim.RMSprop(
            model.parameters(), lr=learning_rate, weight_decay=weight_decay
        )

    # Explicit for readability: dropout and batch-norm must be in training mode.
    model.train()

    for epoch in range(epochs):
        for batch_features, batch_labels in train_loader:
            # non_blocking lets the copy from pinned memory overlap with host
            # work; it is a no-op when the source is not pinned.
            batch_features = batch_features.to(device, non_blocking=use_cuda)
            batch_labels = batch_labels.to(device, non_blocking=use_cuda)

            optimizer.zero_grad()

            outputs = model(batch_features)
            loss = criterion(outputs, batch_labels)
            loss.backward()

            optimizer.step()

    # Switch dropout off and make batch-norm use its running statistics.
    model.eval()

    total = 0
    correct = 0

    with torch.no_grad():
        for batch_features, batch_labels in test_loader:
            batch_features = batch_features.to(device, non_blocking=use_cuda)
            batch_labels = batch_labels.to(device, non_blocking=use_cuda)

            outputs = model(batch_features)
            predicted = outputs.argmax(dim=1)
            total += batch_labels.size(0)
            correct += (predicted == batch_labels).sum().item()

    accuracy = correct / total

    return accuracy


In [44]:
!pip install optuna




In [45]:
import optuna


In [46]:
# Seed the sampler so the sequence of suggested hyperparameters is repeatable
# across kernel restarts. TPESampler is also what create_study uses by default.
study = optuna.create_study(
    direction="maximize",
    sampler=optuna.samplers.TPESampler(seed=42),
)


[I 2025-01-23 08:20:13,924] A new study created in memory with name: no-name-5eaab247-5d2b-43e7-bdba-d2067a10529c


In [50]:
# Run 10 more trials of the objective. Trials accumulate on the same in-memory
# study, so re-running this cell continues the search instead of restarting it.
study.optimize(func=objective, n_trials=10)


[I 2025-01-23 08:28:24,507] Trial 11 finished with value: 0.7895 and parameters: {'num_hidden_layers': 2, 'neurons_per_layer': 128, 'epochs': 50, 'learning_rate': 0.005811675114447413, 'dropout_rate': 0.1, 'batch_size': 64, 'optimizer': 'RMSprop', 'weight_decay': 2.636676440542228e-05}. Best is trial 6 with value: 0.859.
[I 2025-01-23 08:28:36,649] Trial 12 finished with value: 0.8425 and parameters: {'num_hidden_layers': 2, 'neurons_per_layer': 128, 'epochs': 10, 'learning_rate': 0.00014477499372671362, 'dropout_rate': 0.5, 'batch_size': 16, 'optimizer': 'RMSprop', 'weight_decay': 7.618639803736071e-05}. Best is trial 6 with value: 0.859.
[I 2025-01-23 08:28:48,491] Trial 13 finished with value: 0.838 and parameters: {'num_hidden_layers': 2, 'neurons_per_layer': 112, 'epochs': 30, 'learning_rate': 0.0024571196479046937, 'dropout_rate': 0.4, 'batch_size': 64, 'optimizer': 'RMSprop', 'weight_decay': 0.00011264365899983148}. Best is trial 6 with value: 0.859.
[I 2025-01-23 08:29:25,255] 

In [54]:
# Full record of the highest-scoring trial: value, timestamps, sampled
# parameters and the distributions they were drawn from.
study.best_trial


FrozenTrial(number=6, state=1, values=[0.859], datetime_start=datetime.datetime(2025, 1, 23, 8, 26, 19, 126479), datetime_complete=datetime.datetime(2025, 1, 23, 8, 26, 37, 654361), params={'num_hidden_layers': 3, 'neurons_per_layer': 128, 'epochs': 40, 'learning_rate': 0.0002899126597413497, 'dropout_rate': 0.4, 'batch_size': 64, 'optimizer': 'RMSprop', 'weight_decay': 2.0673201499782324e-05}, user_attrs={}, system_attrs={}, intermediate_values={}, distributions={'num_hidden_layers': IntDistribution(high=5, log=False, low=1, step=1), 'neurons_per_layer': IntDistribution(high=128, log=False, low=8, step=8), 'epochs': IntDistribution(high=50, log=False, low=10, step=10), 'learning_rate': FloatDistribution(high=0.1, log=True, low=1e-05, step=None), 'dropout_rate': FloatDistribution(high=0.5, log=False, low=0.1, step=0.1), 'batch_size': CategoricalDistribution(choices=(16, 32, 64, 128)), 'optimizer': CategoricalDistribution(choices=('Adam', 'SGD', 'RMSprop')), 'weight_decay': FloatDistrib

In [52]:
# Best objective value (accuracy returned by `objective`) seen so far.
study.best_value


0.859

In [53]:
# Hyperparameter values of the best trial, as a plain dict.
study.best_params


{'num_hidden_layers': 3,
 'neurons_per_layer': 128,
 'epochs': 40,
 'learning_rate': 0.0002899126597413497,
 'dropout_rate': 0.4,
 'batch_size': 64,
 'optimizer': 'RMSprop',
 'weight_decay': 2.0673201499782324e-05}