## Denisha's Experiments and Research
This notebook contains experimentation with, and an implementation of, a simple neural network applied to Fashion MNIST. The purpose was to learn ML basics and start working with Fashion MNIST in earnest. This notebook is a continuation and refinement of the model in Denisha's first notebook.

In [None]:
import torch
from torchvision import datasets, transforms
import numpy as np
from sklearn.cluster import KMeans
from scipy.spatial.distance import cdist

# Seed torch's RNG so runs are repeatable
torch.manual_seed(42)

# Fetch (or reuse) the Fashion-MNIST training split
train_dataset = datasets.FashionMNIST(
    root='C:\\Users\\denis\\CACI',
    train=True,
    download=True,
    transform=transforms.ToTensor(),
)

# Raw images and targets as numpy arrays
train_data = train_dataset.data.numpy()
train_labels = train_dataset.targets.numpy()

# Flatten every image into one row: (number of samples, number of features)
num_samples = train_data.shape[0]
train_data_reshaped = train_data.reshape(num_samples, -1)

# Cluster the flattened images with KMeans
n_clusters = 30000
kmeans = KMeans(n_clusters=n_clusters, random_state=0)
kmeans.fit(train_data_reshaped)

# For every cluster center, collect the indices of the closest training samples.
# NOTE(review): the earlier comment on this step said "nearest neighbor"
# (singular), but the code keeps the 3000 closest samples for each of the
# 30000 centers, so subset_indices receives n_clusters * 3000 entries and the
# same sample index can appear many times. Confirm this is intended.
neighbors_per_center = 3000
cluster_centers = kmeans.cluster_centers_
subset_indices = []
for center in cluster_centers:
    # Euclidean distance from this center to every flattened image
    distances_to_center = np.linalg.norm(train_data_reshaped - center, axis=1)
    # argpartition puts the smallest distances first without a full sort;
    # the selected indices are therefore not ordered by distance.
    closest = np.argpartition(distances_to_center, neighbors_per_center)
    subset_indices.extend(closest[:neighbors_per_center])

# Define the data loader for the subset dataset.
# DataLoader treats `sampler` and `shuffle=True` as mutually exclusive and
# raises ValueError when both are given. SubsetRandomSampler already draws the
# selected indices in random order on every pass, so shuffling is left to it.
batch_size = 64
subset_sampler = torch.utils.data.SubsetRandomSampler(subset_indices)
subset_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size,
                                            sampler=subset_sampler)

# Define the baseline CNN classifier architecture
class CNN(torch.nn.Module):
    # ... model definition ...
    # NOTE(review): the layers and forward pass are elided in this listing, so
    # the architecture itself cannot be documented from here.

# Instantiate the CNN model, criterion, optimizer, and device
    # NOTE(review): the statements below are indented, so Python executes them
    # as part of the CNN class body (making them class attributes) rather than
    # as module-level setup. Presumably they should be dedented - confirm.

    # Number of label classes reported by the dataset; not used in the visible code.
    num_classes = len(train_dataset.classes)
    # Use the GPU when CUDA is available, otherwise fall back to the CPU.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # NOTE(review): on a fresh run the name `CNN` is not bound until the class
    # body has finished executing, so this line raises NameError as indented.
    classifier = CNN().to(device)
    # Cross-entropy loss for the classifier outputs.
    criterion = torch.nn.CrossEntropyLoss()
    # SGD with momentum 0.9.
    # NOTE(review): `learning_rate` is not defined anywhere in the visible
    # source - TODO confirm it is set in another cell.
    optimizer = torch.optim.SGD(classifier.parameters(), lr=learning_rate, momentum=0.9)

# Fit the baseline classifier on the clustered subset
num_epochs = 20

for epoch in range(num_epochs):
    # Sum of per-batch losses for this epoch; averaged when reported below.
    running_loss = 0.0

    for inputs, labels in subset_loader:
        # Move the batch onto the same device as the model.
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Clear gradients left over from the previous step before the new pass.
        optimizer.zero_grad()

        outputs = classifier(inputs)
        loss = criterion(outputs, labels)

        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # Mean loss per batch over the epoch.
    mean_loss = running_loss / len(subset_loader)
    print('Epoch [%d/%d], Loss: %.4f' % (epoch + 1, num_epochs, mean_loss))
