# Explore pre-training data for visual model 

In [None]:
import os
import numpy as np
import pandas as pd
import torch
from torchvision import transforms
import torchvision
import matplotlib.pyplot as plt
from PIL import Image

In [None]:
# Location of the raw recording pickle; it is only read by the disabled
# line below.
processed_data_path = os.path.join('../..', 'train_data', 'race_data_blue_1.pkl')

# df = pd.read_pickle(processed_data_path)

# Load the frame that already contains the integer 'encoded' label column.
encoded_data_path = "../../train_data/blue_1/race_data_encoded.pkl"
df_encoded = pd.read_pickle(encoded_data_path)

In [None]:
# df_encoded.to_pickle("../../train_data/blue_1/race_data_encoded.pkl")

In [None]:
# The code below was generated with ChatGPT.
def encode_single(record, columns):
    """
    Pack the binary flags of one record into a single integer.

    The last name in ``columns`` is stored in bit 0 (least significant), so
    the first name ends up in the most significant bit.

    Args:
        record (dict): Maps every name in ``columns`` to its binary value.
        columns (list): Column names to pack, most significant first.

    Returns:
        int: Sum of ``record[col] * 2**position`` over all columns; 0 when
            ``columns`` is empty.
    """
    packed = 0
    bit_weight = 1
    # Walk from the last column (bit 0) towards the first, doubling the
    # weight at every step.
    for name in reversed(columns):
        packed += record[name] * bit_weight
        bit_weight <<= 1
    return packed

def decode_single(encoded_value, columns):
    """
    Decodes an integer value into a dictionary of binary columns.

    The last name in ``columns`` is read from bit 0 (least significant) and
    the first name from the most significant bit. Bits above
    ``len(columns)`` are ignored.

    Args:
        encoded_value (int): The encoded integer value.
        columns (list): The list of column names to decode, most significant
            first.

    Returns:
        dict: Maps each column name to 0 or 1. Keys are inserted in reversed
            ``columns`` order (least significant bit first).
    """
    return {
        col: (encoded_value >> bit) & 1
        for bit, col in enumerate(reversed(columns))
    }


In [None]:
def encode_dataframe(df, columns):
    """
    Add an 'encoded' column that packs several binary columns into one integer.

    Each row is converted to a dict and passed to ``encode_single``. The
    input frame is modified in place and also returned.

    Args:
        df (pd.DataFrame): Frame containing the binary columns.
        columns (list): Names of the binary columns to pack.

    Returns:
        pd.DataFrame: The same ``df`` object, now with an 'encoded' column.
    """
    def _pack_row(row):
        # One row of the selected columns -> its packed integer.
        return encode_single(row.to_dict(), columns)

    binary_part = df[columns]
    df['encoded'] = binary_part.apply(_pack_row, axis=1)
    return df

def decode_dataframe(df, encoded_column, columns):
    """
    Expand an integer column back into one binary column per name.

    Every value of ``encoded_column`` is passed to ``decode_single``; the
    resulting columns are appended to the right of a copy of ``df``.

    Args:
        df (pd.DataFrame): Frame containing the encoded column.
        encoded_column (str): Name of the column holding the encoded integers.
        columns (list): Names of the binary columns to produce.

    Returns:
        pd.DataFrame: A new frame made of ``df`` followed by the decoded
            binary columns (same index as ``df``).
    """
    def _unpack(value):
        # One encoded integer -> {column name: bit}.
        return decode_single(value, columns)

    bit_records = df[encoded_column].apply(_unpack).tolist()
    bits = pd.DataFrame(bit_records, columns=columns, index=df.index)
    return pd.concat([df, bits], axis=1)

In [None]:
# Define the column names
# columns = ['up', 'down', 'left', 'right']

# Encode
# df_encoded = encode_dataframe(df, columns)

In [None]:
# Summary statistics of the encoded label column.
encoded_values = df_encoded['encoded']
print(encoded_values.describe())

# Histogram of the encoded labels, drawn on a fresh 10x6 figure.
plt.figure(figsize=(10, 6))
encoded_values.plot(kind='hist', bins=50, edgecolor='black')
plt.title('Distribution of Encoded Values')
plt.xlabel('Encoded Value')
plt.ylabel('Frequency')
plt.grid(True)
plt.show()

In [None]:
# NOTE(review): train_loader and valid_loader are not defined anywhere in the
# visible notebook, and model / criterion / optimizer / device / num_epochs are
# only created in later cells. This cell can only run once all of those exist.
total_step = len(train_loader)

for epoch in range(num_epochs):
    # Validation below switches the model to eval mode, so training mode has
    # to be restored at the start of every epoch.
    model.train()
    for i, (images, labels) in enumerate(train_loader):
        # Move tensors to the configured device
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Reports the loss of the last batch of the epoch only.
    print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                    .format(epoch+1, num_epochs, i+1, total_step, loss.item()))

    # Validation
    # Layers such as dropout behave differently in eval mode; without this the
    # accuracy would be measured with training-time behaviour.
    model.eval()
    with torch.no_grad():
        correct = 0
        total = 0
        for images, labels in valid_loader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            # Index of the highest logit is the predicted class.
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            del images, labels, outputs

        # Report the number of images actually evaluated instead of a fixed 5000.
        print('Accuracy of the network on the {} validation images: {} %'.format(total, 100 * correct / total))

In [None]:
# Per-channel mean / std passed to Normalize.
# NOTE(review): presumably the usual ImageNet statistics expected by the
# pre-trained torchvision models - confirm.
normalize_mean = [0.485, 0.456, 0.406]
normalize_std = [0.229, 0.224, 0.225]

# Resize -> centre crop to 224 -> tensor -> normalise, applied in this order.
preprocessing_steps = [
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=normalize_mean, std=normalize_std),
]
input_transform = transforms.Compose(preprocessing_steps)

# Folder containing the frames and the encoded label pickle.
data_folder = os.path.join('../..', 'train_data/blue_1/')

In [None]:
""" Custom dataset class. Derived from the excellent tutoral at https://pytorch.org/tutorials/beginner/data_loading_tutorial.html"""
import os
import glob
import torch
from torch.utils.data import Dataset, DataLoader


class BunchOfImagesDataset(Dataset):
    """Dataset pairing the PNG frames of a folder with their encoded labels.

    Labels are read from ``race_data_encoded.pkl`` inside ``folder``. The image
    for sample ``idx`` is ``blue_1_id_<idx zero-padded to 4 digits>.png`` in the
    same folder, so row position and image id are assumed to match.

    Args:
        folder (str): Directory holding the PNG frames and the label pickle.
        transforms (callable, optional): Applied to each RGB PIL image before
            it is returned. A falsy value (default ``None``) returns the PIL
            image unchanged.
        batch_size (int): Stored on the instance; the dataset itself does not
            use it.
    """

    def __init__(self, folder, transforms=None, batch_size=64):
        self.folder = folder
        # Kept as an attribute for callers; __getitem__ builds image paths from
        # the sample index rather than from this list.
        self.files = glob.glob(os.path.join(folder, '*.png'))
        self.transforms = transforms
        self.labels = pd.read_pickle(os.path.join(folder, 'race_data_encoded.pkl'))
        self.batch_size = batch_size

    def __len__(self):
        """Return the number of rows in the label table."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the sample at position ``idx``.

        Args:
            idx (int): Positional index into the label table.

        Returns:
            tuple: The (optionally transformed) RGB image and the label as a
                0-dim integer tensor.

        Raises:
            IndexError: If ``idx`` is outside the label table.
        """
        # Select the column first, then the row: this avoids materialising a
        # whole mixed-dtype row per sample and does not rely on the looked-up
        # value exposing ``.item()``.
        label = torch.tensor(int(self.labels['encoded'].iloc[idx]))
        img_id = idx  # TODO: replace by indexing into the df.

        # img_path = self.files[idx]
        # img_id = os.path.splitext(os.path.basename(img_path))[0]
        img_path = os.path.join(self.folder, f'blue_1_id_{str(img_id).zfill(4)}.png')

        # convert() returns an independent image, so the source file can be
        # closed as soon as the conversion is done.
        with Image.open(img_path) as source:
            image = source.convert('RGB')

        if self.transforms:
            image = self.transforms(image)

        return image, label

In [None]:
# Build the dataset over the frame folder with the preprocessing pipeline.
dataset = BunchOfImagesDataset(data_folder, transforms=input_transform)

# Shuffled mini-batches of 16; num_workers=0 loads in the main process.
dataloader = DataLoader(
    dataset,
    batch_size=16,
    shuffle=True,
    num_workers=0,
)

"""
# Query a bunch of data
for i in range(5):  # Query first 5 samples
    image, label = dataset[i]
    print(f"Image {i} - Label: {label}")
    plt.imshow(image.permute(1, 2, 0))  # Convert from (C, H, W) to (H, W, C)
    plt.show()
"""

In [None]:
# Number of samples in the dataset (one per row of its label table).
len(dataset)


In [None]:
import torch
import torch.nn as nn
from PIL import Image
from torchvision import transforms


# Four binary controls give 2**4 = 16 combined classes.
num_classes = 16 # (forward, backward, left, right and all their combinations)

# Load a pre-trained AlexNet from the torchvision repository via torch.hub
# (this is not a timm model).
model = torch.hub.load('pytorch/vision:v0.10.0', 'alexnet', pretrained=True)

# Modify the model head for fine-tuning: replace only the last classifier
# layer so the network outputs one logit per encoded class instead of the
# output width of the pre-trained head.
num_features = model.classifier[-1].in_features
model.classifier[-1] = nn.Linear(num_features, num_classes)

# Alternative, deeper head kept for reference (disabled).
# NOTE(review): num_features is the input width of the *last* classifier
# layer; replacing the whole classifier needs the width of the flattened
# feature map instead - confirm before enabling.
"""
model.classifier = nn.Sequential(
    nn.Dropout(0.5),               # Dropout layer with 50% probability
    nn.Linear(num_features, 256),  # Additional linear layer with 256 output features
    nn.ReLU(inplace=True),         # Activation function (you can choose other activation functions too)
    nn.Dropout(0.5),               # Dropout layer with 50% probability
    nn.Linear(256, num_classes)    # Final prediction fc layer
)
"""

In [None]:
# Hyper-parameters.
# The labels encode four binary controls, i.e. 16 classes (0-15); the previous
# value of 10 contradicted the class count declared where the model is set up.
num_classes = 16
num_epochs = 20
# NOTE(review): the DataLoader earlier in the notebook is built with its own
# literal batch size, so this value is not read by any visible code.
batch_size = 16
learning_rate = 0.005


# Loss and optimizer
# CrossEntropyLoss takes raw logits and integer class indices.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, weight_decay = 0.005, momentum = 0.9)


In [None]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# The batches are moved to `device` below, so the model has to be there too;
# otherwise the forward pass fails with a device mismatch when CUDA is
# available. Module.to() moves the parameters in place, so the optimizer
# created earlier still refers to the same parameter objects.
model.to(device)
# Make sure train-time behaviour (e.g. dropout) is active.
model.train()

total_step = len(dataloader)

for epoch in range(num_epochs):
    # 1-based, to agree with the summary line printed after each epoch.
    print(f"Epoch {epoch + 1} / {num_epochs}")
    for i, (images, labels) in enumerate(dataloader):
        print(f"Batch nr: {i}")
        # Move tensors to the configured device
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Reports the loss of the last batch of the epoch only.
    print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                    .format(epoch+1, num_epochs, i+1, total_step, loss.item()))

    # Validation is disabled: no valid_loader is defined in the visible notebook.
    """
    # Validation
    with torch.no_grad():
        correct = 0
        total = 0
        for images, labels in valid_loader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            del images, labels, outputs

        print('Accuracy of the network on the {} validation images: {} %'.format(5000, 100 * correct / total))
    """