- Install Dependencies

In [2]:
%pip install git+https://github.com/Louis-Li-dev/ML_tool_kit
# %pip install torch
# %pip install numpy
# %pip install matplotlib
# %pip install tqdm

Collecting git+https://github.com/Louis-Li-dev/ML_tool_kit
  Cloning https://github.com/Louis-Li-dev/ML_tool_kit to c:\users\ss348\appdata\local\temp\pip-req-build-9dp30wu2
  Resolved https://github.com/Louis-Li-dev/ML_tool_kit to commit fd502a1817451bb91f87680e25a0e05785a2be5b
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Note: you may need to restart the kernel to use updated packages.


  Running command git clone --filter=blob:none -q https://github.com/Louis-Li-dev/ML_tool_kit 'C:\Users\ss348\AppData\Local\Temp\pip-req-build-9dp30wu2'


- Import Packages

In [3]:
from torchvision import transforms  # If you're dealing with image data
import torch.nn as nn
from mkit.torch_support.nn_utils import training_loop
from mkit.torch_support.tensor_utils import k_fold_validation
from mkit.torch_support.model.Autoencoder import GANEncoder
from torchvision import transforms
from torchvision.datasets import MNIST
from tqdm import tqdm
import torch

- Define Models

In [4]:

class Predictor(nn.Module):
    """
    Prediction network: a ``GANEncoder`` followed by a fully connected head.

    The encoder output is flattened per sample and mapped through
    ``Linear -> ReLU -> Linear -> ReLU -> Linear`` (flattened -> 128 -> 64 -> output_dim).
    """

    def __init__(self, width, height, hidden_dims, output_dim):
        """
        Build the encoder and the fully connected prediction head.

        Args:
            width (int): Width used to size the input of the first linear layer.
            height (int): Height used to size the input of the first linear layer.
            hidden_dims (list[int]): Forwarded to ``GANEncoder`` as ``hidden_dims``;
                its last entry also sizes the input of the first linear layer.
            output_dim (int): Number of values produced per sample.
        """
        super().__init__()

        self.encoder = GANEncoder(hidden_dims=hidden_dims)

        # NOTE(review): this sizing assumes the encoder emits hidden_dims[-1]
        # channels at an unchanged width x height - confirm against GANEncoder.
        flattened_features = hidden_dims[-1] * width * height
        head = [
            nn.Linear(flattened_features, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, output_dim),
        ]
        self.fc_layers = nn.Sequential(*head)

    def forward(self, x):
        """
        Encode the input, flatten it per sample, and run the prediction head.

        Args:
            x (torch.Tensor): Input tensor; the first dimension is kept as the batch axis.

        Returns:
            torch.Tensor: Output of the fully connected head.
        """
        encoded = self.encoder(x)
        # Collapse everything except the leading (batch) dimension.
        flat = encoded.view(encoded.size(0), -1)
        return self.fc_layers(flat)

- Use MNIST Data

In [5]:


from torch.utils.data import DataLoader, random_split

# Convert images to tensors, then normalize the single channel
# with mean 0.1307 and standard deviation 0.3081.
to_tensor = transforms.ToTensor()
normalize = transforms.Normalize(mean=(0.1307,), std=(0.3081,))
transform = transforms.Compose([to_tensor, normalize])

# train=False selects the non-training split; it is downloaded to ./data if missing.
dataset = MNIST(root='./data', train=False, download=True, transform=transform)


- Training and Using KFolds
    - To keep this demo quick, the model is trained with only 2 folds and 1 epoch. For real experiments, use more folds and more epochs.

In [None]:


def training_procedure(train_loader):
    """
    Train a fresh ``Predictor`` on one fold's training data.

    Args:
        train_loader: Loader of training batches, passed straight to ``training_loop``.

    Returns:
        tuple: ``(model, device, criterion)`` - the model returned by
        ``training_loop``, the device it was trained with, and the loss
        function, so the caller can reuse them for evaluation.
    """
    NUM_OF_CLASSES = 10
    WIDTH, HEIGHT = 28, 28
    model = Predictor(width=WIDTH, height=HEIGHT, hidden_dims=[1, 2], output_dim=NUM_OF_CLASSES)
    # Fall back to the CPU so the notebook still runs on machines without a CUDA device.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    criterion = torch.nn.CrossEntropyLoss()
    # With keep_losses=True the call is unpacked as (model, losses); the losses are not used here.
    model, _losses = training_loop(
        model=model,
        device=device,
        train_loader=train_loader,
        optimizer=torch.optim.Adam(model.parameters()),
        criterion=criterion,
        keep_losses=True,
        epochs=1
    )
    return model, device, criterion

def testing_procedure(
        model,
        test_loader,
        device,
        criterion    
    ):
    
    total_loss = 0.0
    correct_predictions = 0
    total_samples = 0

    with torch.no_grad():
        for inputs, labels in tqdm(test_loader):
            inputs, labels = inputs.to(device), labels.to(device)  # Move data to the appropriate device

            outputs = model(inputs)  # Forward pass
            loss = criterion(outputs, labels)  # Compute loss
            total_loss += loss.item()

            # Compute accuracy
            _, predicted = torch.max(outputs, 1)
            correct_predictions += (predicted == labels).sum().item()
            total_samples += labels.size(0)

    average_loss = total_loss / len(test_loader)
    accuracy = correct_predictions / total_samples

    print(f'Average Loss: {average_loss:.4f}, Accuracy: {accuracy:.4%}')
    return accuracy
# Per-fold callback passed to k_fold_validation
def procedure(train_subset, test_subset, **kwargs):
    """
    Train on one fold's training subset and score on its test subset.

    Args:
        train_subset: Dataset wrapped in a ``DataLoader`` for training.
        test_subset: Dataset wrapped in a ``DataLoader`` for evaluation.
        **kwargs: Accepted but not used.

    Returns:
        dict: Single entry mapping ``"accurary"`` to the value returned by
        ``testing_procedure``.
    """
    # Both loaders use the DataLoader defaults.
    train_loader, test_loader = DataLoader(train_subset), DataLoader(test_subset)

    fitted_model, device, criterion = training_procedure(train_loader)
    fold_score = testing_procedure(fitted_model, test_loader, device, criterion)

    # The key spelling is kept as-is: it is the column name in the results table.
    return {"accurary": fold_score}

result_list = k_fold_validation(dataset, n_splits=2, procedure=procedure)


- Evaluation

In [None]:
import pandas as pd
def get_df(result_list):
    """
    Tabulate per-fold results with a 1-based ``fold`` column.

    Args:
        result_list: Data accepted by ``pd.DataFrame``, one record per fold.

    Returns:
        pd.DataFrame: The records with a leading ``fold`` column numbered from 1.
    """
    return (
        pd.DataFrame(data=result_list)
        .reset_index(names=['fold'])  # turn the 0-based row index into a column
        .assign(fold=lambda frame: frame['fold'] + 1)  # shift to 1-based numbering
    )

In [71]:
get_df(result_list)

Unnamed: 0,fold,accurary
0,1,0.8628
1,2,0.8762


- Classical Machine Learning (k-Nearest Neighbors)

In [66]:


from sklearn.model_selection import KFold
from sklearn.neighbors import KNeighborsClassifier


def procedure(train_subset, test_subset, **kwargs):
    """
    Fit a k-nearest-neighbors classifier on one fold and score it on the other.

    Args:
        train_subset: 3-tuple whose first two items are the training inputs
            and labels; the third item is ignored.
        test_subset: 3-tuple laid out like ``train_subset``, used for scoring.
        **kwargs: Accepted but not used.

    Returns:
        dict: Single entry mapping ``'accurary'`` to the classifier's score on
        the test data.
    """
    train_x, train_y, _ = train_subset
    test_x, test_y, _ = test_subset
    # Flatten every sample to one feature vector; the first axis stays the sample axis.
    train_x = train_x.reshape(len(train_x), -1)
    test_x = test_x.reshape(len(test_x), -1)
    model = KNeighborsClassifier()
    model.fit(train_x, train_y)
    accuracy = model.score(test_x, test_y)
    # Return a dict (key: value). The previous `{'accurary', accurary}` built a set.
    # The key spelling matches the column name used by the existing results table.
    return {'accurary': accuracy}


# Fold configuration: 5 shuffled folds with a fixed seed.
n_splits = 5
kfold = KFold(
    n_splits=n_splits,
    shuffle=True,
    random_state=42,
)

# Inputs and labels taken straight from the dataset object, bundled as (x, y, x).
# NOTE(review): despite the names, no conversion to numpy happens here - confirm
# the element type that dataset.data / dataset.targets actually expose.
numpy_x, numpy_y = dataset.data, dataset.targets
dataset_tuple = (numpy_x, numpy_y, numpy_x)


In [72]:
get_df(result_list)

Unnamed: 0,fold,accurary
0,1,0.8628
1,2,0.8762
