# Setup

On google colab, you have to restart runtime after running the following line

In [26]:
!pip install omegaconf



In [27]:
from google.colab import drive
drive.mount("/content/drive/")
#"/content/drive/My Drive/NN-kNN/"
folder_name = "/content/drive/Othercomputers/My MacBook Pro/GitHub/NN-kNN/"
import sys
sys.path.insert(0,folder_name)

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


In [28]:
##This is added because my Rdata uses Cdata for the covid data set.
##Rdata use Cdata function to load the data set, then convert it to regression problem
import os
import sys
sys.path.append('/content/drive/Othercomputers/My MacBook Pro/GitHub/NN-kNN/dataset')


In [29]:
# folder_name = os.getcwd()

In [30]:
import torch
import os
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
from sklearn.metrics import mean_squared_error
from tqdm import tqdm
from omegaconf import DictConfig, OmegaConf

from dataset import cls_small_data as Cdata
import model.cls_model as Cmodel
from dataset import cls_medium_data

from dataset import reg_data as Rdata
import model.reg_model as Rmodel

In [31]:
conf_file = OmegaConf.load(os.path.join(folder_name, 'config.yaml'))

In [32]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# NCA and LMNN setup

In [33]:
pip install metric-learn



In [34]:
import metric_learn
from metric_learn import LMNN,NCA

# Data Sets

Supported small dataset for classification:  
'zebra',
'zebra_special',
'bal',
'digits',
'iris',
'wine',
'breast_cancer',

for regression:
'califonia_housing',
'abalone',
'diabets',
'body_fat',
'ziweifaces'


Newly added data sets for mental health (psychology):

Classification:
'psych_depression_physical_symptons',
'covid_anxious',
'covid_depressed'


In [35]:
dataset_name = 'covid_depressed'
cfg = conf_file['dataset'][dataset_name]
#TODO need to add other covid data sets here.
if dataset_name in ['covid_depressed','covid_anxious','psych_depression_physical_symptons',
                    'zebra','zebra_special','bal','digits','iris','wine','breast_cancer']:
    criterion = torch.nn.CrossEntropyLoss()
    Xs, ys = Cdata.Cls_small_data(dataset_name)
elif dataset_name in []:
    criterion = torch.nn.CrossEntropyLoss()
    Xs, ys = cls_medium_data.Cls_medium_data(dataset_name)
else:
    criterion = torch.nn.MSELoss()
    Xs, ys = Rdata.Reg_data(dataset_name)

Columns in the dataset: Index(['SU_ID', 'P_PANEL', 'NATIONAL_WEIGHT', 'REGION_WEIGHT',
       'NATIONAL_WEIGHT_POP', 'REGION_WEIGHT_POP', 'NAT_WGT_COMB_POP',
       'REG_WGT_COMB_POP', 'P_GEO', 'SOC1',
       ...
       'REGION9', 'P_DENSE', 'MODE', 'LANGUAGE', 'MAIL50', 'RACE1_BANNER',
       'RACE2_BANNER', 'INC_BANNER', 'AGE_BANNER', 'HH_BANNER'],
      dtype='object', length=177)


In [36]:
# This section is used to reload the imported module.
# For example, if you made any changes in the model.cls_model, you should run importlib.reload(Cmodel) as long as you set import model.cls_model as Cmodel.
import importlib
importlib.reload(Cdata)

<module 'dataset.cls_small_data' from '/content/drive/Othercomputers/My MacBook Pro/GitHub/NN-kNN/dataset/cls_small_data.py'>

# Classification with NNKNN

In [37]:
# prompt: get the unique y values and their counts

unique_values, counts = np.unique(ys, return_counts=True)
print(f"Unique values: {unique_values}")
print(f"Counts: {counts}")
print(f"Xs.size(): {Xs.size()}")


Unique values: [0 1 2 3]
Counts: [1703 1703 1703 1703]
Xs.size(): torch.Size([6812, 161])


In [38]:
def train_cls(X_train,y_train, X_test, y_test, cfg:DictConfig):
  X_train = X_train.to(device)
  y_train = y_train.to(device)
  X_test = X_test.to(device)

  train_loader = torch.utils.data.DataLoader(torch.utils.data.TensorDataset(X_train, y_train), batch_size=cfg.batch_size, shuffle=True)

  # Train model
  model = Cmodel.NN_k_NN(X_train,
                         y_train,
                         cfg.ca_weight_sharing,
                         cfg.top_case_enabled,
                         cfg.top_k,
                         cfg.discount,
                         device=device)

  optimizer = torch.optim.Adam(model.parameters(), lr=cfg.learning_rate) #, weight_decay=1e-5)

  patience_counter = 0
  for epoch in range(cfg.training_epochs):
    epoch_msg = True

    for X_train_batch, y_train_batch in train_loader:
      model.train()
      _, _, output, predicted_class = model(X_train_batch)
      loss = criterion(output, y_train_batch)

      # Backward and optimize
      optimizer.zero_grad()
      loss.backward()
      optimizer.step()
      if epoch_msg and (epoch + 1) % 2 == 0:
        print(f'Epoch [{epoch + 1}/{cfg.training_epochs}], Loss: {loss.item():.4f}')

        epoch_msg = False
      # print("evaluating")
    model.eval()
    with torch.no_grad():
      _, _, output, predicted_class = model(X_test)

      # Calculate accuracy
      accuracy_temp = accuracy_score(y_test.numpy(), predicted_class.cpu().numpy())
    if epoch == 0:
      best_accuracy = accuracy_temp
      torch.save(model.state_dict(), cfg.PATH)

    elif accuracy_temp > best_accuracy:
      #memorize best model
      torch.save(model.state_dict(), cfg.PATH)
      best_accuracy = accuracy_temp
      patience_counter = 0

    elif patience_counter > cfg.patience:
      model.eval()
      print("patience exceeded, loading best model")
      break
    else:
      patience_counter += 1

  return best_accuracy, model

In [39]:
def load_model_cls(X_train,y_train,cfg):
  # Define the model architecture
  model = Cmodel.NN_k_NN(
      X_train,
      y_train,
      cfg.ca_weight_sharing,
      cfg.top_case_enabled,
      cfg.top_k,
      cfg.discount,
      device=device
  )
  # Load the state dictionary
  model.load_state_dict(torch.load(cfg.path))
  model.to(device)
  model.eval()
  return model

In [None]:
accuracies = []
knn_accuracies = []
lmnn_accuracies = []
PATH = os.path.join(folder_name, f'checkpoints/classifier_{dataset_name}.h5')
cfg.PATH = PATH
k_fold = KFold(n_splits=10, shuffle=True, random_state = None)
enable_lmnn = False

for train_index, test_index in k_fold.split(Xs):
  # Get training and testing data
  X_train, X_test = Xs[train_index], Xs[test_index]
  y_train, y_test = ys[train_index], ys[test_index]
  if(enable_lmnn):
    # https://contrib.scikit-learn.org/metric-learn/supervised.html#lmnn
    lmnn = LMNN(n_neighbors=5, learn_rate=1e-6)
    ##TODO, change here if you need to use a different one
    # lmnn = metric_learn.MLKR()
    # lmnn = metric_learn.NCA(max_iter=1000)
    lmnn.fit(X_train,y_train)
    knn = KNeighborsClassifier(n_neighbors=5,metric=lmnn.get_metric())
    knn.fit(X_train,y_train)
    # klmnn_accuracies.append( accuracy_score(knn.predict(X_test), y_test))
    lmnn_acc = accuracy_score(knn.predict(X_test), y_test)
    lmnn_accuracies.append(lmnn_acc)

  knn =  KNeighborsClassifier(n_neighbors=cfg.top_k)
  knn.fit(X_train, y_train)
  knn_acc  = accuracy_score(knn.predict(X_test), y_test)
  knn_accuracies.append(knn_acc)

  best_accuracy, model = train_cls(X_train,y_train, X_test, y_test, cfg)
  accuracies.append(best_accuracy)
  break

print(f"Average accuracy:{np.mean(accuracies):.3f}")
print(f"KNN accuracy:{np.mean(knn_accuracies):.3f}")
print(f"LMNN/NCA accuracy:{np.mean(lmnn_accuracies):.3f}")


# Regression with NNKNN

In [12]:
unique_values, counts = np.unique(ys, return_counts=True)
print(f"Unique values: {unique_values}")
print(f"Counts: {counts}")
print(f"Xs.size(): {Xs.size()}")

Unique values: [-1.3415393  -0.44717973  0.44717973  1.3415393 ]
Counts: [1651 1651 1651 1651]
Xs.size(): torch.Size([6604, 161])


In [13]:
def train_reg(X_train,y_train, X_test, y_test, cfg:DictConfig):
  X_train = X_train.to(device)
  y_train = y_train.to(device)
  X_test = X_test.to(device)

  train_loader = torch.utils.data.DataLoader(torch.utils.data.TensorDataset(X_train, y_train), batch_size=cfg.batch_size, shuffle=True)
  test_loader = torch.utils.data.DataLoader(torch.utils.data.TensorDataset(X_test, y_test), batch_size=cfg.batch_size, shuffle=False)


    # Train model
  model = Rmodel.NN_k_NN_regression(X_train,
                                    y_train,
                                    cfg.ca_weight_sharing,
                                    cfg.top_case_enabled,
                                    cfg.top_k,
                                    cfg.discount,
                                    cfg.class_weight_sharing,
                                    device=device)

  optimizer = torch.optim.Adam(model.parameters(), lr=cfg.learning_rate) #, weight_decay=1e-5)

  patience_counter = 0
  for epoch in range(cfg.training_epochs):
    # break # no training
    epoch_msg = True
    for X_train_batch, y_train_batch in train_loader:
      model.train()
      _, _, _, predicted_number = model(X_train_batch)
      # break
      loss = criterion(predicted_number.squeeze(), y_train_batch)
      # Backward and optimize
      optimizer.zero_grad()
      loss.backward()
      optimizer.step()
      if epoch_msg and (epoch + 1) % 2 == 0:
        epoch_msg = False
        print(f'Epoch [{epoch + 1}/{cfg.training_epochs}], Loss: {loss.item():.4f}')

    model.eval()
    with torch.no_grad():
      predicted_numbers = []
      for X_test_batch, _ in test_loader:
        X_test_batch = X_test_batch.to(device)
        _, _, _, predicted_number = model(X_test_batch)
        predicted_numbers.extend(predicted_number.squeeze().cpu().detach())

      predicted_numbers = torch.Tensor(predicted_numbers)
      accuracy_temp = criterion(y_test, predicted_numbers)

    if epoch == 0:
      best_accuracy = accuracy_temp
      torch.save(model.state_dict(), cfg.PATH)
    elif accuracy_temp < best_accuracy:
      torch.save(model.state_dict(), cfg.PATH)
      best_accuracy = accuracy_temp
      patience_counter = 0
    elif patience_counter > cfg.patience:
      model.eval()
      print("patience exceeded, loading best model")
      break
    else:
      patience_counter += 1

  _, case_activations, _, predicted_number = model(X_test)

  top_case_indices = torch.topk(case_activations, 5, dim=1)[1].cpu()

  accuracy = criterion(y_test, predicted_number.squeeze().cpu())
  y_train = y_train.cpu()
  top_k_average_accuracy = mean_squared_error(torch.mean(y_train[top_case_indices], dim=1), y_test)

  return best_accuracy, accuracy, top_k_average_accuracy, model

In [14]:
# prompt: load_model_reg()

def load_model_reg(X_train,y_train,cfg):
  # Define the model architecture
  model = Rmodel.NN_k_NN_regression(
      X_train,
      y_train,
      cfg.ca_weight_sharing,
      cfg.top_case_enabled,
      cfg.top_k,
      cfg.discount,
      cfg.class_weight_sharing,
      device=device
  )
  # Load the state dictionary
  model.load_state_dict(torch.load(cfg.path))
  model.to(device)
  model.eval()
  return model


In [15]:
best_accuracies = []
accuracies = []
top_k_average_accuracies = []
knn_accuracies = []
PATH = os.path.join(folder_name, f'checkpoints/regression_{dataset_name}.h5')
cfg.PATH = PATH
k_fold = KFold(n_splits=10, shuffle = True,random_state = None)


for train_index, test_index in k_fold.split(Xs):
  # Get training and testing data
  X_train, X_test = Xs[train_index], Xs[test_index]
  y_train, y_test = ys[train_index], ys[test_index]

  knn = KNeighborsRegressor(n_neighbors=cfg.top_k)
  knn.fit(X_train, y_train)
  knn_accuracies.append(mean_squared_error(knn.predict(X_test), y_test))

  best_accuracy, accuracy, top_k_average_accuracy, model= train_reg(X_train, y_train, X_test, y_test, cfg)
  best_accuracies.append(best_accuracy)
  accuracies.append(accuracy)
  top_k_average_accuracies.append(top_k_average_accuracy)
  break
print("Average accuracy:", np.mean([acc.detach().numpy() for acc in accuracies]))
print("Average top_k_average_accuracies", np.mean(top_k_average_accuracies))
print("KNN accuracy:", np.mean(knn_accuracies))

Epoch [2/1000], Loss: 0.9530
Epoch [4/1000], Loss: 1.2346
Epoch [6/1000], Loss: 1.0282
Epoch [8/1000], Loss: 1.2116
Epoch [10/1000], Loss: 0.7650
Epoch [12/1000], Loss: 0.6646
Epoch [14/1000], Loss: 1.1751
Epoch [16/1000], Loss: 1.0585
Epoch [18/1000], Loss: 0.6065
Epoch [20/1000], Loss: 1.2232
Epoch [22/1000], Loss: 1.0657
Epoch [24/1000], Loss: 0.5398
Epoch [26/1000], Loss: 0.4660
Epoch [28/1000], Loss: 1.0912
Epoch [30/1000], Loss: 0.7082
Epoch [32/1000], Loss: 0.8464
Epoch [34/1000], Loss: 0.4497
Epoch [36/1000], Loss: 0.8295
Epoch [38/1000], Loss: 0.7380
Epoch [40/1000], Loss: 0.6853
Epoch [42/1000], Loss: 0.6481
Epoch [44/1000], Loss: 0.6393
Epoch [46/1000], Loss: 0.6188
Epoch [48/1000], Loss: 0.8710
Epoch [50/1000], Loss: 0.9190
Epoch [52/1000], Loss: 1.0682
Epoch [54/1000], Loss: 0.7960
Epoch [56/1000], Loss: 0.4212
patience exceeded, loading best model
Average accuracy: 1.1028702
Average top_k_average_accuracies 1.1869186
KNN accuracy: 1.1681862


# Results Interpretation

In [None]:
def print_model_features(input_model):
  for n, p in model.named_parameters():
    print(n)
    print(p.data)

In [None]:
print_model_features(model)

In [None]:
# for regression only. for classification is different
#feature_activations, case_activations, predicted_number
model.eval()
feature_activations, case_activations, output, predicted_class = model(X_test)

In [None]:
predicted_class

In [None]:
y_test

In [None]:
# prompt: accuracy comparing predicted_class and y_test

accuracy = accuracy_score(y_test.numpy(), predicted_class.cpu().numpy())
print("Accuracy:", accuracy)


In [None]:
#inspecting the case activations
top_case_indices = torch.topk(case_activations, 5, dim=1)[1]

In [None]:
X_test[0]

In [None]:
y_test[0]

In [None]:
X_train[top_case_indices[0][0]]

In [None]:
y_train[top_case_indices[0][0]]

By comparing the following two blocks' outputs, you can see we are retrieving a good neighbor.

In [None]:
#sum abs of X_test[0] and the top activated case
sum(abs(X_test[0] - X_train[top_case_indices[0][0]]))

In [None]:
# prompt: average sum abs of X_test[0] and X_train data
print(np.mean([sum(abs(X_test[0] - X_train[i])) for i in range(len(X_train))]))

TODO:: A better way is to show the distribution of ``X_test[0] - X_train[i]``

In [None]:
y_train[top_case_indices[0]]

In [None]:
knn.predict(X_test)[0]

In [None]:
indices = knn.kneighbors(X_test)[1][0]

In [None]:
y_train[indices]

# Sanity Check


## Classification Neural Network

In [None]:
# Hyperparameters
input_size = X_train.shape[1]
hidden_size = 1024
num_classes = torch.unique(ys).shape[0]
learning_rate = 1e-5
batch_size = 16
epochs = 2000

In [None]:
# Define the neural network architecture for classification
class NeuralNet(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(NeuralNet, self).__init__()
        self.nn = nn.Sequential(
            nn.Linear(input_size, hidden_size ),
            nn.LeakyReLU(),
            nn.Linear(hidden_size , hidden_size // 2),
            # nn.Dropout(0.5),
            nn.LeakyReLU(),
            nn.Linear(hidden_size // 2, hidden_size // 4),
            # nn.Dropout(0.5),
            nn.LeakyReLU(),
            nn.Linear(hidden_size // 4, num_classes)
            )
        self.init_weights()

    def init_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Linear):
                torch.nn.init.xavier_uniform_(m.weight)
                if m.bias is not None:
                    m.bias.data.fill_(0)

    def forward(self, x):
        return self.nn(x)


In [None]:
train_loader = torch.utils.data.DataLoader(torch.utils.data.TensorDataset(X_train, y_train), batch_size=batch_size, shuffle=True)
patience_counter = 0
best_model = None
best_accuracy = None
# Initialize the model, loss function, and optimizer
model = NeuralNet(input_size, hidden_size, num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(epochs):
  epoch_msg = True
  training_total_acc = 0.0
  training_total_loss = 0.0
  num_of_batches = len(train_loader)
  for X_train_batch, y_train_batch in train_loader:
    model.train()
    # Forward pass
    outputs = model(X_train_batch)
    loss = criterion(outputs, y_train_batch)

    # Backward and optimize
    _, predicted = torch.max(outputs, 1)
    training_total_acc += torch.sum(predicted == y_train_batch).item()

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    training_total_loss += loss.item()
    # if (i + 1) % 5 == 0
  if epoch == 0 or (epoch + 1) % 100 == 0:
    print(f"Epoch: {epoch + 1}, Training Loss: {training_total_loss/num_of_batches:.2f} Acc: {training_total_acc/num_of_batches:.2f}")
  # Testing the model
  model.eval()
  with torch.no_grad():
    outputs = model(X_test)
    loss = criterion(outputs, y_test)
    _, predicted = torch.max(outputs, 1)
    accuracy = torch.sum(predicted == y_test).item() / len(y_test)
    print(f'Accuracy on the test set: {accuracy * 100:.2f}%')
    if best_accuracy is None or accuracy > best_accuracy:
      best_accuracy = accuracy
      best_model = model
      patience_counter = 0
    else:
      patience_counter += 1
    if epoch_msg and (epoch + 1) % 100 == 0:
      epoch_msg = False
      print(f'Epoch [{epoch + 1}/{epoch}], Test Loss: {loss.item()}')
  if patience_counter >= cfg.patience:
    print("Best acc achieved: ", best_accuracy)
    break

Epoch: 1, Training Loss: 1.39 Acc: 4.19
Accuracy on the test set: 26.48%
Accuracy on the test set: 30.41%
Accuracy on the test set: 29.35%
Accuracy on the test set: 32.53%
Accuracy on the test set: 30.71%
Accuracy on the test set: 32.68%
Accuracy on the test set: 32.83%
Accuracy on the test set: 30.86%
Accuracy on the test set: 33.28%
Accuracy on the test set: 32.83%
Accuracy on the test set: 32.98%
Accuracy on the test set: 32.38%
Accuracy on the test set: 31.62%
Accuracy on the test set: 32.98%
Accuracy on the test set: 31.47%
Accuracy on the test set: 32.22%
Accuracy on the test set: 32.22%
Accuracy on the test set: 31.62%
Accuracy on the test set: 31.47%
Accuracy on the test set: 32.38%
Accuracy on the test set: 32.38%
Accuracy on the test set: 31.47%
Accuracy on the test set: 32.38%
Accuracy on the test set: 32.38%
Accuracy on the test set: 32.38%
Accuracy on the test set: 31.77%
Accuracy on the test set: 31.47%
Accuracy on the test set: 31.92%
Accuracy on the test set: 31.62%
Acc

## Regression Neural Network

In [22]:
# Hyperparameters
input_size = X_train.shape[1]
hidden_size = 100
# num_classes = torch.unique(ys).shape[0]
learning_rate = 1e-5
batch_size = 16
epochs = 2000

In [23]:
# prompt: a standard neural network with 3 fully connected layers for regression

import torch
import torch.nn as nn
import torch.nn.functional as F

class RegressionNet(nn.Module):
    def __init__(self, input_size):
        super(RegressionNet, self).__init__()
        self.nn = nn.Sequential(
            nn.Linear(input_size, hidden_size ),
            nn.LeakyReLU(),
            nn.Linear(hidden_size , hidden_size // 2),
            # nn.Dropout(0.5),
            nn.LeakyReLU(),
            nn.Linear(hidden_size // 2, hidden_size // 4),
            # nn.Dropout(0.5),
            nn.LeakyReLU(),
            nn.Linear(hidden_size // 4, 1)
            )
        self.init_weights()

    def init_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Linear):
                torch.nn.init.xavier_uniform_(m.weight)
                if m.bias is not None:
                    m.bias.data.fill_(0)

    def forward(self, x):
        return self.nn(x).squeeze()

In [25]:
train_loader = torch.utils.data.DataLoader(torch.utils.data.TensorDataset(X_train, y_train), batch_size=batch_size, shuffle=True)
patience_counter = 0
best_model = None
best_accuracy = None
model = RegressionNet(Xs.shape[1])
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
for epoch in range(epochs):
  epoch_msg = True
  training_total_loss = 0.0
  num_of_batches = len(train_loader)
  for X_train_batch, y_train_batch in train_loader:
    model.train()
    # Forward pass
    outputs = model(X_train_batch)
    loss = criterion(outputs, y_train_batch)
    training_total_loss += loss.item()
    # Backward and optimize
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

  if epoch == 0 or (epoch + 1) % 3 == 0:
    print(f'Epoch: {epoch + 1}, Training Loss: {training_total_loss/num_of_batches:.2f}')

  # Testing the model
  model.eval()
  with torch.no_grad():
    outputs = model(X_test)
    loss = criterion(outputs, y_test)
    if best_accuracy is None or loss.item() < best_accuracy:
      best_accuracy = loss.item()
      best_model = model
      patience_counter = 0
    else:
      patience_counter += 1
    if epoch_msg and (epoch + 1) % 100 == 0:
      epoch_msg = False
      print(f'Epoch [{epoch + 1}/{epochs}], Test Loss: {loss.item()}')
      # print(f'Loss on the test set: {loss.item()}')
  if patience_counter >= cfg.patience:
    print("Best loss achieved: ", best_accuracy)
    break

Epoch: 1, Training Loss: 2.88
Epoch: 3, Training Loss: 1.48
Epoch: 6, Training Loss: 1.08
Epoch: 9, Training Loss: 1.02
Epoch: 12, Training Loss: 1.00
Epoch: 15, Training Loss: 0.99
Epoch: 18, Training Loss: 0.99
Epoch: 21, Training Loss: 0.98
Epoch: 24, Training Loss: 0.97
Epoch: 27, Training Loss: 0.97
Epoch: 30, Training Loss: 0.96
Epoch: 33, Training Loss: 0.96
Epoch: 36, Training Loss: 0.95
Epoch: 39, Training Loss: 0.95
Epoch: 42, Training Loss: 0.94
Epoch: 45, Training Loss: 0.94
Epoch: 48, Training Loss: 0.93
Epoch: 51, Training Loss: 0.93
Epoch: 54, Training Loss: 0.92
Epoch: 57, Training Loss: 0.92
Epoch: 60, Training Loss: 0.91
Epoch: 63, Training Loss: 0.91
Epoch: 66, Training Loss: 0.91
Best loss achieved:  0.9809331893920898
