In [1]:
import torch
import pandas as pd
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer

import matplotlib.pyplot as plt
from pathlib import Path
import numpy as np
from torch.utils.data import TensorDataset, DataLoader

In [2]:
# Load the training split; the first CSV column becomes the DataFrame index.
df_train = pd.read_csv('./data/Plane_dataset/train.csv', index_col=0)

# Replace missing arrival delays with 0 so no NaN reaches the scaler later on.
arrival_delay_col = 'Arrival Delay in Minutes'
df_train[arrival_delay_col] = df_train[arrival_delay_col].fillna(0)

In [3]:
# Load the test split, discard the 'id' column and remove every row that
# contains at least one missing value.
# NOTE(review): the training frame fills missing arrival delays with 0 instead
# of dropping rows - confirm that the different treatment is intended.
df_test = (
    pd.read_csv('./data/Plane_dataset/test.csv', index_col=0)
    .drop(columns=['id'])
    .dropna()
)

In [4]:
# Features: every column except the last one, with the 'id' column removed.
X_train_val = df_train.iloc[:, :-1].drop(columns=['id'])

# Target: the last column, dummy-encoded; drop_first=True removes the first
# category's indicator column.
y_train_val = pd.get_dummies(df_train.iloc[:, -1], drop_first=True)

# Hold out 10% of the training data for validation (fixed seed for repeatability).
X_train, X_val, y_train, y_val = train_test_split(
    X_train_val,
    y_train_val,
    test_size=0.1,
    random_state=102,
)

# The test frame already had 'id' removed when it was loaded.
X_test = df_test.iloc[:, :-1].copy()
y_test = pd.get_dummies(df_test.iloc[:, -1], drop_first=True)

In [5]:
# Columns that are standardized; every remaining feature column is one-hot encoded.
num_cols = [
    'Age',
    'Flight Distance',
    'Departure Delay in Minutes',
    'Arrival Delay in Minutes',
]
cat_cols = list(X_train_val.drop(columns=num_cols).columns)

one_hot = OneHotEncoder(drop='if_binary', sparse_output=False, handle_unknown='ignore')
X_scaler = StandardScaler()

# Output column order follows the transformer order: one-hot block first,
# scaled numeric block second.
ct = ColumnTransformer(transformers=[
    ('one_hot', one_hot, cat_cols),
    ('scaler', X_scaler, num_cols),
])

# Fit on the training split only, then apply the fitted transform to the
# validation and test splits.
X_train_transformed = ct.fit_transform(X_train)
X_val_transformed, X_test_transformed = (ct.transform(part) for part in (X_val, X_test))

In [6]:
def _to_tensor(data, dtype):
    """Convert a pandas object, NumPy array or tensor to a tensor of `dtype`.

    Unlike `torch.from_numpy`, this also accepts an existing tensor, so the
    cell can be re-run after its variables have already been converted.
    """
    if isinstance(data, (pd.DataFrame, pd.Series)):
        data = data.to_numpy()
    return torch.as_tensor(data, dtype=dtype)


# Feature matrices become float32 tensors.
X_train_transformed = _to_tensor(X_train_transformed, torch.float32)
X_val_transformed = _to_tensor(X_val_transformed, torch.float32)
X_test_transformed = _to_tensor(X_test_transformed, torch.float32)

# Targets become int64 tensors with size-1 dimensions removed.
y_train = _to_tensor(y_train, torch.int64).squeeze()
y_val = _to_tensor(y_val, torch.int64).squeeze()
y_test = _to_tensor(y_test, torch.int64).squeeze()

In [7]:
# Seed PyTorch's global random number generator with a fixed value. The return
# value is bound to `_` so the cell does not display it as output.
_ = torch.manual_seed(0)

In [8]:
# Mini-batch size shared by all three loaders.
batch_size = 10

# Only the training loader shuffles; validation and test are iterated in a
# fixed order so evaluation runs are deterministic.
train_dataset = TensorDataset(X_train_transformed, y_train)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

val_dataset = TensorDataset(X_val_transformed, y_val)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

test_dataset = TensorDataset(X_test_transformed, y_test)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Device used for the model in the following cells.
device = "cpu"

In [9]:
from Model import MLP, train, test, print_size_of_model

In [10]:
# Take the input width from the encoded feature matrix instead of hard-coding
# it, so the model stays in sync with the number of columns the encoder produces.
net = MLP(input_size=X_train_transformed.shape[1], output_size=2).to(device)

In [11]:
MODEL_FILENAME = './Saved_models/Plane.pt'

if Path(MODEL_FILENAME).exists():
    # map_location remaps the stored tensors onto `device`, so a checkpoint
    # saved from a different device can still be loaded here.
    net.load_state_dict(torch.load(MODEL_FILENAME, map_location=device))
    print('Loaded model from disk')
else:
    # Create the output folder before training so a missing directory cannot
    # make torch.save fail after the training run has finished.
    Path(MODEL_FILENAME).parent.mkdir(parents=True, exist_ok=True)
    # NOTE(review): the meaning of the positional arguments 1000 and 5 is
    # defined in the project-local Model.train - confirm there.
    train(net, train_loader, val_loader, 1000, 5, device=device)
    torch.save(net.state_dict(), MODEL_FILENAME)

Loaded model from disk


In [12]:
# Evaluate the unmodified network on the test loader and report the result.
accuracy_before = test(net, test_loader)
print(f'Accuracy of the model before quantization: {accuracy_before}')

  0%|          | 0/2590 [00:00<?, ?it/s]

Accuracy of the model before quantization: 96.35


In [13]:
from Factorization import factorize, SVD_quant

In [14]:
# Integer divisor applied to each weight matrix's rank (rank // reduction_rate)
# to obtain the target rank passed to SVD_quant / factorize.
reduction_rate = 3

In [15]:
import re  # NOTE(review): not referenced in this cell

# Shallow copy of the state dict: entries replaced below do not affect `net`.
original_state = net.state_dict()
net_state_copy = original_state.copy()
linear_weights_keys = [key for key in net_state_copy if 'linear' in key and '.weight' in key]

# Every selected weight matrix except the last one is replaced by the result
# of SVD_quant at a reduced rank.
for layer in linear_weights_keys[:-1]:
    print(layer)
    W = original_state[layer].detach()
    target_rank = torch.linalg.matrix_rank(W) // reduction_rate
    # Alternative tried previously: factorize(W, target_rank, method='min-max')
    net_state_copy[layer] = SVD_quant(W, target_rank, method='mse')

linear1.weight
linear2.weight


In [16]:
# Build a fresh model with the same layout as `net`; the input width comes from
# the encoded feature matrix rather than a hard-coded constant.
new_net = MLP(input_size=X_train_transformed.shape[1], output_size=2).to(device)
# Load the state dict whose selected linear weights were replaced by SVD_quant.
new_net.load_state_dict(net_state_copy)

# Last expression of the cell: its value is displayed as the cell output.
test(new_net, test_loader)

  0%|          | 0/2590 [00:00<?, ?it/s]

95.45

In [17]:
import re  # NOTE(review): not referenced in this cell

# NOTE(review): this rebinds net_state_copy to an unmodified copy of the
# original weights, replacing the quantized state dict built earlier.
net_state_copy = net.state_dict().copy()
linear_weights_keys = [key for key in net_state_copy if 'linear' in key and '.weight' in key]

mem_usage_init, mem_usage_compressed = [], []

# Compare the byte size of each selected weight matrix with the byte size of
# the two factors returned by factorize (the last selected layer is skipped).
for layer in linear_weights_keys[:-1]:
    W = net.state_dict()[layer].detach()
    A, B, sc = factorize(W, torch.linalg.matrix_rank(W) // reduction_rate, return_int=True)

    mem_usage_init.append(W.numel() * W.element_size())
    # Only A and B are counted; the third return value `sc` is not included.
    factor_bytes = [factor.numel() * factor.element_size() for factor in (A, B)]
    mem_usage_compressed.append(sum(factor_bytes))

print(f'Initial (byte): {sum(mem_usage_init)}')
print(f'Compressed (byte): {sum(mem_usage_compressed)}')

rank: 31
rank: 33
Initial (byte): 77200
Compressed (byte): 12583
