# Install necessary libraries

In [57]:
!pip -qq install torch torchvision torchaudio
!pip -qq install matplotlib pandas numpy scikit-learn openpyxl


# Import necessary libraries

In [62]:
import torch
from torch import nn
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.compose import ColumnTransformer

# Device-agnostic setup

In [14]:
# Pick the compute backend in order of preference: CUDA, then MPS, then CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

# Load the data

In [41]:
# Read the openings dataset; the path is relative to the notebook's working directory.
data = pd.read_csv("data/openings.csv")
# Preview the first rows to check the column layout.
data.head()

Unnamed: 0,space,room_size,capacity,user_per_min,width1,width2,width3,width4,width5,no_of_openings_1,no_of_openings_2,no_of_openings_3,no_of_openings_4,no_of_openings_5
0,Area for standing,50,167,5,0.65,0.0,0.0,0.0,0.0,True,False,False,False,False
1,Area for standing,75,250,5,0.65,0.0,0.0,0.0,0.0,True,False,False,False,False
2,Area for standing,100,333,5,0.65,0.0,0.0,0.0,0.0,True,False,False,False,False
3,Area for standing,125,417,5,0.65,0.0,0.0,0.0,0.0,True,False,False,False,False
4,Area for standing,150,500,5,0.65,0.0,0.0,0.0,0.0,True,False,False,False,False


In [42]:
# Features: every column that is left once the ten target columns are removed.
X = data.drop(
    columns=[
        "no_of_openings_1",
        "no_of_openings_2",
        "no_of_openings_3",
        "no_of_openings_4",
        "no_of_openings_5",
        "width1",
        "width2",
        "width3",
        "width4",
        "width5",
    ]
)

In [43]:
# Targets: every column that is left once the four input columns are removed.
y = data.drop(columns=["space", "room_size", "capacity", "user_per_min"])

In [44]:
# Display the feature and target shapes to confirm the row counts match.
X.shape, y.shape

((7350, 4), (7350, 10))

## Split the data into train and test data

In [45]:
# Hold out 10% of the rows for testing; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.1,
    random_state=42,
)

In [46]:
# Convert the targets (float widths plus boolean flags) to one float32 array.
# np.asarray accepts both the DataFrame produced by train_test_split and an
# already-converted ndarray, so this cell can be re-run safely.
y_train = np.asarray(y_train, dtype=np.float32)
y_test = np.asarray(y_test, dtype=np.float32)

# Data Preprocessing

In [47]:
num_attribs = ["room_size", "capacity", "user_per_min"]
cat_attribs = ["space"]

# Standardise the numeric columns and one-hot encode the categorical column.
# sparse_threshold=0 makes the transformer always return a dense array. With the
# default threshold the result silently becomes a sparse matrix once the one-hot
# block makes the output sparse enough, and the tensor conversion that follows
# only works on dense arrays.
full_pipeline = ColumnTransformer(
    [
        ("num", StandardScaler(), num_attribs),
        ("cat", OneHotEncoder(), cat_attribs),
    ],
    sparse_threshold=0,
)

In [48]:
# The preprocessing is fitted on the training split only and then reused,
# unchanged, on the test split. Features are cast to float32 on the CPU
# before being moved to the target device.
X_train_tensor = torch.as_tensor(
    full_pipeline.fit_transform(X_train), dtype=torch.float32
).to(device)
X_test_tensor = torch.as_tensor(
    full_pipeline.transform(X_test), dtype=torch.float32
).to(device)

# Targets are already float32 NumPy arrays.
y_train_tensor = torch.from_numpy(y_train).to(device)
y_test_tensor = torch.from_numpy(y_test).to(device)

In [52]:
# Peek at one target row: the first five entries come from width1..width5,
# the last five from the no_of_openings_1..no_of_openings_5 flags.
y_train_tensor[0]

tensor([0.7500, 0.7500, 2.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.0000, 0.0000,
        0.0000], device='mps:0')

In [50]:
# The second dimension is the encoded feature count that the model's first layer must accept.
X_train_tensor.shape

torch.Size([6615, 13])

# Building the model

In [79]:
class OpeningsPredictor(nn.Module):
    """Fully connected network with two hidden ReLU layers.

    Args:
        in_features: Number of input features per sample (default 13).
        hidden_units: Width of both hidden layers (default 64).
        out_features: Number of values predicted per sample (default 10).

    The defaults reproduce the original fixed 13 -> 64 -> 64 -> 10 architecture,
    so ``OpeningsPredictor()`` builds exactly the same layers as before.
    """

    def __init__(self, in_features=13, hidden_units=64, out_features=10):
        super().__init__()
        self.layer_stack = nn.Sequential(
            nn.Linear(in_features=in_features, out_features=hidden_units),
            nn.ReLU(),
            nn.Linear(in_features=hidden_units, out_features=hidden_units),
            nn.ReLU(),
            # No activation on the output layer: raw values are returned.
            nn.Linear(in_features=hidden_units, out_features=out_features),
        )

    def forward(self, X):
        """Return the network output for a batch ``X`` of shape (batch, in_features)."""
        return self.layer_stack(X)

In [80]:
# Seed before instantiation: the layer weights are drawn randomly when the model
# is constructed, so the seed has to be set first for runs to be reproducible.
torch.manual_seed(42)
openings_predictor = OpeningsPredictor().to(device)

In [81]:
# L1 loss is the mean absolute error over all output columns.
loss_fn = nn.L1Loss()

# Adam updates every parameter of the model with a learning rate of 0.01.
optimizer = torch.optim.Adam(
    openings_predictor.parameters(),
    lr=0.01,
)

In [82]:
def acc(y_pred, y_train):
    """Return the mean signed per-sample error between predictions and targets.

    For each sample the error is
    ``(round(pred[0]) - target[0]) + sum(pred[1:] - target[1:])``
    and the result is the average of that quantity over all samples.

    NOTE(review): despite the name this is not a classification accuracy.
    The errors are signed, so positive and negative deviations cancel and a
    value near 0 does not by itself mean the predictions are close.

    Args:
        y_pred: 2-D tensor of predictions, one row per sample.
        y_train: 2-D tensor of targets with the same column layout.
            If the row counts differ, only the common leading rows are used.

    Returns:
        A 0-dimensional tensor holding the mean signed error.

    Raises:
        ValueError: If there are no samples to average over.
    """
    n_samples = min(len(y_pred), len(y_train))
    if n_samples == 0:
        raise ValueError("acc() needs at least one sample")
    y_pred, y_train = y_pred[:n_samples], y_train[:n_samples]

    # Vectorised over the batch: a Python loop over rows issues several tiny
    # tensor operations per sample, which is very slow on an accelerator.
    first_col_error = torch.round(y_pred[:, 0]) - y_train[:, 0]
    remaining_error = (y_pred[:, 1:] - y_train[:, 1:]).sum(dim=1)
    return (first_col_error + remaining_error).mean()

# Training the model

In [83]:
torch.manual_seed(42)
epochs = 1000

# History sampled every 100 epochs. Losses are stored as plain Python floats so
# the lists do not keep autograd graphs or device tensors alive and can be
# passed straight to matplotlib.
epoch_count = []
train_loss_values = []
test_loss_values = []

for epoch in range(epochs):

    # Training: one full-batch gradient step.
    openings_predictor.train()
    train_predictions = openings_predictor(X_train_tensor)
    train_loss = loss_fn(train_predictions, y_train_tensor)
    optimizer.zero_grad()
    train_loss.backward()
    optimizer.step()

    # Testing: forward pass only, no autograd tracking.
    openings_predictor.eval()
    with torch.inference_mode():
        test_predictions = openings_predictor(X_test_tensor)
        test_loss = loss_fn(test_predictions, y_test_tensor)

    # Print out what's happening
    if epoch % 100 == 0:
        epoch_count.append(epoch)
        train_loss_values.append(train_loss.item())
        test_loss_values.append(test_loss.item())
        # The training predictions still track gradients; detach them so the
        # metric does not build an autograd graph of its own.
        print(f"Epoch: {epoch} | Loss: {train_loss:.5f} Acc: {acc(train_predictions.detach(), y_train_tensor).item():.4f}| Test Loss: {test_loss:.5f} Test Acc: {acc(test_predictions, y_test_tensor).item():.4f}")

Epoch: 0 | Loss: 0.55873 Acc: -5.0489| Test Loss: 0.50321 Test Acc: -4.6940
Epoch: 100 | Loss: 0.26098 Acc: -0.7066| Test Loss: 0.26283 Test Acc: -0.6662
Epoch: 200 | Loss: 0.24391 Acc: -0.8155| Test Loss: 0.24795 Test Acc: -0.7449
Epoch: 300 | Loss: 0.22872 Acc: -0.6225| Test Loss: 0.23723 Test Acc: -0.5190
Epoch: 400 | Loss: 0.22426 Acc: -0.7084| Test Loss: 0.23282 Test Acc: -0.6571
Epoch: 500 | Loss: 0.21350 Acc: -0.5528| Test Loss: 0.22291 Test Acc: -0.5041
Epoch: 600 | Loss: 0.21020 Acc: -0.3312| Test Loss: 0.21519 Test Acc: -0.3914
Epoch: 700 | Loss: 0.19767 Acc: -0.3103| Test Loss: 0.20690 Test Acc: -0.3415
Epoch: 800 | Loss: 0.18650 Acc: -0.2978| Test Loss: 0.19660 Test Acc: -0.1566
Epoch: 900 | Loss: 0.18406 Acc: -0.0119| Test Loss: 0.19085 Test Acc: -0.0977


In [88]:
# Final evaluation on the held-out split after training has finished.
openings_predictor.eval()
with torch.inference_mode():
    test_predictions = openings_predictor(X_test_tensor)
    test_loss = loss_fn(test_predictions, y_test_tensor)

# Show the raw output for the first test sample, then the summary metrics.
print(test_predictions[0])
print(f"Test Loss: {test_loss:.5f} Test Acc: {acc(test_predictions, y_test_tensor).item():.4f}")

tensor([ 2.1011,  0.2287,  0.0493, -0.0136, -0.0295,  0.7559,  0.1612,  0.0187,
         0.0085, -0.0381], device='mps:0')
Test Loss: 0.18141 Test Acc: 0.1991


# Inspect sample predictions

In [93]:
def print_predictions(predictions):
    """Print a compact summary for the first ten prediction rows.

    Each printed list has six slots. The first is the 1-based position of the
    largest value among the entries from index 5 onwards. It is followed by
    that many leading entries of the row, rounded to two decimals, and any
    remaining slots stay 0.
    """
    for row in predictions[:10]:
        n_openings = torch.argmax(row[5:]).type(torch.int).item() + 1
        summary = [n_openings, 0, 0, 0, 0, 0]
        # Fill one slot per predicted opening with its rounded leading value.
        for slot, raw_value in enumerate(row[:n_openings], start=1):
            summary[slot] = np.round(raw_value.cpu().numpy(), 2)
        print(summary)

# Run the forward pass without autograd tracking: print_predictions calls
# .numpy() on the outputs, which fails for tensors that require grad.
with torch.inference_mode():
    print_predictions(openings_predictor(X_test_tensor))

[1, 2.1, 0, 0, 0, 0]
[5, 0.75, 0.73, 1.91, 1.91, 2.03]
[3, 0.76, 0.79, 0.72, 0, 0]
[5, 0.76, 0.74, 0.77, 0.91, 0.63]
[5, 0.75, 1.09, 1.21, 0.67, 0.49]
[1, 2.12, 0, 0, 0, 0]
[4, 0.76, 0.74, 0.8, 0.91, 0]
[2, 0.77, 0.97, 0, 0, 0]
[3, 0.76, 1.77, 2.4, 0, 0]
[3, 0.75, 1.49, 1.88, 0, 0]


# Saving the model

In [94]:
# The whole module object is pickled on the CPU. Loading the file therefore needs
# the OpeningsPredictor class to be importable.
# NOTE(review): the target directory must already exist - confirm.
torch.save(openings_predictor.to("cpu"), "../models/openings.pth")
# nn.Module.to() moves the module in place, so put it back on the working device.
# Otherwise re-running any earlier evaluation cell fails with a device mismatch
# against the data tensors.
openings_predictor = openings_predictor.to(device)

In [95]:
# Persist the fitted preprocessing pipeline next to the model so the same
# scaling and encoding can be applied to new inputs.
# NOTE(review): this import sits apart from the notebook's main import cell.
import joblib
joblib.dump(full_pipeline, "../models/openings_pipeline.pkl", compress=True)  

['../models/openings_pipeline.pkl']