# Downloading necessary libraries

In [1]:
!pip -qq install torch torchvision torchaudio
!pip -qq install matplotlib pandas numpy scikit-learn openpyxl

# Import necessary libraries

In [2]:
import torch
from torch import nn
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.compose import ColumnTransformer

# Device agnostic

In [3]:
device = "cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu"

# Load the data

In [4]:
data = pd.read_excel("data/openings.xlsx")
data.head()

Unnamed: 0,space,room_size,capacity,user_per_min,no_of_openings,width1,width2,width3,width4,width5
0,Area for standing,50,167,5,1,0.65,0.0,0.0,0.0,0.0
1,Area for standing,75,250,5,1,0.65,0.0,0.0,0.0,0.0
2,Area for standing,100,333,5,1,0.65,0.0,0.0,0.0,0.0
3,Area for standing,125,417,5,1,0.65,0.0,0.0,0.0,0.0
4,Area for standing,150,500,5,1,0.65,0.0,0.0,0.0,0.0


In [5]:
X = data.drop(['no_of_openings', 'width1', 'width2', 'width3', 'width4', 'width5'], axis=1)

In [6]:
y = data.drop(["space", "room_size", "capacity", "user_per_min"], axis=1)

## Split the data into train and test data

In [7]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)

# Data Preprocessing

In [8]:
num_attribs = ["room_size", "capacity", "user_per_min"]
cat_attribs = ["space"]

full_pipeline = ColumnTransformer([
        ("num", StandardScaler(), num_attribs),
        ("cat", OneHotEncoder(), cat_attribs),
    ])

In [9]:
X_train_tensor = torch.Tensor(full_pipeline.fit_transform(X_train)).to(device)
X_test_tensor = torch.Tensor(full_pipeline.transform(X_test)).to(device)
y_train_tensor = torch.from_numpy(y_train.values).float().to(device)
y_test_tensor = torch.from_numpy(y_test.values).float().to(device)

In [10]:
X_train_tensor.shape

torch.Size([6615, 13])

# Building the model

In [11]:
class OpeningsPredictor(nn.Module):
    def __init__(self):
        super().__init__()
        self.layer_stack = nn.Sequential(
            nn.Linear(in_features=13, out_features=64),
            nn.ReLU(),
            nn.Linear(in_features=64, out_features=6)
        )

    def forward(self, X):
        return self.layer_stack(X)

In [12]:
openings_predictor = OpeningsPredictor().to(device)

In [13]:
loss_fn = nn.L1Loss()
optimizer = torch.optim.Adam(params=openings_predictor.parameters(), lr=0.01)

In [14]:
def acc(y_pred, y_train):
    sum_ = 0
    count = 0
    for x, y in zip(y_pred, y_train):
        sum_ += (torch.round(x[0])-y[0]) + sum(x[1:]-y[1:])
        count += 1
    return sum_/count

# Training the model

In [15]:
torch.manual_seed(42)
epochs = 500

epoch_count = []
train_loss_values = []
test_loss_values = []

for epoch in range(epochs):

    # Training
    openings_predictor.train()
    train_predictions = openings_predictor(X_train_tensor)
    train_loss = loss_fn(train_predictions, y_train_tensor)
    optimizer.zero_grad()
    train_loss.backward()
    optimizer.step()

    # Testing
    openings_predictor.eval()
    with torch.inference_mode():
        test_predictions = openings_predictor(X_test_tensor)
        test_loss = loss_fn(test_predictions, y_test_tensor)

    # Print out what's happening
    if epoch % 100 == 0:
        epoch_count.append(epoch)
        train_loss_values.append(train_loss)
        test_loss_values.append(test_loss)
        print("Train :", acc(train_predictions, y_train_tensor))
        print("Test :", acc(test_predictions, y_test_tensor))
        print(f"Epoch: {epoch} | Loss: {train_loss:.5f} | Test Loss: {test_loss:.5f}")

  Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


Train : tensor(-7.8603, device='mps:0', grad_fn=<DivBackward0>)
Test : tensor(-7.3158, device='mps:0')
Epoch: 0 | Loss: 1.38998 | Test Loss: 1.27977
Train : tensor(-0.1554, device='mps:0', grad_fn=<DivBackward0>)
Test : tensor(-0.0256, device='mps:0')
Epoch: 100 | Loss: 0.43385 | Test Loss: 0.43721
Train : tensor(-0.2338, device='mps:0', grad_fn=<DivBackward0>)
Test : tensor(-0.0353, device='mps:0')
Epoch: 200 | Loss: 0.37293 | Test Loss: 0.37984
Train : tensor(-0.2018, device='mps:0', grad_fn=<DivBackward0>)
Test : tensor(-0.1134, device='mps:0')
Epoch: 300 | Loss: 0.35494 | Test Loss: 0.36109
Train : tensor(-0.1575, device='mps:0', grad_fn=<DivBackward0>)
Test : tensor(-0.1104, device='mps:0')
Epoch: 400 | Loss: 0.34758 | Test Loss: 0.35816


# Plot the loss

In [16]:
def print_predictions(predictions):
    for prediction in predictions[:10]:
        val = [0]*6
        val[0] = torch.round(prediction[0]).type(torch.int).item()
        for i in range(val[0]):
            val[i+1] = np.round(prediction[i+1].cpu().numpy(), 2)
        print(val)

with torch.inference_mode():
    print_predictions(openings_predictor(X_test_tensor))

[2, 0.82, 0.36, 0, 0, 0]
[4, 0.74, 0.75, 2.18, 2.01, 0]
[3, 0.74, 0.71, 0.76, 0, 0]
[5, 0.73, 0.75, 0.78, 0.78, 0.16]
[4, 0.73, 0.75, 1.24, 1.13, 0]
[1, 0.85, 0, 0, 0, 0]
[4, 0.73, 0.77, 0.83, 0.43, 0]
[2, 0.78, 0.53, 0, 0, 0]
[4, 0.74, 0.77, 2.08, 1.6, 0]
[4, 0.73, 0.76, 1.64, 1.36, 0]


# Saving the model

In [18]:
torch.save(openings_predictor.to("cpu"), "../models/openings.pth")

In [19]:
import joblib
joblib.dump(full_pipeline, "../models/openings_pipeline.pkl", compress=True)  

['../models/openings_pipeline.pkl']