In [147]:
import pandas as pd
import numpy as np
import plotly.express as px
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.cluster import Birch
from sklearn.metrics import accuracy_score
import torch.nn as nn
import torch.optim as optim
import torch

In [148]:
user_data = pd.read_csv("../data/customers.csv").dropna()
actions_data = pd.read_csv("../data/dataset.csv").dropna().astype(int)
cars_data = pd.read_csv("../data/cars_about.csv").dropna()

## Preprocess user data

In [149]:
user_data

Unnamed: 0,customer_id,gender,married,age,graduated,profession,familySize
0,45,Male,No,22,No,Healthcare,4.0
1,817,Female,Yes,38,Yes,Engineer,3.0
2,495,Female,Yes,67,Yes,Engineer,1.0
3,36,Male,Yes,67,Yes,Lawyer,2.0
4,76,Female,Yes,40,Yes,Entertainment,6.0
...,...,...,...,...,...,...,...
315,353,Male,Yes,74,Yes,Lawyer,2.0
316,411,Female,No,30,No,Homemaker,4.0
317,297,Female,No,37,Yes,Artist,2.0
318,186,Female,No,22,No,Marketing,1.0


In [150]:
# Identify the categorical columns
cat_cols = ['gender', 'married', 'graduated', 'profession']

# Convert the categorical columns to numerical using Label Encoding
for col in cat_cols:
    le = LabelEncoder()
    user_data[col] = le.fit_transform(user_data[col])

user_data

Unnamed: 0,customer_id,gender,married,age,graduated,profession,familySize
0,45,1,0,22,0,5,4.0
1,817,0,1,38,1,2,3.0
2,495,0,1,67,1,2,1.0
3,36,1,1,67,1,7,2.0
4,76,0,1,40,1,3,6.0
...,...,...,...,...,...,...,...
315,353,1,1,74,1,7,2.0
316,411,0,0,30,0,6,4.0
317,297,0,0,37,1,0,2.0
318,186,0,0,22,0,8,1.0


## Preprocess cars data

In [151]:
cars_data = pd.read_csv("../data/cars_about.csv").dropna()

cars_data["brand"] = cars_data.car_model.apply(lambda a: a.split()[0])
cars_data = cars_data.drop(columns=["car_model", "engine", "car_id"])
cars_data["price"] = cars_data['price'].apply(lambda a: int(a[1:]))

cars_data['used_label'] = cars_data["used_label"].apply(lambda a: 1 if a=="Used" else 0)
# Identify the categorical columns
car_cat_cols = ['exteriorColor', 'interiorColor', 'drivetrain', 'fuelType', 'transmission', 'brand']

# Convert the categorical columns to numerical using Label Encoding
for col in car_cat_cols:
    le = LabelEncoder()
    cars_data[col] = le.fit_transform(cars_data[col])

cars_max_values = cars_data.max(0)
cars_data = cars_data / cars_max_values

cars_data

Unnamed: 0,used_label,price,exteriorColor,interiorColor,drivetrain,minMPG,maxMPG,fuelType,transmission,mileage,brand
0,1.0,0.319672,0.772222,0.132530,0.8,0.145038,0.230769,0.500000,0.540541,0.138042,0.933333
1,1.0,0.401639,0.805556,0.132530,0.6,0.145038,0.205128,0.500000,0.054054,0.154596,0.333333
2,1.0,0.336066,0.344444,0.132530,0.6,0.114504,0.179487,0.500000,0.540541,0.108793,0.833333
3,1.0,0.229508,0.366667,1.000000,0.8,0.221374,0.299145,0.500000,0.756757,0.049756,0.433333
4,1.0,0.401639,0.294444,0.120482,0.8,0.152672,0.230769,0.500000,0.540541,0.132099,0.633333
...,...,...,...,...,...,...,...,...,...,...,...
399,1.0,0.270492,0.255556,0.132530,0.6,0.137405,0.213675,0.500000,0.540541,0.205667,0.533333
400,0.0,0.245902,0.905556,0.686747,0.4,0.259542,0.256410,0.833333,0.756757,0.196920,0.933333
401,1.0,0.286885,0.061111,0.132530,0.4,0.167939,0.247863,0.500000,0.648649,0.231859,0.166667
402,1.0,0.286885,0.933333,0.590361,0.6,0.122137,0.188034,0.500000,0.243243,0.543638,0.200000


## Preprocess actions data

In [152]:
actions_data = actions_data.rename(columns = {"item_id":"car_id"})

In [153]:
actions_pivot_table = pd.pivot_table(actions_data.sort_values(by="car_id"), values='interaction', index='user_id', columns='car_id').fillna(0)

In [154]:
actions_data.car_id.nunique()

109

In [155]:
px.imshow(actions_pivot_table)

In [156]:
actions_data = pd.melt(actions_pivot_table.reset_index(), id_vars='user_id', value_vars=actions_pivot_table.columns).rename(columns={"value":"interaction"})

In [157]:
actions_data.interaction.unique()

array([1, 0])

In [158]:
actions_data

Unnamed: 0,user_id,car_id,interaction
0,0,0,1
1,1,0,0
2,2,0,0
3,3,0,0
4,4,0,0
...,...,...,...
8170,70,402,0
8171,71,402,0
8172,72,402,0
8173,73,402,0


In [159]:
actions_data.interaction = actions_data.interaction.apply(lambda a: 1 if a > 0.5 else 0)

In [160]:
actions_data

Unnamed: 0,user_id,car_id,interaction
0,0,0,1
1,1,0,0
2,2,0,0
3,3,0,0
4,4,0,0
...,...,...,...
8170,70,402,0
8171,71,402,0
8172,72,402,0
8173,73,402,0


In [161]:
new_actions_pivot_table = pd.pivot_table(actions_data.sort_values(by="car_id"), values='interaction', index='user_id', columns='car_id').fillna(0)

In [162]:
px.imshow(new_actions_pivot_table)

## CatBoost inputs

1. собираю фичи топ n понравившихся авто
2. конкатенирую фичи понравившихся авто с таргет авто
3. делаю бинарную классификацию 

In [163]:
N_POSITIVE = 5

In [164]:
column_names = []
cat_columns = ['used_label', 'exteriorColor', 'interiorColor', 'drivetrain', 'fuelType', 'transmission', 'brand']
cat_features = []

for index in range(N_POSITIVE):
    for column in cars_data.columns:
        column_names.append(f"car_{index}_{column}")
        
for column in cars_data.columns:
    column_names.append(f"target_{column}")

for column in column_names:
    for c_column in cat_columns:
        if c_column in column:
            cat_features.append(column)
        
dataset = pd.DataFrame(columns=column_names + ["interaction"])

for user_id in actions_data.user_id.unique():
    for target_car_id in actions_data.car_id.unique():
        # len(cars_data.columns) * (N_POSITIVE + 1))
        matrix_features = []
        features = []
        current_positive_samples = actions_data.query(f"user_id == {user_id}").query(f"car_id != {target_car_id}").query("interaction == 1")
        # print()
        if len(current_positive_samples) < N_POSITIVE: continue

        for car_id in current_positive_samples.sample(N_POSITIVE).car_id.to_list():
            matrix_features.append(cars_data.iloc[car_id].to_list())
        matrix_features.append(cars_data.iloc[target_car_id].to_list())
        matrix_features.append(int(actions_data.loc[actions_data.car_id == target_car_id].query(f"user_id == {user_id}").interaction))
        
        for item in matrix_features:
            if isinstance(item, list):
                for elem in item:
                    features.append(elem)
            else:
                features.append(item)
        # print(features)
        dataset = pd.concat([dataset, pd.DataFrame.from_records([dict(zip(dataset.columns, features))])])

# dataset = dataset.astype(int)

In [165]:
dataset

Unnamed: 0,car_0_used_label,car_0_price,car_0_exteriorColor,car_0_interiorColor,car_0_drivetrain,car_0_minMPG,car_0_maxMPG,car_0_fuelType,car_0_transmission,car_0_mileage,...,target_exteriorColor,target_interiorColor,target_drivetrain,target_minMPG,target_maxMPG,target_fuelType,target_transmission,target_mileage,target_brand,interaction
0,1.0,0.040984,0.916667,0.915663,0.8,0.175573,0.196581,0.5,0.729730,0.959812,...,0.772222,0.132530,0.8,0.145038,0.230769,0.5,0.540541,0.138042,0.933333,1
0,1.0,0.229508,0.966667,0.325301,0.6,0.122137,0.188034,0.5,0.243243,0.655385,...,0.344444,0.132530,0.6,0.114504,0.179487,0.5,0.540541,0.108793,0.833333,0
0,1.0,0.188525,0.961111,0.096386,0.4,0.137405,0.230769,0.5,0.243243,0.468061,...,0.366667,1.000000,0.8,0.221374,0.299145,0.5,0.756757,0.049756,0.433333,0
0,1.0,0.319672,0.772222,0.132530,0.8,0.145038,0.230769,0.5,0.540541,0.138042,...,0.294444,0.120482,0.8,0.152672,0.230769,0.5,0.540541,0.132099,0.633333,0
0,1.0,0.147541,0.472222,0.819277,1.0,0.160305,0.256410,0.5,0.405405,0.273038,...,0.061111,1.000000,1.0,0.129771,0.196581,0.5,0.162162,0.495160,0.933333,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,1.0,0.163934,0.577778,0.132530,0.8,0.213740,0.290598,0.5,0.756757,0.049099,...,0.272222,0.530120,0.8,0.152672,0.222222,0.5,0.540541,0.116399,0.566667,1
0,1.0,0.131148,0.366667,0.132530,1.0,0.145038,0.239316,0.5,0.243243,0.228192,...,0.061111,0.132530,0.4,0.160305,0.239316,0.5,0.648649,0.050559,0.733333,0
0,1.0,0.360656,0.727778,0.096386,0.4,0.145038,0.222222,0.5,0.648649,0.067817,...,0.311111,0.421687,0.4,0.145038,0.222222,0.5,0.540541,0.105596,0.333333,0
0,1.0,0.131148,0.366667,0.132530,1.0,0.145038,0.239316,0.5,0.243243,0.228192,...,0.500000,0.301205,0.8,0.198473,0.282051,0.5,0.756757,0.109484,0.766667,0


In [166]:
dataset.shape

(6931, 67)

In [167]:
dataset.interaction.value_counts()

0    6438
1     493
Name: interaction, dtype: int64

In [168]:
dataset

Unnamed: 0,car_0_used_label,car_0_price,car_0_exteriorColor,car_0_interiorColor,car_0_drivetrain,car_0_minMPG,car_0_maxMPG,car_0_fuelType,car_0_transmission,car_0_mileage,...,target_exteriorColor,target_interiorColor,target_drivetrain,target_minMPG,target_maxMPG,target_fuelType,target_transmission,target_mileage,target_brand,interaction
0,1.0,0.040984,0.916667,0.915663,0.8,0.175573,0.196581,0.5,0.729730,0.959812,...,0.772222,0.132530,0.8,0.145038,0.230769,0.5,0.540541,0.138042,0.933333,1
0,1.0,0.229508,0.966667,0.325301,0.6,0.122137,0.188034,0.5,0.243243,0.655385,...,0.344444,0.132530,0.6,0.114504,0.179487,0.5,0.540541,0.108793,0.833333,0
0,1.0,0.188525,0.961111,0.096386,0.4,0.137405,0.230769,0.5,0.243243,0.468061,...,0.366667,1.000000,0.8,0.221374,0.299145,0.5,0.756757,0.049756,0.433333,0
0,1.0,0.319672,0.772222,0.132530,0.8,0.145038,0.230769,0.5,0.540541,0.138042,...,0.294444,0.120482,0.8,0.152672,0.230769,0.5,0.540541,0.132099,0.633333,0
0,1.0,0.147541,0.472222,0.819277,1.0,0.160305,0.256410,0.5,0.405405,0.273038,...,0.061111,1.000000,1.0,0.129771,0.196581,0.5,0.162162,0.495160,0.933333,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,1.0,0.163934,0.577778,0.132530,0.8,0.213740,0.290598,0.5,0.756757,0.049099,...,0.272222,0.530120,0.8,0.152672,0.222222,0.5,0.540541,0.116399,0.566667,1
0,1.0,0.131148,0.366667,0.132530,1.0,0.145038,0.239316,0.5,0.243243,0.228192,...,0.061111,0.132530,0.4,0.160305,0.239316,0.5,0.648649,0.050559,0.733333,0
0,1.0,0.360656,0.727778,0.096386,0.4,0.145038,0.222222,0.5,0.648649,0.067817,...,0.311111,0.421687,0.4,0.145038,0.222222,0.5,0.540541,0.105596,0.333333,0
0,1.0,0.131148,0.366667,0.132530,1.0,0.145038,0.239316,0.5,0.243243,0.228192,...,0.500000,0.301205,0.8,0.198473,0.282051,0.5,0.756757,0.109484,0.766667,0


In [169]:
px.bar(dataset.interaction.value_counts())

In [212]:
balanced_dataset = pd.concat([dataset.loc[dataset.interaction == 1], dataset.loc[dataset.interaction == 0].sample(int(dataset.interaction.value_counts()[1] * 2))])

In [213]:
px.bar(balanced_dataset.interaction.value_counts())

In [214]:
balanced_dataset

Unnamed: 0,car_0_used_label,car_0_price,car_0_exteriorColor,car_0_interiorColor,car_0_drivetrain,car_0_minMPG,car_0_maxMPG,car_0_fuelType,car_0_transmission,car_0_mileage,...,target_exteriorColor,target_interiorColor,target_drivetrain,target_minMPG,target_maxMPG,target_fuelType,target_transmission,target_mileage,target_brand,interaction
0,1.0,0.286885,0.933333,0.590361,0.6,0.122137,0.188034,0.5,0.243243,0.543638,...,0.772222,0.132530,0.8,0.145038,0.230769,0.5,0.540541,0.138042,0.933333,1
0,1.0,0.229508,0.966667,0.325301,0.6,0.122137,0.188034,0.5,0.243243,0.655385,...,0.061111,1.000000,1.0,0.129771,0.196581,0.5,0.162162,0.495160,0.933333,1
0,1.0,0.229508,0.966667,0.325301,0.6,0.122137,0.188034,0.5,0.243243,0.655385,...,0.472222,0.819277,1.0,0.160305,0.256410,0.5,0.405405,0.273038,0.733333,1
0,1.0,0.040984,0.916667,0.915663,0.8,0.175573,0.196581,0.5,0.729730,0.959812,...,0.927778,0.132530,1.0,0.114504,0.205128,0.5,0.540541,0.120385,0.266667,1
0,1.0,0.319672,0.772222,0.132530,0.8,0.145038,0.230769,0.5,0.540541,0.138042,...,0.422222,0.891566,1.0,0.145038,0.239316,0.5,0.540541,0.048056,0.633333,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,1.0,0.254098,0.488889,0.445783,0.6,0.122137,0.188034,0.5,0.243243,0.223761,...,0.061111,0.975904,0.4,0.129771,0.230769,0.5,0.648649,0.167380,0.733333,0
0,1.0,0.426230,0.611111,0.771084,0.4,0.175573,0.239316,0.5,0.432432,0.066770,...,0.355556,0.626506,0.4,0.160305,0.239316,0.5,0.648649,0.207845,0.733333,0
0,0.0,0.311475,0.666667,0.674699,0.8,0.145038,0.239316,0.5,0.648649,0.068047,...,0.933333,0.590361,0.6,0.122137,0.188034,0.5,0.243243,0.543638,0.200000,0
0,1.0,0.204918,1.000000,1.000000,1.0,0.114504,0.188034,0.0,0.243243,0.634873,...,0.366667,1.000000,0.8,0.221374,0.299145,0.5,0.756757,0.049756,0.433333,0


In [215]:
from sklearn.model_selection import train_test_split
# Split into training and validation sets
train_df, val_df = train_test_split(balanced_dataset.astype(int), test_size=0.2, random_state=42)
X_train, y_train = train_df.drop(columns=['interaction']), train_df[["interaction"]]

# Split into validation and test sets
val_df, test_df = train_test_split(val_df, test_size=0.2, random_state=42)

X_val, y_val = val_df.drop(columns=['interaction']), val_df[["interaction"]]
X_test, y_test = test_df.drop(columns=['interaction']), test_df[["interaction"]]

## Linear Layers Stack

In [216]:
# Define the neural network
class BinaryClassification(nn.Module):
    def __init__(self):
        super(BinaryClassification, self).__init__()
        # Number of input features is 12.
        self.layer_1 = nn.Linear(66, 64) 
        self.layer_2 = nn.Linear(64, 64)
        self.layer_out = nn.Linear(64, 1) 
        
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=0.1)
        self.batchnorm1 = nn.BatchNorm1d(64)
        self.batchnorm2 = nn.BatchNorm1d(64)
        
    def forward(self, inputs):
        x = self.relu(self.layer_1(inputs))
        x = self.batchnorm1(x)
        x = self.relu(self.layer_2(x))
        x = self.batchnorm2(x)
        x = self.dropout(x)
        x = self.layer_out(x)
        
        return x

# Instantiate the neural network
net = BinaryClassification()

# Define the loss function and optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(net.parameters(), lr=0.001)

# Load the dataset

X_test_tensor = torch.from_numpy(X_val.values).float()
y_test_tensor = torch.from_numpy(np.array(y_val)).float()

X_train_tensor = torch.from_numpy(np.array(X_train)).float()
y_train_tensor = torch.from_numpy(np.array(y_train)).float()

epochs = 400
# Train the neural network
for epoch in range(epochs):
    optimizer.zero_grad()
    outputs = torch.sigmoid(net(X_train_tensor))
    loss = criterion(outputs, y_train_tensor)
    loss.backward()
    optimizer.step()
    
    net.eval()

    train_accuracy = accuracy_score(
            y_train_tensor.flatten().int().tolist(), 
            [int(x.item() > 0.5) for x in torch.sigmoid(net(X_train_tensor))]
        )
    
    test_accurracy = accuracy_score(
        y_test_tensor.flatten().int().tolist(), 
        [int(x.item() > 0.5) for x in torch.sigmoid(net(X_test_tensor))]
        )
    
    net.train()
    # Print the loss every 10 epochs
    if (epoch+1) % 10 == 0:
        print('Epoch [%d/%d], Loss: %.4f train_accuracy: %.4f test_accurracy: %.4f' % (epoch+1, epochs, loss.item(), train_accuracy, test_accurracy))


Epoch [10/400], Loss: 0.2026 train_accuracy: 0.7099 test_accurracy: 0.6528
Epoch [20/400], Loss: 0.1631 train_accuracy: 0.7541 test_accurracy: 0.6667
Epoch [30/400], Loss: 0.1275 train_accuracy: 0.7956 test_accurracy: 0.6528
Epoch [40/400], Loss: 0.0971 train_accuracy: 0.8536 test_accurracy: 0.6250
Epoch [50/400], Loss: 0.0691 train_accuracy: 0.9144 test_accurracy: 0.5694
Epoch [60/400], Loss: 0.0521 train_accuracy: 0.9448 test_accurracy: 0.5833
Epoch [70/400], Loss: 0.0394 train_accuracy: 0.9558 test_accurracy: 0.5833
Epoch [80/400], Loss: 0.0325 train_accuracy: 0.9586 test_accurracy: 0.5556
Epoch [90/400], Loss: 0.0273 train_accuracy: 0.9586 test_accurracy: 0.5417
Epoch [100/400], Loss: 0.0248 train_accuracy: 0.9586 test_accurracy: 0.5278
Epoch [110/400], Loss: 0.0243 train_accuracy: 0.9613 test_accurracy: 0.5278
Epoch [120/400], Loss: 0.0223 train_accuracy: 0.9613 test_accurracy: 0.5278
Epoch [130/400], Loss: 0.0219 train_accuracy: 0.9613 test_accurracy: 0.5139
Epoch [140/400], Loss

In [217]:
net = net.eval()

In [218]:
X_val.values.shape

(72, 66)

In [219]:
from sklearn.metrics import accuracy_score

net.eval()

accuracy_score(
    y_test_tensor.flatten().int().tolist(), 
    [int(x.item() > 0.6) for x in torch.sigmoid(net(X_test_tensor))]
    )

0.5694444444444444

In [220]:
from sklearn import metrics
y_pred_proba = torch.sigmoid(net(X_test_tensor)).flatten().tolist()
fpr, tpr, thresholds = metrics.roc_curve(y_test_tensor.flatten().int().tolist(), y_pred_proba)

# The histogram of scores compared to true labels
fig_hist = px.histogram(
    x=y_pred_proba, color=y_test_tensor.flatten().int().tolist(), nbins=50,
    labels=dict(color='True Labels', x='Score')
)

fig_hist.show()


# Evaluating model performance at various thresholds
df = pd.DataFrame({
    'False Positive Rate': fpr,
    'True Positive Rate': tpr
}, index=thresholds)
df.index.name = "Thresholds"
df.columns.name = "Rate"

fig_thresh = px.line(
    df, title='TPR and FPR at every threshold',
    width=700, height=500
)

fig_thresh.update_yaxes(scaleanchor="x", scaleratio=1)
fig_thresh.update_xaxes(range=[0, 1], constrain='domain')
fig_thresh.show()

In [221]:
from sklearn.metrics import auc
fig = px.area(
    x=fpr, y=tpr,
    title=f'ROC Curve (AUC={auc(fpr, tpr):.4f}) ::: LinearModel',
    labels=dict(x='False Positive Rate', y='True Positive Rate'),
    width=700, height=500
)
fig.add_shape(
    type='line', line=dict(dash='dash'),
    x0=0, x1=1, y0=0, y1=1
)

fig.update_yaxes(scaleanchor="x", scaleratio=1)
fig.update_xaxes(constrain='domain')
fig.show()

In [222]:
cars_data = pd.read_csv("../data/cars_about.csv").dropna()

cars_data["brand"] = cars_data.car_model.apply(lambda a: a.split()[0])
cars_data = cars_data.drop(columns=["car_model", "engine", "car_id"])
cars_data["price"] = cars_data['price'].apply(lambda a: int(a[1:]))

cars_data['used_label'] = cars_data["used_label"].apply(lambda a: 1 if a=="Used" else 0)
# Identify the categorical columns
car_cat_cols = ['exteriorColor', 'interiorColor', 'drivetrain', 'fuelType', 'transmission', 'brand']

# Convert the categorical columns to numerical using Label Encoding
for col in car_cat_cols:
    le = LabelEncoder()
    cars_data[col] = le.fit_transform(cars_data[col])

cars_data = cars_data / cars_max_values

column_names = []
cat_columns = ['used_label', 'exteriorColor', 'interiorColor', 'drivetrain', 'fuelType', 'transmission', 'brand']
cat_features = []

for index in range(N_POSITIVE):
    for column in cars_data.columns:
        column_names.append(f"car_{index}_{column}")
        
for column in cars_data.columns:
    column_names.append(f"target_{column}")

for column in column_names:
    for c_column in cat_columns:
        if c_column in column:
            cat_features.append(column)

true_table = np.zeros((actions_data.user_id.nunique(), actions_data.car_id.unique().max()))
pred_table = true_table.copy()

dataset = pd.DataFrame(columns=column_names + ["interaction"])

net.eval()
net.cuda()

for user_index, user_id in enumerate(actions_data.user_id.unique()):
    for target_car_index, target_car_id in enumerate(actions_data.car_id.unique()):
        matrix_features = []
        features = []
        current_positive_samples = actions_data.query(f"user_id == {user_id}").query(f"car_id != {target_car_id}").query("interaction == 1")
        if len(current_positive_samples) < N_POSITIVE: continue

        for car_id in current_positive_samples.sample(N_POSITIVE).car_id.to_list():
            matrix_features.append(cars_data.iloc[car_id].to_list())
        matrix_features.append(cars_data.iloc[target_car_id].to_list())
        matrix_features.append(int(actions_data.loc[actions_data.car_id == target_car_id].query(f"user_id == {user_id}").interaction))
        
        for item in matrix_features:
            if isinstance(item, list):
                for elem in item:
                    features.append(elem)
            else:
                features.append(item)

        true_table[user_index, target_car_id-1] = features[-1]
        input_data = torch.tensor(features[:-1]).unsqueeze(dim=0)
        input_data = input_data.to('cuda')

        pred_table[user_index, target_car_id-1] =  torch.sigmoid(net(input_data)).item()

        dataset = pd.concat([
            dataset, 
            pd.DataFrame.from_records(
            [dict(zip(dataset.columns, features))]
        )])

dataset = dataset.astype(int)

In [223]:
px.imshow(true_table, width=1600, height=400)

In [224]:
px.imshow(pred_table, width=1600, height=400)