In [1]:
#imports
from torch import nn
import torch
import json
import pandas as pd
import os
import numpy as np
from tqdm import tqdm

import torch.optim as optim

In [2]:
#first, load the data:
def extract_df_from_jsons(json_path):
    with open(json_path, 'r') as fl:
        train_dict = json.load(fl)
    
    car_df = pd.DataFrame()
    train_df = pd.DataFrame()
    for train_id in train_dict.keys():
        train = train_dict[train_id]
        train['train_id'] = train_id
        for car_id in train['cars']:
            train['cars'][car_id]['car_id'] = car_id
            train['cars'][car_id]['train_id'] = train_id
        car_df = pd.concat([car_df, pd.DataFrame(train['cars']).T])
        tmp = train.copy()
        tmp.pop('cars')
        train_df = pd.concat([train_df, pd.DataFrame(train, index=[train_id])])
    train_df = train_df.drop('cars', axis=1)
    return car_df, train_df

In [3]:
car_df, train_df = extract_df_from_jsons('../data/train_dataset.json')

In [4]:
car_df2, train_df2 = extract_df_from_jsons('../data/train_dataset2.json')

In [7]:
car_df = pd.concat([car_df, car_df2])
train_df = pd.concat([train_df, train_df2])

In [8]:
#these ammounts of data is the same as it was in reported in the book
len(car_df), len(train_df)

(75, 18)

In [9]:
# now defining the models

# we will need 12 models
# 11 concept models
# and 1 meta-network

# Since the concept models are basically all the same, I'll define one base model and make whichever changes are needed depending on the concept.

class ConceptNetwork(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, bind_output=True):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, output_size)
        self.relu = nn.ReLU()
        self.tanh = nn.Tanh()
        self.bind_output = bind_output

    def forward(self, x):
        x = self.fc1(x)
        hlo = self.relu(x)
        x = self.fc2(hlo)
        if self.bind_output:
            x = self.tanh(x)
        return x, hlo

class MetaNetwork(nn.Module):
    def __init__(self, hidden_size_concepts, num_concepts, hidden_size, list_of_concept_models):
        super().__init__()
        self.fc1 = nn.Linear(hidden_size_concepts * num_concepts, hidden_size)
        self.fc2 = nn.Linear(hidden_size, 1)
        self.relu = nn.ReLU()
        self.tanh = nn.Tanh()
        
        # self.concept_models = list_of_concept_models
        
    def forward(self, x):
        
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        x = self.tanh(x)
        return x

# Initialize the networks
num_cars = ConceptNetwork(2, 20, 1, False)
num_loads = ConceptNetwork(2, 20, 1, False)
num_wheels = ConceptNetwork(3, 20, 1, False)
length = ConceptNetwork(3, 20, 1)
shape = ConceptNetwork(3, 20, 1, False)
num_car_loads = ConceptNetwork(3, 20, 1, False)
load_shape = ConceptNetwork(3, 20, 1, False)
next_crc = ConceptNetwork(3, 20, 1)
next_hex = ConceptNetwork(3, 20, 1)
next_rec = ConceptNetwork(3, 20, 1)
next_tri = ConceptNetwork(3, 20, 1)

# num_cars, num_loads
list_of_concept_models = [num_wheels, length, shape, num_car_loads, load_shape, next_crc, next_hex, next_rec, next_tri, num_cars, num_loads]

# Define the loss function and the optimizer
criterion = nn.MSELoss()

list_of_optimizers = [optim.SGD(model.parameters(), lr = 0.01, momentum=0.01) for model in list_of_concept_models]

# first I'll train each concept network for 30000 epochs (as in the book)

#split the dataset: 36-4
train_concept = car_df[car_df.train_id != '3']
test_concept = car_df[car_df.train_id == '3']
car_ids = train_concept.car_id.values.astype(np.float32)
train_ids = train_concept.train_id.values.astype(np.float32)


test_car_ids = test_concept.car_id.values.astype(np.float32)
test_train_ids = test_concept.train_id.values.astype(np.float32)
# Get the activation states of the hidden neurons in the other networks
for model, optimizer, feat in zip(list_of_concept_models[:9], list_of_optimizers, train_concept.columns[:9]):
    model.train()
    train_features = train_concept[feat].values.astype(np.float32)
    train_data = torch.tensor(list(zip(train_ids, car_ids, train_features)))
    for t in tqdm(range(30000)):
        
        prediction, _ = model(train_data)
        
        loss = criterion(prediction, torch.tensor(train_features))
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    test_features = test_concept[feat].values.astype(np.float32)
    test_data = torch.tensor(list(zip(test_train_ids, test_car_ids, test_features)))
    
    model.eval()
    prediction, _ = model(test_data)
    val_loss = criterion(prediction, torch.tensor(test_features))
    print(f"Final training Loss {loss.item():.4f} for {feat} concept model.")
    print(f"Final validation Loss {val_loss.item():.4f} for {feat} concept model.")

  return F.mse_loss(input, target, reduction=self.reduction)
100%|██████████| 30000/30000 [00:15<00:00, 1972.41it/s]
  return F.mse_loss(input, target, reduction=self.reduction)


Final training Loss 0.0889 for num_wheels concept model.
Final validation Loss 0.2106 for num_wheels concept model.


100%|██████████| 30000/30000 [00:15<00:00, 1910.86it/s]


Final training Loss 0.9950 for length concept model.
Final validation Loss 1.0057 for length concept model.


100%|██████████| 30000/30000 [00:14<00:00, 2011.26it/s]


Final training Loss 5.6676 for shape concept model.
Final validation Loss 4.3261 for shape concept model.


100%|██████████| 30000/30000 [00:14<00:00, 2043.04it/s]


Final training Loss 0.7598 for num_load concept model.
Final validation Loss 0.2399 for num_load concept model.


100%|██████████| 30000/30000 [00:14<00:00, 2064.63it/s]


Final training Loss 3.5628 for shape_load concept model.
Final validation Loss 4.6613 for shape_load concept model.


100%|██████████| 30000/30000 [00:14<00:00, 2025.59it/s]


Final training Loss 0.8332 for next_crc concept model.
Final validation Loss 1.1645 for next_crc concept model.


100%|██████████| 30000/30000 [00:14<00:00, 2005.25it/s]


Final training Loss 0.3999 for next_hex concept model.
Final validation Loss 0.0529 for next_hex concept model.


100%|██████████| 30000/30000 [00:15<00:00, 1884.22it/s]


Final training Loss 0.9839 for next_rect concept model.
Final validation Loss 0.7516 for next_rect concept model.


100%|██████████| 30000/30000 [00:15<00:00, 1975.61it/s]

Final training Loss 0.9126 for next_tri concept model.
Final validation Loss 1.4049 for next_tri concept model.





In [10]:
#now training the two other concept models that were left out num_cars and num_loads
train_concept_num_car_features = []
train_concept_num_loads_features = []
for id in train_concept.train_id:
    train_concept_num_car_features.append(train_df.num_car.loc[id])
    train_concept_num_loads_features.append(train_df.dif_loads.loc[id])


test_concept_num_car_features = []
test_concept_num_loads_features = []
for id in test_concept.train_id:
    test_concept_num_car_features.append(train_df.num_car.loc[id])
    test_concept_num_loads_features.append(train_df.dif_loads.loc[id])


for model, optimizer, feat, test_concept in zip(list_of_concept_models[-2:], list_of_optimizers[-2:], [train_concept_num_car_features, train_concept_num_loads_features], [test_concept_num_car_features, test_concept_num_loads_features]):
    model.train()
    train_features = np.array(feat, dtype=np.float32)
    train_data = torch.tensor(list(zip(train_ids, train_features)))
    
    for t in tqdm(range(30000)):    
        prediction, _ = model(train_data)
        
        loss = criterion(prediction, torch.tensor(train_features))
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

100%|██████████| 30000/30000 [00:14<00:00, 2093.03it/s]
100%|██████████| 30000/30000 [00:14<00:00, 2094.88it/s]


In [11]:
#now train the meta network that infers wether the train is going east or not

#first extracting the outputs from the hidden layers of each concept network
hidden_layer_outputs = []
for model, optimizer, feat in zip(list_of_concept_models[:9], list_of_optimizers, train_concept.columns[:9]):
    model.eval()
    train_features = train_concept[feat].values.astype(np.float32)
    train_data = torch.tensor(list(zip(train_ids, car_ids, train_features)))
    _, hlo = model(train_data)
    hidden_layer_outputs.append(hlo)
for model, optimizer, feat, test_concept in zip(list_of_concept_models[-2:], list_of_optimizers[-2:], [train_concept_num_car_features, train_concept_num_loads_features], [test_concept_num_car_features, test_concept_num_loads_features]):
    model.eval()
    train_features = np.array(feat, dtype=np.float32)
    train_data = torch.tensor(list(zip(train_ids, train_features)))
    _, hlo = model(train_data)
    hidden_layer_outputs.append(hlo)


direction_label = []
for id in train_concept.train_id:
    direction_label.append(train_df.east.loc[id])

result = torch.cat([hlo.float() for hlo in hidden_layer_outputs], dim=1)

metanetwork = MetaNetwork(20, 11, 3, list_of_concept_models)
optimizer_meta = optim.SGD(metanetwork.parameters(), lr=0.3, momentum=0.4)
for epoch in tqdm(range (10000)):
    model.train()
    predictions = metanetwork(result)
    
    loss = criterion(predictions, torch.tensor(direction_label, dtype=torch.float))
        
    optimizer.zero_grad()
    loss.backward(retain_graph=True)
    optimizer.step()

100%|██████████| 10000/10000 [00:12<00:00, 803.33it/s]


In [12]:
predictions

tensor([[0.4757],
        [0.5106],
        [0.5106],
        [0.5106],
        [0.5106],
        [0.5106],
        [0.5187],
        [0.5348],
        [0.5355],
        [0.5106],
        [0.5452],
        [0.5110],
        [0.5217],
        [0.5231],
        [0.5160],
        [0.5398],
        [0.5459],
        [0.5701],
        [0.5315],
        [0.5699],
        [0.6061],
        [0.5533],
        [0.5711],
        [0.6204],
        [0.5939],
        [0.5412],
        [0.6057],
        [0.6028],
        [0.5923],
        [0.6074],
        [0.6356],
        [0.6556],
        [0.6605],
        [0.5795],
        [0.6236],
        [0.6417],
        [0.6080],
        [0.6226],
        [0.6799],
        [0.6597],
        [0.7028],
        [0.6185],
        [0.6567],
        [0.6517],
        [0.6727],
        [0.6272],
        [0.6922],
        [0.6710],
        [0.7176],
        [0.7189],
        [0.6473],
        [0.6736],
        [0.7077],
        [0.6894],
        [0.7552],
        [0

In [13]:
prediction_dict = dict()

for id, pred, gt in zip(train_concept.train_id, predictions.detach().numpy(), direction_label):
    if id in prediction_dict.keys():
        prediction_dict[id].append(pred)
    else:
        prediction_dict[id] = [pred]

In [14]:
for key in prediction_dict.keys():
    prediction_dict[key] = np.mean(prediction_dict[key])

In [15]:
prediction_dict

{'1': 0.50363606,
 '2': 0.5249094,
 '4': 0.5223248,
 '5': 0.54297376,
 '6': 0.569166,
 '7': 0.5847024,
 '8': 0.5832562,
 '9': 0.6302744,
 '10': 0.61493224,
 '11': 0.6546005,
 '12': 0.64990735,
 '13': 0.68540365,
 '14': 0.6946336,
 '15': 0.6990939,
 '16': 0.72823846,
 '17': 0.7208233,
 '18': 0.7443426}