In [116]:
from collections import OrderedDict
import pickle
from torch.utils.data import Dataset
import torch
import numpy as np 

class ChoiceDataset(Dataset):
    def __init__(self, data_path, data_file):
        """
        Parameters:
            data_file (string): name of data pickle file
        """
        data = pickle.load(open(data_path + "/" + data_file, "rb"))
        self.x = torch.Tensor(data["x"])
        self.x_names = data["x_names"]
        self.N = len(self.x)
        
        dic_attr = \
        {"TRAIN": ["TRAIN_TT", "TRAIN_HE", "TRAIN_CO"], \
         "SM": ["SM_TT", "SM_HE", "SM_CO", "SM_SEATS"],\
         "CAR": ["CAR_TT", "CAR_CO"]}
        
        self.x_dict = {}
        for mode in dic_attr:
            self.x_dict[mode] = {}
            for attr in dic_attr[mode]:
                self.x_dict[mode].update({attr: getAttribute(self.x, self.x_names, attr)})
        
        self.y = torch.LongTensor(data["y"])-1 # N
        
        # Availability 
        self.av = torch.cat([torch.ones(self.N,2),torch.Tensor(data["car_av"]).view(self.N,1)], dim=1) # (N,3) av for all modes 
        
        # all z 
        self.z_all_names = data['z_names']
        self.z_levels = data['z_levels']
        self.z_all = torch.Tensor(data['z']) # N,D socio-demo variables

        # select z
        self.z_names = ["MALE_1", "AGE_1", "AGE_2", "AGE_3", "AGE_4", \
               "INCOME_1", "INCOME_2", "INCOME_3", "FIRST_1", "WHO_1", "WHO_2", \
               "PURPOSE_1", "PURPOSE_2", "PURPOSE_3", "LUGGAGE_1", "LUGGAGE_2", "GA_1"]
        self.z = selectZ(self.z_all, self.z_names, self.z_all_names)
        
        _, self.D = self.z.size() # z size = (N,D)

        
    def __len__(self):
        return self.N
    
    def __getitem__(self, idx):
        '''
        Get the sample given its idx in the list 
        '''
        x = {}
        for mode in self.x_dict:
            x[mode] = {}
            for name in self.x_dict[mode]:
                x[mode][name] = self.x_dict[mode][name][idx]
        return {"x": x, "y": self.y[idx], "z":self.z[idx], "av": self.av[idx]}
    
def getAttribute(x, x_names, name):
    return x[:,x_names.index(name)]
    
def selectZ(z,z_selected, z_names):
    ind = []
    for var in z_selected:
        ind.append(z_names.index(var))
    return z[:,np.array(ind).astype(int)]

In [117]:
from data_utils import ChoiceDataset
data_test = "test.pkl"
ds_test = ChoiceDataset('/mnt/md0/TasteNet-MNL/swissmetro/data/', data_test)

In [85]:
data = pickle.load(open('/mnt/md0/TasteNet-MNL/swissmetro/data/' + data_test, "rb"))
print(data["car_av"])
x = torch.Tensor(data["x"])
N = len(x)
print(N)
print(ds_test[:4])

[0 1 1 ... 1 0 1]
1604
{'x': {'TRAIN': {'TRAIN_TT': tensor([1.1600, 2.7200, 1.4300, 2.0100]), 'TRAIN_HE': tensor([0.3000, 0.6000, 0.6000, 0.6000]), 'TRAIN_CO': tensor([0.3500, 1.3400, 0.7500, 1.6600])}, 'SM': {'SM_TT': tensor([0.7300, 1.3500, 0.9800, 1.6200]), 'SM_HE': tensor([0.2000, 0.2000, 0.1000, 0.3000]), 'SM_CO': tensor([0.4500, 1.4600, 0.9000, 2.1000]), 'SM_SEATS': tensor([0., 0., 0., 0.])}, 'CAR': {'CAR_TT': tensor([0.0000, 1.6800, 1.3000, 1.4400]), 'CAR_CO': tensor([0.0000, 1.2800, 0.8400, 1.3600])}}, 'y': tensor([0, 2, 1, 2]), 'z': tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 1., 0., 0.],
        [1., 0., 1., 0., 0., 1., 0., 0., 1., 0., 1., 1., 0., 0., 1., 0., 0.],
        [1., 0., 1., 0., 0., 1., 0., 0., 0., 1., 0., 0., 1., 0., 1., 0., 0.],
        [1., 0., 0., 1., 0., 0., 1., 0., 1., 1., 0., 0., 1., 0., 1., 0., 0.]]), 'av': tensor([[1., 1., 0.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]])}


In [None]:
ds_test.z.size()

In [None]:
# Open json file
import json
with open('/mnt/md0/TasteNet-MNL/sg/RandomSplitData1_PassengerType_TripEncoded.json') as json_file:
    data = json.load(json_file)

In [167]:
import pandas as pd
# Import /mnt/md0/TasteNet-MNL/sg/RandomSplitData1_PassengerType_TripEncoded.json as df
df = pd.read_json('/mnt/md0/TasteNet-MNL/sg/RandomSplitData1_PassengerType_TripEncoded.json')

In [209]:
# Get columns
df.columns

Index(['Jid', 'first_stop', 'last_stop', 'Time', 'CommNodeOrigin',
       'CommNodeDest', 'NoRoutes', 'routes_summary', 'Mode_Type', 'IVTT',
       'NoT', 'PT_dst', 'PT_fare', 'WalkTime', 'PT_route_choice',
       'choice_index', 'OD', 'passengerType', 'Adult', 'Student', 'Senior',
       'ODEncoded', 'TransferEncoded', 'PassengerTypeEncoded'],
      dtype='object')

In [190]:
# Show all columns
pd.set_option('display.max_columns', None)
df['choice_index'].max()

14

In [229]:
import pandas as pd
import torch
from torch.utils.data import Dataset

class ChoiceDataset(Dataset):
    def __init__(self, df):
        # 1. Get the max number of routes
        self.maxNoRoutes = df['NoRoutes'].max()

        # 2. Iterate each row (in vectorized manner). Each row's NoRoutes is the number of routes. Beyond that, all zero
        x = np.zeros((len(df), self.maxNoRoutes, 4))
        for i in range(len(df)):
            for j in range(df['NoRoutes'].iloc[i]):
                x[i, j, 0] = df['IVTT'].iloc[i][j]
                x[i, j, 1] = df['PT_fare'].iloc[i][j]
                x[i, j, 2] = df['WalkTime'].iloc[i][j]
                x[i, j, 3] = df['NoT'].iloc[i][j]
        self.x = torch.Tensor(x.reshape(len(df), 4*self.maxNoRoutes))
        # 3. Create names IVTT_1, PT_fare_1. WalkTime_1, NoT_1, IVTT_2, ... IVTT_max_routes, PT_fare_max_routes, WalkTime_max_routes, NoT_max_routes
        self.x_names = [[f'IVTT{i}', f'PT_fare{i}', f'WalkTime{i}', f'NoT{i}'] for i in range(0, self.maxNoRoutes)]
        # Flatten list of lists
        self.x_names = [item for sublist in self.x_names for item in sublist]
        self.N = len(df)
        
        # Create dic_attr depending on df['NoRoutes']
        dic_attr = {}
        for i in range(0, self.maxNoRoutes):
            dic_attr.update({str(i): [f'IVTT{i}', f'PT_fare{i}', f'WalkTime{i}', f'NoT{i}']})

        self.x_dict = {}
        for mode in dic_attr:
            self.x_dict[mode] = {}
            for attr in dic_attr[mode]:
                self.x_dict[mode].update({attr: getAttribute(self.x, self.x_names, attr)})

        self.y = torch.LongTensor(df['choice_index'].values)  # N

        # Availability
        NoRoutes = torch.Tensor(df['NoRoutes'].values)
        N = len(NoRoutes)
        NoRoutes = NoRoutes.unsqueeze(1)
        av_ones = torch.ones(N, self.maxNoRoutes*4)

        # Create a mask indicating where to place zeros based on `NoRoutes` values
        mask = torch.arange(self.maxNoRoutes*4).unsqueeze(0) < NoRoutes*4

        # Apply the mask to the `av_ones` tensor to set zeros where needed
        self.av = av_ones * mask.float()

        # all z 
        # Rename df columns 'Adult', 'Student', 'Senior' to 'CARDTYPE_0', 'CARDTYPE_1', 'CARDTYPE_2'
        df.rename(columns={'Adult': 'CARDTYPE_0', 'Student': 'CARDTYPE_1', 'Senior': 'CARDTYPE_2'}, inplace=True)
        self.z_all_names = ['CARDTYPE_0', 'CARDTYPE_1', 'CARDTYPE_2']
        self.z_levels = OrderedDict([('CARDTYPE_0', 3)])
        self.z_all = torch.stack([torch.tensor(elem) for elem in df['PassengerTypeEncoded'].apply(lambda x: torch.Tensor(x[0])).values])# N,D socio-demo variables


        # select z
        self.z_names = ["CARDTYPE_1", "CARDTYPE_2"]
        self.z = selectZ(self.z_all, self.z_names, self.z_all_names)
        _, self.D = self.z.size()  # z size = (N,D)

    def __len__(self):
        return self.N

    def __getitem__(self, idx):
        x = {}
        for mode in self.x_dict:
            x[mode] = {}
            for name in self.x_dict[mode]:
                x[mode][name] = self.x_dict[mode][name][idx]
        return {"x": x, "y": self.y[idx], "z": self.z[idx], "av": self.av[idx]}


def getAttribute(x, x_names, name):
    return x[:, x_names.index(name)]


def selectZ(z, z_selected, z_names):
    ind = []
    for var in z_selected:
        ind.append(z_names.index(var))
    return z[:, ind]


# Create the ChoiceDataset object
dataset = ChoiceDataset(df)

  self.z_all = torch.stack([torch.tensor(elem) for elem in df['PassengerTypeEncoded'].apply(lambda x: torch.Tensor(x[0])).values])# N,D socio-demo variables


In [231]:
for index, data in enumerate(dataset):
    print(index, data)
    if index == 3:
        break

0 {'x': {'0': {'IVTT0': tensor(3242.), 'PT_fare0': tensor(1.3800), 'WalkTime0': tensor(318.), 'NoT0': tensor(1.)}, '1': {'IVTT1': tensor(3708.), 'PT_fare1': tensor(1.3800), 'WalkTime1': tensor(318.), 'NoT1': tensor(1.)}, '2': {'IVTT2': tensor(3829.), 'PT_fare2': tensor(1.3800), 'WalkTime2': tensor(318.), 'NoT2': tensor(1.)}, '3': {'IVTT3': tensor(3347.), 'PT_fare3': tensor(1.4400), 'WalkTime3': tensor(339.), 'NoT3': tensor(1.)}, '4': {'IVTT4': tensor(3583.), 'PT_fare4': tensor(1.3800), 'WalkTime4': tensor(318.), 'NoT4': tensor(1.)}, '5': {'IVTT5': tensor(1184.), 'PT_fare5': tensor(1.3800), 'WalkTime5': tensor(374.), 'NoT5': tensor(0.)}, '6': {'IVTT6': tensor(2437.), 'PT_fare6': tensor(1.3800), 'WalkTime6': tensor(506.), 'NoT6': tensor(1.)}, '7': {'IVTT7': tensor(2064.), 'PT_fare7': tensor(1.3800), 'WalkTime7': tensor(506.), 'NoT7': tensor(1.)}, '8': {'IVTT8': tensor(2315.), 'PT_fare8': tensor(1.3800), 'WalkTime8': tensor(506.), 'NoT8': tensor(1.)}, '9': {'IVTT9': tensor(2295.), 'PT_far

In [103]:
class DotDict(dict):
    """
    A dictionary subclass that allows attribute access to its keys.
    """
    def __getattr__(self, attr):
        value = self.get(attr)
        if isinstance(value, dict):
            return DotDict(value)
        return value

args = DotDict({
    'batch_size': 32,
    'num_epochs': 100,
    'nll_tol': 0.001,
    'no_chg': 5,
    'lr': 0.001,
    'weight_decay': 0.000,
    'transform': 'exp',
    'l1': 0.000,
    'l2': 0.000,
    'taste_params': 8,
    'K': 4,
    'J': 3,
    'hidden_sizes': [],
    'act_func': '',
    'mu': 1.0,
    'model_no': 999,
    'cuda': False,
    'seed': None,
    'data_path': '../data',
    'result_root': '../results',
    'layer_sizes': [ds_test.z.size()[1]]+[]+[8],
})

In [156]:
import torch
import torch.nn as nn
from collections import OrderedDict
import torch.nn.functional as F

def get_act(nl_func):
    if nl_func=="tanh":
        return nn.Tanh()
    elif nl_func == "relu":
        return nn.ReLU()
    elif nl_func == "sigmoid":
        return nn.Sigmoid()
    else:
        return None
    
class ChoiceFlex(nn.Module):
    """TasteNet-MNL model for Swissmetro"""
    def __init__(self, args):
        super(ChoiceFlex, self).__init__()
        self.params_module = TasteParams(args.layer_sizes, args)
        self.util_module = Utility(args)
        self.args = args
    
    def forward(self, z, x, av):
        b = self.params_module(z) # taste parameters, (N,8)
        print("Shape of b: ", b.shape, b)
        b = self.constraints(b)  ## this is another way to include constraint:  using transformation to include constraints 
        print("Shape of b after constraints: ", b.shape, b)
        v = self.util_module(x,b) #no softmax here 
        print("Shape of v: ", v.shape)
        exp_v = torch.exp(v)
        print("Shape of exp_v: ", exp_v.shape)
        exp_v_av = exp_v * av
        print("Shape of exp_v_av: ", exp_v_av.shape)
        
        prob = exp_v_av/exp_v_av.sum(dim=1).view(-1,1) # prob (N,J)
        
        return prob, None  
    
    def constraints(self,b):
        '''
            Put transformation for the sake of constraints on the value of times 
        '''
        if self.args.transform=='relu':
            return torch.cat([-F.relu(-b[:,:-3]),b[:,-3:]],dim=1)
        elif self.args.transform == 'exp':
            return torch.cat([-torch.exp(-self.args.mu * b[:,:-3]),b[:,-3:]],dim=1) # the last 3 dim of b are under constraints
        else:
            return b
    
    def getParameters(self):
        '''
        get coef and bias of the TasteParams of the model 
        '''
        count = 0
        bias = []
        coef = []
        for params in self.parameters():
            if count % 2==0:
                coef.append(params)
            else:
                bias.append(params)
            count += 1
        return coef, bias
    
    def L2Norm(self):
        '''
        L2 norm, not including bias
        '''
        coef, bias = self.getParameters()
        norm = torch.zeros(1)
        for params in coef:
            norm += (params**2).sum()
        return norm            

    def L1Norm(self):
        '''
        L1 norm, not including bias
        '''
        coef, bias = self.getParameters()
        norm = torch.zeros(1)
        for params in coef:
            norm += (torch.abs(params).sum())
        return norm

class Utility(nn.Module):
    def __init__(self, args):
        super(Utility, self).__init__()
        self.args = args
        self.index = OrderedDict(zip(['TRAIN_TT', 'SM_TT', 'CAR_TT', 'TRAIN_HE', 'SM_HE', 'SM_SEATS', 'TRAIN_ASC', 'SM_ASC'], range(8)))

        
    def forward(self, x, b):
        '''
        x: attributes of each alternative, 
           including the intercept (N,K+1,J)  J alternatives, each have K+1 attributes including 1 for intercept. 
        b: taste parameters (K+1,J)  Some paramters are constant, some come from neural network hidden layer.  
        '''
        index = self.index
        N = len(b)        
        # last hidden nodes correspond to b_names
        v = torch.zeros(N,3)       
        print("Shape of v: ", v.shape) 
        v[:,0] = torch.ones(N) * b[:,index["TRAIN_ASC"]] + x["TRAIN"]["TRAIN_TT"]*b[:,index["TRAIN_TT"]] + x["TRAIN"]["TRAIN_HE"]*b[:,index["TRAIN_HE"]] - x["TRAIN"]["TRAIN_CO"]
        print("Shape of v[:,0]: ", v[:,0].shape, v[:,0])
        v[:,1] = torch.ones(N) * b[:,index["SM_ASC"]] + x["SM"]["SM_TT"]*b[:,index["SM_TT"]] + x["SM"]["SM_HE"]*b[:,index["SM_HE"]] + x["SM"]["SM_SEATS"]*b[:,index["SM_SEATS"]] - x["SM"]["SM_CO"]
        print("Shape of v[:,1]: ", v[:,1].shape, v[:,1])
        v[:,2] = x["CAR"]["CAR_TT"]*b[:,index["CAR_TT"]] - x["CAR"]["CAR_CO"]
        print("Shape of v[:,2]: ", v[:,2].shape, v[:,2])
        
        return v

class TasteParams(nn.Module):
    '''
    Network for tastes
    '''
    def __init__(self, layer_sizes, args):
        super(TasteParams, self).__init__()
        self.seq = nn.Sequential()
        for i, (in_size, out_size) in enumerate(zip(layer_sizes[:-1], layer_sizes[1:])):
            self.seq.add_module(name="L%i"%(i+1), module=nn.Linear(in_size, out_size, bias=True))
            if i<len(layer_sizes)-2:
                self.seq.add_module(name="A%i"%(i+1), module=get_act(args.act_func))
        self.args = args
        
    def forward(self,z):
        '''
        Parameters:
            z: (N,D) # batch size, input dimension
        Returns:
            V: (N,K) # taste parameters 
        '''
        N,D = z.size()
        test = self.seq(z)
        print("Shape of test: ", test.shape, test)
        return  test# (N,K) 

model = ChoiceFlex(args)


In [130]:
print(ds_test.z, ds_test[0]['x'], ds_test.av)

tensor([[0., 0., 0.,  ..., 1., 0., 0.],
        [1., 0., 1.,  ..., 1., 0., 0.],
        [1., 0., 1.,  ..., 1., 0., 0.],
        ...,
        [1., 0., 1.,  ..., 0., 0., 0.],
        [1., 0., 0.,  ..., 0., 0., 0.],
        [1., 0., 0.,  ..., 0., 0., 0.]]) {'TRAIN': {'TRAIN_TT': tensor(1.1600), 'TRAIN_HE': tensor(0.3000), 'TRAIN_CO': tensor(0.3500)}, 'SM': {'SM_TT': tensor(0.7300), 'SM_HE': tensor(0.2000), 'SM_CO': tensor(0.4500), 'SM_SEATS': tensor(0.)}, 'CAR': {'CAR_TT': tensor(0.), 'CAR_CO': tensor(0.)}} tensor([[1., 1., 0.],
        [1., 1., 1.],
        [1., 1., 1.],
        ...,
        [1., 1., 1.],
        [1., 1., 0.],
        [1., 1., 1.]])


In [157]:
model.forward(ds_test.z, ds_test[0]['x'], ds_test.av)

Shape of test:  torch.Size([1604, 8]) tensor([[-0.0683,  0.0886,  0.1025,  ..., -0.0419,  0.4013, -0.2079],
        [ 0.0255,  0.3066, -0.4723,  ..., -0.2628,  0.2486, -0.2341],
        [-0.0694, -0.4241, -0.0321,  ...,  0.3155,  0.4547, -0.0091],
        ...,
        [ 0.0906,  0.0254, -0.2923,  ...,  0.2476,  0.4071, -0.0164],
        [-0.1186, -0.1864, -0.0915,  ...,  0.2632,  0.3812,  0.1247],
        [-0.4876,  0.4111, -0.2344,  ..., -0.2402,  0.1962, -0.0526]],
       grad_fn=<AddmmBackward0>)
Shape of b:  torch.Size([1604, 8]) tensor([[-0.0683,  0.0886,  0.1025,  ..., -0.0419,  0.4013, -0.2079],
        [ 0.0255,  0.3066, -0.4723,  ..., -0.2628,  0.2486, -0.2341],
        [-0.0694, -0.4241, -0.0321,  ...,  0.3155,  0.4547, -0.0091],
        ...,
        [ 0.0906,  0.0254, -0.2923,  ...,  0.2476,  0.4071, -0.0164],
        [-0.1186, -0.1864, -0.0915,  ...,  0.2632,  0.3812,  0.1247],
        [-0.4876,  0.4111, -0.2344,  ..., -0.2402,  0.1962, -0.0526]],
       grad_fn=<AddmmBackw

(tensor([[0.5451, 0.4549, 0.0000],
         [0.1613, 0.1602, 0.6785],
         [0.1944, 0.1007, 0.7049],
         ...,
         [0.1934, 0.1581, 0.6485],
         [0.4993, 0.5007, 0.0000],
         [0.0767, 0.2143, 0.7090]], grad_fn=<DivBackward0>),
 None)