In [1]:
import os
import pickle

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from sklearn.preprocessing import scale
from torch.utils.data import DataLoader, Dataset, TensorDataset

#import tensorflow as tf
#from keras.models import Sequential
#from keras.optimizers import SGD, Adam, Adagrad
#from keras import backend as K
#from keras.layers import Embedding
#from keras.layers import Dense, Reshape, Concatenate, Activation, Dropout
#from keras.callbacks import ModelCheckpoint

In [None]:
# Disabled configuration, kept as a bare string literal so none of these
# assignments run. It lists separate `-len` / `-dist` label files per split,
# whereas the active path cell uses a single labels file per split.
"""
train_cache = 'cache/train.pickle'
train_labels_len_cache = 'cache/train-labels-len.npy'
train_labels_dist_cache = 'cache/train-labels-dist.npy'

validation_cache = 'cache/validation.pickle'
validation_labels_len_cache = 'cache/validation-labels-len.npy'
validation_labels_dist_cache = 'cache/validation-labels-dist.npy'

test_cache = 'cache/test.pickle'
test_labels_len_cache = 'cache/test-labels-len.npy'
test_labels_dist_cache = 'cache/test-labels-dist.npy'

competition_test_cache = 'cache/competition-test.pickle'
metadata_cache = 'cache/metadata.pickle'
"""

In [2]:
# Locations of the cached artefacts, one (features, labels) pair per data split.
train_cache, train_labels_cache = 'cache/train.pickle', 'cache/train-labels.npy'
validation_cache, validation_labels_cache = 'cache/validation.pickle', 'cache/validation-labels.npy'
test_cache, test_labels_cache = 'cache/test.pickle', 'cache/test-labels.npy'

# Unlabelled competition inputs and the metadata dictionary used to size the embeddings.
competition_test_cache = 'cache/competition-test.pickle'
metadata_cache = 'cache/metadata.pickle'

In [3]:
# NOTE(review): pd.read_pickle / pickle.load execute arbitrary code embedded in
# the file, so these caches must only ever come from a trusted source.

# Feature frames for the three labelled splits (loaded in train/validation/test order).
train, validation, test = (
    pd.read_pickle(frame_path)
    for frame_path in (train_cache, validation_cache, test_cache)
)

# Matching label arrays, stored separately as .npy files.
train_labels, validation_labels, test_labels = (
    np.load(labels_path)
    for labels_path in (train_labels_cache, validation_labels_cache, test_labels_cache)
)

# Competition inputs, plus the metadata object read by the model cells.
competition_test = pd.read_pickle(competition_test_cache)
with open(metadata_cache, 'rb') as handle:
    metadata = pickle.load(handle)

In [4]:
# Columns fed to the model, in the order ``MLP.forward`` indexes them
# (column 0 -> quarter hour ... column 5 -> stand id).
# Other column sets that were tried (pass them via ``columns=``; the model's
# input handling has to be adapted to match):
#   * the six below plus 'STAND_LONGITUDE', 'STAND_LATITUDE'
#   * only 'QUARTER_HOUR', 'DAY_OF_WEEK', 'WEEK_OF_YEAR'
FEATURE_COLUMNS = (
    'QUARTER_HOUR', 'DAY_OF_WEEK', 'WEEK_OF_YEAR',
    'ORIGIN_CALL_ENCODED', 'TAXI_ID_ENCODED', 'ORIGIN_STAND_ENCODED',
)


def process_features(df, columns=FEATURE_COLUMNS):
    """Extract the model input columns of ``df`` as a 2-D torch tensor.

    Args:
        df: DataFrame containing at least the requested columns.
        columns: Iterable of column names to extract, in output order.
            Defaults to ``FEATURE_COLUMNS``.

    Returns:
        A tensor with one row per DataFrame row and one column per requested
        column; the dtype is whatever ``torch.tensor`` infers from the values.

    Raises:
        KeyError: if a requested column is missing from ``df``.
    """
    # list(...) so tuples/generators select several columns instead of being
    # interpreted by pandas as a single (tuple) key.
    return torch.tensor(df[list(columns)].values)

In [12]:
class MLP(nn.Module):
    """Embedding + MLP regressor over six categorical features.

    Each of the first six input columns is looked up in its own embedding
    table; the embeddings are concatenated and passed through three
    ReLU-activated linear layers and a final linear layer with one output.

    Args:
        cardinalities: Mapping providing the vocabulary sizes under the keys
            ``n_quarter_hours``, ``n_days_per_week``, ``n_weeks_per_year``,
            ``n_client_ids``, ``n_taxi_ids`` and ``n_stand_ids``. When omitted,
            the notebook-level ``metadata`` object is used, so ``MLP()`` keeps
            working as before.
        embedding_dim: Width of every embedding vector (default 10). The first
            linear layer is sized from it instead of a hard-coded 60.

    Attribute names are unchanged so previously saved ``state_dict``
    checkpoints still load.
    """

    def __init__(self, cardinalities=None, embedding_dim=10):
        super().__init__()
        if cardinalities is None:
            # Backward-compatible default: the metadata loaded by the notebook.
            cardinalities = metadata

        # One table per categorical column, created in the input column order.
        self.embed_quarter_hour = nn.Embedding(cardinalities['n_quarter_hours'], embedding_dim)
        self.embed_day_of_week = nn.Embedding(cardinalities['n_days_per_week'], embedding_dim)
        self.embed_week_of_year = nn.Embedding(cardinalities['n_weeks_per_year'], embedding_dim)
        self.embed_client_ids = nn.Embedding(cardinalities['n_client_ids'], embedding_dim)
        self.embed_taxi_ids = nn.Embedding(cardinalities['n_taxi_ids'], embedding_dim)
        self.embed_stand_ids = nn.Embedding(cardinalities['n_stand_ids'], embedding_dim)

        n_embedded_columns = 6
        self.layer1 = nn.Sequential(
            nn.Linear(in_features=n_embedded_columns * embedding_dim, out_features=100),
            nn.ReLU()
        )
        self.layer2 = nn.Sequential(
            nn.Linear(in_features=100, out_features=40),
            nn.ReLU()
        )
        self.layer3 = nn.Sequential(
            nn.Linear(in_features=40, out_features=10),
            nn.ReLU()
        )
        self.output_layer = nn.Linear(in_features=10, out_features=1)

    def forward(self, x):
        """Map a ``(batch, >=6)`` tensor of category indices to ``(batch, 1)`` predictions.

        Only the first six columns are read; they are converted to integer
        indices before the embedding look-ups.
        """
        embedders = (
            self.embed_quarter_hour,
            self.embed_day_of_week,
            self.embed_week_of_year,
            self.embed_client_ids,
            self.embed_taxi_ids,
            self.embed_stand_ids,
        )
        # int64 is the index dtype every nn.Embedding version accepts.
        indices = x[:, :len(embedders)].long()
        embedded = [embed(indices[:, col]) for col, embed in enumerate(embedders)]
        hidden = torch.cat(embedded, dim=1)
        hidden = self.layer1(hidden)
        hidden = self.layer2(hidden)
        hidden = self.layer3(hidden)
        return self.output_layer(hidden)

In [13]:
import torch.optim as optim

# Loss module handed to the training / evaluation helpers (they take its
# square root, i.e. report RMSE).
criterion = nn.MSELoss()

# Optimiser step size, read as a global by the training function.
learning_rate = 0.001

# Configured epoch budget.
# NOTE(review): the training cell passes a literal 50 instead of this value.
num_epochs = 60

In [14]:
BATCH_SIZE = 128

# The datasets get their own names: the original variable ``train_data`` shared
# its name with the training function defined in a later cell, so re-running
# this cell silently replaced the function with a TensorDataset.
# Labels are stored as float32 up front so every consumer compares like with
# like against the model's float32 output.
train_dataset = TensorDataset(
    process_features(train),
    torch.tensor(train_labels, dtype=torch.float32),
)
validate_dataset = TensorDataset(
    process_features(validation),
    torch.tensor(validation_labels, dtype=torch.float32),
)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
# No shuffling for evaluation: the reported loss is a mean of per-batch RMSEs,
# so a fixed batch composition makes the number reproducible between runs.
val_loader = DataLoader(validate_dataset, batch_size=BATCH_SIZE, shuffle=False)

#device = 'cpu'
device = torch.device('cuda' if torch.cuda.is_available() else "cpu")

# Per-epoch mean training loss, appended to by the training function.
all_losses = []
train_loss = []

In [15]:
# Build the network and place its parameters on the selected device in one step
# (nn.Module.to returns the module itself).
model = MLP().to(device)

In [16]:
def validate(model, val_loader, criterion):
    """Return the mean per-batch RMSE of ``model`` over ``val_loader``.

    Args:
        model: Module to evaluate. It is switched to eval mode and left there.
        val_loader: Sized iterable of ``(inputs, targets)`` batches.
        criterion: Loss returning a mean squared error; its square root is
            taken per batch.

    Returns:
        float: the average of the per-batch RMSE values. Note this is a mean
        of batch RMSEs, not the RMSE over all samples.

    Raises:
        ValueError: if ``val_loader`` contains no batches.
    """
    n_batches = len(val_loader)
    if n_batches == 0:
        raise ValueError("val_loader is empty; cannot compute a validation loss")

    # Run on whatever device the model lives on rather than a notebook global.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device('cpu')

    model.eval()  # Set the model to evaluation mode
    running_val_loss = 0.0

    with torch.no_grad():  # We don't need gradients for validation
        for inputs, targets in val_loader:
            inputs = inputs.to(run_device)
            # Same dtype/shape treatment as the training loop: float32 column vector.
            targets = targets.to(run_device).to(torch.float32).reshape(-1, 1)

            outputs = model(inputs)
            running_val_loss += torch.sqrt(criterion(outputs, targets)).item()  # RMSE

    return running_val_loss / n_batches


In [17]:
def train_data(model, train_loader, val_loader, criterion, num_epochs,
               lr=None, checkpoint_dir='model_state', history=None):
    """Train ``model`` with AdamW on an RMSE objective, checkpointing every epoch.

    After each epoch the mean per-batch training RMSE is appended to
    ``history`` and printed, the mean per-batch validation RMSE is printed, and
    the model's ``state_dict`` is written to
    ``<checkpoint_dir>/model_epoch<epoch>.pth``. The file name uses the
    0-based epoch index, while the printed epoch number is 1-based.

    Args:
        model: Module to train (updated in place; left in eval mode).
        train_loader: Sized iterable of ``(x, y)`` training batches.
        val_loader: Sized iterable of ``(x, y)`` validation batches.
        criterion: Loss returning a mean squared error.
        num_epochs: Number of passes over ``train_loader``.
        lr: Learning rate. Defaults to the notebook-level ``learning_rate``.
        checkpoint_dir: Directory for the per-epoch checkpoints. It is created
            if missing, so a whole epoch is never lost to a missing folder.
        history: List receiving the per-epoch training loss. Defaults to the
            notebook-level ``all_losses``.

    Returns:
        None.
    """
    if lr is None:
        lr = learning_rate
    if history is None:
        history = all_losses

    # Create the checkpoint folder up front; torch.save does not create it.
    os.makedirs(checkpoint_dir, exist_ok=True)

    optimizer = torch.optim.AdamW(model.parameters(), lr=lr)
    # Feed batches to whatever device the model's parameters are on.
    run_device = next(model.parameters()).device

    for epoch in range(num_epochs):
        # Training Phase
        model.train()
        epoch_loss = 0
        for x, y in train_loader:
            x = x.to(run_device)
            y = y.to(run_device).to(torch.float32).reshape(-1, 1)
            optimizer.zero_grad()
            loss = torch.sqrt(criterion(model(x), y))  # RMSE
            epoch_loss += loss.item()
            loss.backward()
            optimizer.step()
        mean_train_loss = epoch_loss / len(train_loader)
        history.append(mean_train_loss)
        print(f"Epoch: {epoch+1} Training Loss:{mean_train_loss}")

        # Validation Phase
        model.eval()
        val_loss = 0
        with torch.no_grad():
            for x_val, y_val in val_loader:
                x_val = x_val.to(run_device)
                # Cast like the training targets so prediction and target dtypes agree.
                y_val = y_val.to(run_device).to(torch.float32).reshape(-1, 1)
                val_loss += torch.sqrt(criterion(model(x_val), y_val)).item()  # RMSE
        print(f"Epoch: {epoch+1} Validation Loss:{val_loss/len(val_loader)}")

        checkpoint_path = os.path.join(checkpoint_dir, f'model_epoch{epoch}.pth')
        torch.save(model.state_dict(), checkpoint_path)

In [None]:
# NOTE(review): the epoch count is passed as a literal 50, although the
# hyper-parameter cell defines num_epochs = 60 — confirm which one is intended.
train_data(model, train_loader, val_loader, criterion, 50) # use the training function you defined
# Mean per-batch validation RMSE of the final model; stored but not displayed by this cell.
val_loss = validate(model, val_loader, criterion)

Epoch: 1 Training Loss:486.7239877424236
Epoch: 1 Validation Loss:467.8638427734375


In [None]:
# Restore the weights saved after the 27th epoch (checkpoint files are numbered
# from 0). map_location='cpu' lets a checkpoint written on a GPU machine be
# read on a CPU-only one; load_state_dict then copies the values into the
# model's parameters on whatever device they already occupy.
model.load_state_dict(torch.load('model_state/model_epoch26.pth', map_location='cpu'))

In [None]:
import matplotlib.pyplot as plt

# all_losses is a plain list of Python floats, so it can be plotted directly —
# no tensor round-trip is needed.
fig, ax = plt.subplots()
ax.plot(all_losses)
ax.set_title('Training loss per epoch')
ax.set_xlabel('Epoch (0-based)')
ax.set_ylabel('Mean per-batch RMSE')
plt.show()

In [None]:
def view_loss(model, test_set, test_label, criterion, batch_size=None):
    """Return the SUM of per-batch RMSE values of ``model`` on a labelled set.

    The caller is expected to divide by the number of batches to obtain a mean.

    Args:
        model: Module to evaluate; its train/eval mode is restored afterwards.
        test_set: Tensor of inputs, one row per sample.
        test_label: Tensor of targets with the same first dimension.
        criterion: Loss returning a mean squared error.
        batch_size: Evaluation batch size. Defaults to the notebook-level
            ``BATCH_SIZE``.

    Returns:
        A 0-dim tensor (without autograd history) holding the summed per-batch
        RMSE, or the integer 0 if the set is empty.
    """
    if batch_size is None:
        batch_size = BATCH_SIZE

    # Evaluate on the device the model's parameters occupy.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device('cpu')

    # Fixed order: the sum of per-batch RMSEs depends on how samples are
    # grouped, so shuffling would make the result vary between calls.
    test_loader = DataLoader(TensorDataset(test_set, test_label),
                             batch_size=batch_size, shuffle=False)

    total_loss = 0
    was_training = model.training
    model.eval()
    try:
        # Without no_grad the running sum would keep every batch's autograd
        # graph alive for the whole evaluation.
        with torch.no_grad():
            for x, y in test_loader:
                x = x.to(run_device)
                y = y.to(run_device).reshape(-1, 1)
                total_loss += torch.sqrt(criterion(model(x), y))  # RMSE
    finally:
        model.train(was_training)
    return total_loss

In [None]:
# view_loss returns the SUM of per-batch RMSEs, so the mean is that sum divided
# by the number of batches. The previous scaling (/ n_samples * 320) assumed a
# batch size of 320, which does not match BATCH_SIZE and inflated the figure.
test_features = process_features(test)
test_targets = torch.tensor(test_labels).to(torch.float32)
n_test_batches = -(-test_features.shape[0] // BATCH_SIZE)  # ceiling division
print(view_loss(model, test_features, test_targets, criterion) / n_test_batches)

In [None]:
# process_features already returns a tensor; re-wrapping it in torch.tensor()
# only makes an extra copy and triggers PyTorch's copy-construct UserWarning.
predict_input = process_features(competition_test).to(device)
print(predict_input.shape)

In [None]:
def predict(model, inputs=None):
    """Run ``model`` in inference mode and return its raw output tensor.

    Args:
        model: Module to evaluate; its train/eval mode is restored afterwards.
        inputs: Tensor to feed to the model. Defaults to the notebook-level
            ``predict_input``, so ``predict(model)`` behaves as before.

    Returns:
        The model output, computed without autograd tracking so a full-batch
        forward pass does not hold on to an autograd graph.
    """
    if inputs is None:
        inputs = predict_input

    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            return model(inputs)
    finally:
        model.train(was_training)

In [None]:
# Raw model output for the competition inputs (predict reads the global predict_input).
out = predict(model)

In [None]:
# Submission frame: the TRIP_ID column of the public test file next to the
# flattened model output, labelled TRAVEL_TIME.
trip_ids = pd.read_csv('test_public.csv')['TRIP_ID']
predict_tensor = out.detach().cpu().numpy().ravel()
travel_times = pd.Series(predict_tensor, name='TRAVEL_TIME')
# Column-wise concat aligns the two series on their default integer index.
embed_mlp_predict = pd.concat([trip_ids, travel_times], axis=1)
embed_mlp_predict.head()

In [None]:
# Write the submission file without the DataFrame index column.
embed_mlp_predict.to_csv('Embedding_MLP.csv', index=False)

In [None]:
# Scratch check: look up a single index from the first feature column
# (QUARTER_HOUR) in a freshly initialised embedding table.
# The previous version called torch.nn.Embedding() without arguments and then
# used an undefined name `embedding`, so the cell could never run.
a = process_features(train)
print(a[:,0])
# 96 rows — presumably one per quarter hour of the day (24 * 4); confirm
# against metadata['n_quarter_hours'].
embedding = torch.nn.Embedding(96, 10)
print(embedding(a[0,0].to(torch.int32)))

In [None]:
# Scratch walk-through of the model's embedding step on the first 90 training rows.
x = process_features(train)[0:90]

# Stand-alone embedding tables, created in the same order as inside MLP.
embed_quarter_hour = nn.Embedding(metadata['n_quarter_hours'], 10)
embed_day_of_week = nn.Embedding(metadata['n_days_per_week'], 10)
embed_week_of_year = nn.Embedding(metadata['n_weeks_per_year'], 10)
embed_client_ids = nn.Embedding(metadata['n_client_ids'], 10)
embed_taxi_ids = nn.Embedding(metadata['n_taxi_ids'], 10)
embed_stand_ids = nn.Embedding(metadata['n_stand_ids'], 10)

# Convert every column to int32 once, then look each one up in its own table.
x_int = x.to(torch.int32)
qhr = embed_quarter_hour(x_int[:, 0])
dow = embed_day_of_week(x_int[:, 1])
woy = embed_week_of_year(x_int[:, 2])
ci = embed_client_ids(x_int[:, 3])
ti = embed_taxi_ids(x_int[:, 4])
si = embed_stand_ids(x_int[:, 5])

# Any columns after the sixth are appended unchanged; x is rebound to the result.
x = torch.cat([qhr, dow, woy, ci, ti, si, x[:, 6:]], dim=1)
print(x)
print(x.shape)

In [None]:
# First column of x for rows 0..90 (the slice simply stops early if x has fewer rows).
print(x[0:91,0])

In [None]:
def create_model(metadata):
    """
    Builds and compiles the legacy Keras version of the network.

    One embedding sub-model is created per categorical feature (quarter hour,
    day of week, week of year, client id, taxi id, stand id) plus a small dense
    sub-model for a 2-value numeric input; these are handed to a Concatenate
    layer followed by Dense(200) + ReLU, Dense(50) + ReLU and Dense(1).

    Args:
        metadata: mapping providing the vocabulary sizes under the keys
            'n_quarter_hours', 'n_days_per_week', 'n_weeks_per_year',
            'n_client_ids', 'n_taxi_ids' and 'n_stand_ids'.

    Returns:
        The compiled model.

    NOTE(review): Sequential, Embedding, Reshape, Dense, Concatenate,
    Activation, SGD and tf are not in scope — their imports are commented out
    at the top of the notebook — so calling this function raises NameError
    as the file stands.
    """
      
    # Arbitrary dimension for all embeddings
    embedding_dim = 10

    # Quarter hour of the day embedding
    embed_quarter_hour = Sequential()
    embed_quarter_hour.add(Embedding(metadata['n_quarter_hours'], embedding_dim, input_length=1))
    embed_quarter_hour.add(Reshape((embedding_dim,)))

    # Day of the week embedding
    embed_day_of_week = Sequential()
    embed_day_of_week.add(Embedding(metadata['n_days_per_week'], embedding_dim, input_length=1))
    embed_day_of_week.add(Reshape((embedding_dim,)))

    # Week of the year embedding
    embed_week_of_year = Sequential()
    embed_week_of_year.add(Embedding(metadata['n_weeks_per_year'], embedding_dim, input_length=1))
    embed_week_of_year.add(Reshape((embedding_dim,)))

    # Client ID embedding
    embed_client_ids = Sequential()
    embed_client_ids.add(Embedding(metadata['n_client_ids'], embedding_dim, input_length=1))
    embed_client_ids.add(Reshape((embedding_dim,)))

    # Taxi ID embedding
    embed_taxi_ids = Sequential()
    embed_taxi_ids.add(Embedding(metadata['n_taxi_ids'], embedding_dim, input_length=1))
    embed_taxi_ids.add(Reshape((embedding_dim,)))

    # Taxi stand ID embedding
    embed_stand_ids = Sequential()
    embed_stand_ids.add(Embedding(metadata['n_stand_ids'], embedding_dim, input_length=1))
    embed_stand_ids.add(Reshape((embedding_dim,)))
    
    # Dense projection of a 2-value numeric input down to a single unit.
    # NOTE(review): presumably the stand longitude/latitude pair — confirm. The
    # earlier comment here described 20 GPS values, which does not match input_dim=2.
    coords = Sequential()
    coords.add(Dense(1, input_dim=2))

    # Merge all the inputs into a single input layer
    # NOTE(review): Concatenate is constructed with a list of Sequential models
    # as its first argument — verify this against the Keras version in use.
    model = Sequential()
    preprocessing_layer = Concatenate([embed_quarter_hour, embed_day_of_week, embed_week_of_year,embed_client_ids,embed_taxi_ids,
                                           embed_stand_ids,coords])
    
    model.add(preprocessing_layer)

    # First hidden layer: 200 units with ReLU
    model.add(Dense(200))
    model.add(Activation('relu'))

    # Second hidden layer: 50 units with ReLU (no softmax is involved)
    model.add(Dense(50))
    model.add(Activation('relu'))

    # Single linear output unit
    model.add(Dense(1))

    # Compile the model
    optimizer = SGD(lr=0.01, momentum=0.9, clipvalue=1.)  # Use `clipvalue` to prevent exploding gradients
    # NOTE(review): a metrics object is passed as the loss here — confirm Keras accepts it.
    model.compile(loss=tf.keras.metrics.RootMeanSquaredError(), optimizer=optimizer)
    
    return model


In [None]:
def start_new_session():
    """
    Create a TF1-compat session on the default graph and register it with Keras.

    The session is configured with ``allow_growth`` so GPU memory is claimed on
    demand rather than all up front. Requires ``tf`` to be in scope.
    """
    tf1 = tf.compat.v1

    # Only take as much GPU memory as the session actually needs
    session_config = tf1.ConfigProto()
    session_config.gpu_options.allow_growth = True

    tf1.keras.backend.set_session(
        tf1.Session(config=session_config, graph=tf1.get_default_graph())
    )

In [None]:
def full_train(n_epochs=100, batch_size=200, save_prefix=None):
    """
    Train a freshly created Keras model and return its training history.

    Args:
        n_epochs: number of epochs passed to ``model.fit``.
        batch_size: batch size passed to ``model.fit``.
        save_prefix: when given, per-epoch weights are checkpointed to
            ``cache/<prefix>-<epoch>-<val_loss>.hdf5`` and the history
            dictionary is pickled to ``cache/<prefix>-history.pickle``.

    Returns:
        The history object returned by ``model.fit``.
    """
    persist = save_prefix is not None

    # Optional checkpoint callback: write the weights to disk after each epoch
    callbacks = []
    if persist:
        weights_pattern = "cache/%s-{epoch:03d}-{val_loss:.4f}.hdf5" % save_prefix
        checkpoint = ModelCheckpoint(weights_pattern, monitor='val_loss', mode='min',
                                     save_weights_only=True, verbose=1)
        callbacks.append(checkpoint)

    # Fresh session first, then the model
    start_new_session()
    model = create_model(metadata)

    # Fit on the training split, validating on the validation split
    history = model.fit(
        process_features(train), train_labels,
        epochs=n_epochs, batch_size=batch_size,
        validation_data=(process_features(validation), validation_labels),
        callbacks=callbacks)

    # Persist the per-epoch metrics next to the weights
    if persist:
        history_path = 'cache/%s-history.pickle' % save_prefix
        with open(history_path, 'wb') as handle:
            pickle.dump(history.history, handle, protocol=pickle.HIGHEST_PROTOCOL)

    return history

In [None]:
 # Launch the legacy Keras training run; weights and history are saved under the 'mymodel' prefix.
 # NOTE(review): the Keras/TensorFlow imports are commented out at the top of the notebook, so this call fails as the file stands.
 full_train(n_epochs=100, batch_size=200, save_prefix='mymodel')