In [1]:
import io

import matplotlib.patches as patches
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from IPython.display import clear_output
from IPython.display import display
from scipy.stats import norm

from sklearn import linear_model
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.multioutput import MultiOutputRegressor
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler

from keras import backend as K
from keras import objectives
from keras import regularizers
from keras.callbacks import ModelCheckpoint, TensorBoard
from keras.layers import Input, Dense, Lambda
from keras.models import Model, load_model
from keras.utils import plot_model

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


## Core Preliminary Functions 

In [2]:
def stacking(groups):
    """Convert per-group frames into fixed-width feature and target matrices.

    For every frame in ``groups`` the ``'hash'`` column is dropped and the
    remaining cells are flattened row by row into a single vector.  The last
    two values of that vector become the target row; everything before them
    becomes the feature row, right-padded with zeros up to ``max_length``.

    Parameters
    ----------
    groups : iterable of pandas.DataFrame
        One frame per group; each frame must contain a ``'hash'`` column.

    Returns
    -------
    X_group : numpy.ndarray
        Array of shape ``(number of groups, max_length)``.
    y_group : numpy.ndarray
        Array of shape ``(number of groups, 2)``.

    Raises
    ------
    ValueError
        If a group flattens to fewer than two values, or if its feature part
        is longer than ``max_length``.

    Notes
    -----
    Reads the module-level name ``max_length``, which must be defined before
    this function is called.
    """
    # Seed both lists with empty, correctly shaped arrays so that an empty
    # ``groups`` still yields (0, max_length) and (0, 2) results.
    feature_rows = [np.empty(shape=[0, max_length])]
    target_rows = [np.empty(shape=[0, 2])]

    for group in groups:
        # Row-major flattening reproduces the row-by-row append order.
        flat = group.drop('hash', axis=1).values.ravel()
        if flat.shape[0] < 2:
            raise ValueError(
                "each group needs at least two values to split off the target, "
                "got %d" % flat.shape[0])

        features = flat[:-2]  # everything except the trailing target pair
        num_pad = max_length - features.shape[0]
        if num_pad < 0:
            raise ValueError(
                "group has %d feature values but max_length is %d"
                % (features.shape[0], max_length))

        padded = np.append(features, np.zeros(num_pad))  # padding with 0s
        feature_rows.append(padded[np.newaxis, :])
        target_rows.append(flat[-2:][np.newaxis, :])

    # Concatenate once instead of re-copying the accumulated array per group.
    X_group = np.concatenate(feature_rows, axis=0)
    y_group = np.concatenate(target_rows, axis=0)
    return X_group, y_group


def stacking_by_chunks(df, num_iter=100):
    """Build the feature/target matrices for ``df`` by stacking groups in chunks.

    The ``'trajectory_id'`` column is dropped, the remaining frame is grouped
    by ``'hash'``, and the groups are handed to ``stacking`` in ``num_iter``
    equally sized chunks followed by one remainder chunk.  The per-chunk
    results are concatenated in group order.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``'trajectory_id'`` and ``'hash'`` columns.
    num_iter : int, optional
        Number of equally sized chunks (default 100).  Groups left over after
        integer division go into a final extra chunk.

    Returns
    -------
    X : numpy.ndarray
        Array of shape ``(number of groups, max_length)``.
    y : numpy.ndarray
        Array of shape ``(number of groups, 2)``.

    Raises
    ------
    ValueError
        If ``num_iter`` is smaller than 1.

    Notes
    -----
    Relies on the module-level ``stacking`` function and ``max_length`` value.
    """
    if num_iter < 1:
        raise ValueError("num_iter must be at least 1, got %r" % (num_iter,))

    # Materialise the groups once; rebuilding this list for every chunk
    # re-splits the whole frame each time.
    frames = [frame for _, frame in df.drop('trajectory_id', axis=1).groupby('hash')]
    num_groups = len(frames)
    chunk_size = num_groups // num_iter

    # Chunk boundaries: num_iter equal chunks, then whatever is left over.
    bounds = [i * chunk_size for i in range(num_iter + 1)] + [num_groups]

    X_parts = [np.empty(shape=[0, max_length])]
    y_parts = [np.empty(shape=[0, 2])]
    for start, stop in zip(bounds[:-1], bounds[1:]):
        if start == stop:
            continue  # empty chunk contributes no rows
        X_group, y_group = stacking(frames[start:stop])
        X_parts.append(X_group)
        y_parts.append(y_group)

    X = np.concatenate(X_parts, axis=0)
    y = np.concatenate(y_parts, axis=0)
    return X, y

def city(y, bonus=(0, 0), x_range=(3750901.5068, 3770901.5068),
         y_range=(-19268905.6133, -19208905.6133)):
    """Flag which points fall strictly inside an axis-aligned rectangle.

    Parameters
    ----------
    y : array-like of shape (n, 2)
        Points to test; column 0 is compared against ``x_range`` and column 1
        against ``y_range``.
    bonus : sequence of two numbers, optional
        Margins added on every side of the rectangle: ``bonus[0]`` widens the
        first axis, ``bonus[1]`` the second.  Defaults to no margin.
    x_range, y_range : (low, high) pairs, optional
        Rectangle bounds per axis.  The defaults are the constants this
        notebook uses (presumably the city area the function is named after).

    Returns
    -------
    numpy.ndarray
        1-D float array of length ``n`` holding 1.0 for points inside the
        widened rectangle and 0.0 otherwise.  Bounds are exclusive.  An empty
        input yields an empty 1-D array.
    """
    points = np.asarray(y, dtype=float)
    if points.size == 0:
        return np.zeros(0)

    xs = points[:, 0]
    ys = points[:, 1]
    inside = (
        (xs > x_range[0] - bonus[0]) & (xs < x_range[1] + bonus[0])
        & (ys > y_range[0] - bonus[1]) & (ys < y_range[1] + bonus[1])
    )
    return inside.astype(float)


## Loading Data 

In [3]:
# Read the prepared feature and target tables from the working directory.
# The first CSV column is used as the index; read_csv already returns a
# DataFrame, so no further wrapping is needed.
X_train = pd.read_csv('X_train.csv', index_col=0)
y_train = pd.read_csv('y_train.csv', index_col=0)

FileNotFoundError: File b'X_train.csv' does not exist

# 1. Neural Network 

In [None]:
# Splitting train/test data.
# Hold out 20% of the rows first, then standardize with statistics fitted on
# the training rows only, so nothing about the held-out rows leaks into the
# scaling.  The fitted scaler is kept in `scaler` so the same transform can be
# applied to any data later passed to the model.
# NOTE: this cell rebinds X_train / y_train; re-run the loading cell before
# re-running it.
X_train, X_test, y_train, y_test = train_test_split(
    X_train, y_train, test_size=0.2, random_state=5)
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# Model architecture: a fully connected network mapping the feature vector to
# two outputs.  Despite its name, `autoencoder` is a plain feed-forward
# regressor (the name is kept because later cells refer to it).
input_dim = X_train.shape[1]
input_layer = Input(shape=(input_dim, ))
# 10e-5 == 1e-4: L1 penalty on the activations of the first two hidden layers.
layer_1 = Dense(250, activation="tanh", activity_regularizer=regularizers.l1(10e-5))(input_layer)
layer_2 = Dense(200, activation="relu", activity_regularizer=regularizers.l1(10e-5))(layer_1)
layer_3 = Dense(150, activation='tanh')(layer_2)
layer_4 = Dense(100, activation='relu')(layer_3)
layer_5 = Dense(75, activation='relu')(layer_4)
layer_6 = Dense(50, activation='relu')(layer_5)
# Linear output: the targets are compared against coordinate thresholds in the
# millions (see `city`), which a sigmoid bounded to (0, 1) can never produce.
# NOTE(review): the targets are not rescaled anywhere in this notebook;
# consider scaling y as well to help optimisation.
layer_7 = Dense(2, activation='linear')(layer_6)
autoencoder = Model(inputs=input_layer, outputs=layer_7)

In [None]:
# Training the model and saving the best checkpoint.
nb_epoch = 200
batch_size = 100
# Mean absolute error is reported alongside the MSE loss; classification
# accuracy carries no meaning for a two-output regression.
autoencoder.compile(optimizer='adam',
                    loss='mean_squared_error',
                    metrics=['mae'])
# Keep only the weights of the epoch with the lowest validation loss.
checkpointer = ModelCheckpoint(filepath="model.h5",
                               monitor='val_loss',
                               verbose=1,
                               save_best_only=True)
tensorboard = TensorBoard(log_dir='./logs',
                          histogram_freq=0,
                          write_graph=True,
                          write_images=True)
# NOTE(review): the held-out split doubles as validation data here, so the
# checkpoint is selected on the same rows later used for evaluation.
history = autoencoder.fit(X_train, y_train,
                    epochs=nb_epoch,
                    batch_size=batch_size,
                    shuffle=True,
                    validation_data=(X_test, y_test),
                    verbose=1,
                    callbacks=[checkpointer, tensorboard]).history
# Reload the best checkpoint written during training.
autoencoder = load_model('model.h5')

In [None]:
# Accuracy of the trained network on the held-out split.
# Predictions come from the model trained above; the name previously used
# here was never defined in this notebook, so the cell failed on a fresh kernel.
y_test = np.array(y_test)
y_pred = autoencoder.predict(X_test)
bonus = [500, 2000]

# Inside/outside labels for the true and the predicted points, computed once.
in_city_true = city(y_test, bonus=bonus)
in_city_pred = city(y_pred, bonus=bonus)
print (classification_report(in_city_true, in_city_pred, digits = 4))
print (confusion_matrix(in_city_true, in_city_pred))
print (accuracy_score(in_city_true, in_city_pred))

# Plotting: predicted vs. actual points with the rectangle used by `city`
# (inner) and the same rectangle widened by `bonus` (outer).
s = 20
a = 0.8
fig = plt.figure()
ax = fig.add_subplot(111)
ax.scatter(y_pred[:, 0], y_pred[:, 1], edgecolor='k',
            c="navy", s=s, marker="s", alpha=a, label="Predicted")
ax.scatter(y_test[:, 0], y_test[:, 1], edgecolor='k',
            c="red", s=s, marker="s", alpha=a, label="Actual")
rect1 = patches.Rectangle((3750901.5068,-19268905.6133),20000,60000,linewidth=1,edgecolor='r',facecolor='none')
rect2 = patches.Rectangle((3750901.5068-bonus[0],-19268905.6133-bonus[1]),20000+2*bonus[0],60000+2*bonus[1],linewidth=1,edgecolor='r',facecolor='none')
ax.add_patch(rect1)
ax.add_patch(rect2)
ax.legend()
plt.show()

In [None]:
# Predict for the rows in X_test.csv and export the predictions with their ids.
# NOTE(review): the network is trained on StandardScaler-transformed features,
# but X_test.csv is passed to predict() exactly as loaded.  Confirm the file is
# already scaled the same way; otherwise the training-time scaling must be
# applied here first.
# NOTE: this rebinds X_test, which earlier held the held-out evaluation split.
X_test = pd.read_csv('X_test.csv', index_col=0)
y_predict = autoencoder.predict(X_test)
# downloading results
result = pd.DataFrame(y_predict)

# Ids of the rows in data_test.csv whose x_exit is missing; rebuilt as a frame
# with a fresh 0..n-1 index so it lines up positionally with `result`.
hash_names = pd.read_csv('data_test.csv')
hash_names = hash_names.loc[hash_names["x_exit"].isna(), "trajectory_id"]
hash_names = pd.DataFrame(hash_names.values)

# NOTE(review): the id column and the first prediction column are both
# labelled 0 in the combined frame and in the CSV header; rename the columns
# if the consumer of Final_Results.csv expects specific names.
results = pd.concat([hash_names, result],axis =1)
results.to_csv('Final_Results.csv')
display(results.head())
result.describe()