# **Coordinate to impulse space Neural Network** 

In this notebook I will attempt to create a neural network that learns the transformation from coordinate space to impulse (momentum) space. In theory it should work well for particles that were created at the origin.

First I'm going to implement a basic NN that learns on the entire data set.
Then I'll remove non-origin particles from the training set.

In [None]:
# Imports

import os

import numpy as np
import pandas as pd

from trackml.dataset import load_event
from trackml.randomize import shuffle_hits
from trackml.score import score_event

In [None]:
# Load data
# Read a single event from the local TrackML training sample. `load_event`
# is unpacked into the four tables (hits, cells, particles, truth) that the
# following cells work with.

data_path = '../Data/train_100_events/'
event_prefix = 'event000001000'
hits, cells, particles, truth = load_event(os.path.join(data_path, event_prefix))

In [None]:
# Randomly reorder the rows of `hits` before the training set is built.
# NOTE(review): no random_state is passed, so the order differs between runs.
from sklearn.utils import shuffle
hits = shuffle(hits)

In [None]:
# Preview the first rows of the particles table.
particles.head()

In [None]:
def _build_training_set(hits, truth, particles, max_origin_radius=0.1):
    """Pair hit coordinates with the initial impulse of the owning particle.

    Each hit is matched to its particle through `truth` (hit_id ->
    particle_id) and `particles` (particle_id -> q, vx, vy, px, py, pz).
    A hit is kept only when all of the following hold:

    * its particle_id is not 0,
    * the particle charge `q` is not positive,
    * the transverse distance of the particle vertex from the origin,
      sqrt(vx**2 + vy**2), does not exceed `max_origin_radius`.

    The joins are vectorised: one merge per table instead of one full table
    scan per hit, which is what the previous row-by-row loop performed.

    Args:
        hits: table with at least the columns hit_id, x, y, z.
        truth: table with at least the columns hit_id, particle_id.
        particles: table with at least the columns particle_id, q, vx, vy,
            px, py, pz.
        max_origin_radius: largest accepted transverse vertex distance.

    Returns:
        (coordinates, impulses): two arrays with one row per kept hit, in
        the row order of `hits`; columns are (x, y, z) and (px, py, pz).
        Hits without a truth row or without a particle row are dropped.
    """
    # Keep only the first row per key, mirroring the first-match lookup
    # (`.values[0]`) of the row-by-row implementation.
    hit_to_particle = truth[['hit_id', 'particle_id']].drop_duplicates(subset='hit_id')
    particle_columns = ['particle_id', 'q', 'vx', 'vy', 'px', 'py', 'pz']
    particle_table = particles[particle_columns].drop_duplicates(subset='particle_id')

    # Inner joins preserve the order of the left-hand keys, so the samples
    # come out in the (shuffled) order of `hits`. Inner rather than left
    # joins also avoid NaN keys, which would force the integer particle ids
    # into floats.
    table = hits[['hit_id', 'x', 'y', 'z']].merge(hit_to_particle, on='hit_id', how='inner')
    table = table[table['particle_id'] != 0]
    table = table.merge(particle_table, on='particle_id', how='inner')

    # Express the filters as "reject" masks so that NaN values behave as in
    # the original `if ... > ...: continue` checks (NaN is never rejected).
    positive_charge = table['q'] > 0
    displaced_vertex = np.hypot(table['vx'], table['vy']) > max_origin_radius
    selected = table[~(positive_charge | displaced_vertex)]

    # The row-wise version produced float64 coordinates whenever `hits` mixes
    # integer and float columns (iterrows upcasts each row); cast explicitly
    # so the downstream dtype does not depend on that detail.
    coordinates = selected[['x', 'y', 'z']].values.astype(np.float64)
    impulses = selected[['px', 'py', 'pz']].values
    return coordinates, impulses


x_train, y_train = _build_training_set(hits, truth, particles)

# `progress` used to count completed batches of 1000 collected samples; it is
# kept (with its final value) for any cell that still reads it.
progress = len(x_train) // 1000
print(progress)

In [None]:
# Print floats with 5 decimals and without scientific notation.
np.set_printoptions(precision=5, suppress=True)
# Make sure both sample collections are NumPy arrays, then show the first ten
# coordinate rows followed by the first ten impulse rows.
x_train = np.asarray(x_train)
y_train = np.asarray(y_train)
print(x_train[0:10])
print(y_train[0:10])

In [None]:
# Per-column maximum of the training coordinates (one value per x, y, z column).
# NOTE(review): not referenced by any later cell in view; the model rescales
# its inputs by a fixed factor of 1000 instead.
max_coordinates = np.amax(x_train, axis=0)

In [None]:
# Import keras

from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, BatchNormalization, Lambda
from keras.initializers import TruncatedNormal

In [None]:
# Define model
def model_architecture():
    """Build the uncompiled coordinate-to-impulse regression network.

    Layers, in order: a Lambda that divides its input by 1000, a ReLU Dense
    layer with 100 units, a ReLU Dense layer with 1000 units and a linear
    Dense output layer with 3 units. Every Dense kernel uses the same
    TruncatedNormal(stddev=0.1) initializer instance.

    Returns:
        The assembled Sequential model (not compiled).
    """
    kernel_init = TruncatedNormal(stddev=0.1)

    layer_stack = [
        # Normalization layer: fixed rescaling of the raw inputs.
        Lambda(lambda x: x / 1000),
        Dense(100, kernel_initializer=kernel_init, activation='relu'),
        # A Dropout(0.1) at this position is currently disabled.
        Dense(1000, kernel_initializer=kernel_init, activation='relu'),
        Dense(3, kernel_initializer=kernel_init),
    ]

    return Sequential(layer_stack)

In [None]:
# Init model
# Build a fresh, not yet compiled network from model_architecture().
model = model_architecture()

In [None]:
# Compile the model with mean-squared-error loss and the Adam optimizer
# (learning rate 0.001); mean absolute error is reported as an extra metric.
from keras.optimizers import Adam
optimizer = Adam(lr=0.001)
model.compile(loss='mse',
              optimizer=optimizer,
              metrics=['mae'])

In [None]:
# Running and evaluating the model
# Fit on the complete training arrays for 100 epochs with batches of 1000
# samples. No validation data is passed to fit(), so only training
# loss/metric values are recorded in `history`.
history = model.fit(x_train, y_train,
                    batch_size=1000,
                    epochs=100)

In [None]:
# Sanity check on the first `lim` training samples: print the input
# coordinates, the predicted impulses and the true impulses for comparison.
# NOTE: these samples were part of the training data, so this is not a
# held-out evaluation.
lim = 10
y_predict = model.predict(x_train[0:lim], batch_size=lim)
print(x_train[0:lim])
print(y_predict[0:lim])
print(y_train[0:lim])

Try clustering
---

In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import DBSCAN

def predict(X, eps=0.5):
    """Cluster the rows of X with DBSCAN and return one label per row.

    The features are standardised with a freshly fitted StandardScaler
    before clustering. DBSCAN runs with min_samples=1 and the kd_tree
    neighbour search.

    Args:
        X: array-like of samples to cluster, one row per sample.
        eps: DBSCAN neighbourhood radius, applied to the standardised data.

    Returns:
        The cluster labels produced by DBSCAN.fit_predict.
    """
    standardised = StandardScaler().fit_transform(X)
    clusterer = DBSCAN(eps=eps, min_samples=1, algorithm='kd_tree')
    return clusterer.fit_predict(standardised)

#score
def create_one_event_submission(event_id, hits, labels):
    """Assemble the submission table for a single event.

    Args:
        event_id: identifier repeated on every row of the result.
        hits: table whose `hit_id` column supplies one row per hit.
        labels: one track label per hit, in the same order as `hits`.

    Returns:
        A DataFrame with the integer columns event_id, hit_id and track_id.
    """
    column_names = ["event_id", "hit_id", "track_id"]
    event_column = [event_id] * len(hits)
    stacked = np.column_stack((event_column, hits.hit_id.values, labels))
    frame = pd.DataFrame(data=stacked, columns=column_names)
    return frame.astype(int)

def try_clustering(hits, X):
    """Cluster the hits by the features in X and print the resulting score.

    The labels returned by predict(X) are written into a submission with
    event id 0 and scored with score_event against the notebook-level
    `truth` table.

    Args:
        hits: table of hits; its `hit_id` column labels the submission rows.
        X: feature rows to cluster, one per hit and in the order of `hits`.
    """
    track_labels = predict(X)
    event_submission = create_one_event_submission(0, hits, track_labels)
    print("Your score: ", score_event(truth, event_submission))

In [None]:
# Predict an impulse vector for every hit of the event, then cluster the hits
# in that predicted impulse space and print the score.
# NOTE(review): the network was fitted only on hits that passed the filters
# of the training-set cell, but is applied here to all hits.
X = np.transpose([hits.x.values, hits.y.values, hits.z.values])

y_predict = model.predict(X, batch_size=10000)

try_clustering(hits, y_predict)

In [None]:
# Display the impulses predicted for all hits.
y_predict

TODO tomorrow
---
OK, so now I have to separate the regression into 3 different networks.
Also adjust the data generation; maybe generate a CSV with the coordinate-to-impulse mapping.