# Imports

In [None]:
import os, pickle, json, gc
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from collections import Counter
import numpy as np, pandas as pd
from sklearn.model_selection import KFold

# Paths for files

In [None]:
# Input locations: pickled test data, pickled training data, sample submission.
pt_testset = '../input/test-dataset-for-indoor-positioning-comp/test_data'
path_to_train = '../input/training-dataset-for-indoor-positioning-comp/train_data'
path_to_sample = '../input/indoor-location-navigation/sample_submission.csv'

# Output directory for the per-fold training histories.
path_to_save = './hist'
# exist_ok=True keeps this cell re-runnable; a plain os.mkdir raises
# FileExistsError as soon as the directory already exists.
os.makedirs(path_to_save, exist_ok=True)

# Auxiliary Code

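This section contains the helper code needed to load the pickled per-site training and test data and to split the ground truth into x, y and floor targets.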

In [None]:
def gen_for_serialisation(path_to_train):
    """Yield the pickled ``(site, train, ground)`` triple of every file in a directory.

    Parameters
    ----------
    path_to_train : str
        Directory whose entries are unpickled one after another.

    Yields
    ------
    tuple
        ``(site, train, ground)`` exactly as unpacked from each pickle.

    Notes
    -----
    ``pickle.load`` can execute arbitrary code while loading; only point this
    at files from a trusted source.
    """
    # os.listdir returns entries in arbitrary order; sorting makes the order in
    # which sites are processed reproducible between runs.
    for file_name in sorted(os.listdir(path_to_train)):
        # The context manager closes the handle even if unpickling or the
        # three-way unpack raises.
        with open(os.path.join(path_to_train, file_name), "rb") as f:
            site, train, ground = pickle.load(f)
        yield site, train, ground
def get_x_y_floor(truth_array):
    """Split ground-truth rows into three parallel lists.

    Each row is indexed positionally: element 0 goes to the first list,
    element 1 to the second and element 2 to the third.

    Returns
    -------
    tuple of list
        ``(xs, ys, floors)``; three empty lists when there are no rows.
    """
    # Materialise once so a single-pass iterable is only consumed one time.
    rows = list(truth_array)
    x_values = [row[0] for row in rows]
    y_values = [row[1] for row in rows]
    floor_values = [row[2] for row in rows]
    return x_values, y_values, floor_values

def get_data_for_test(pt_test,site):
    """Unpickle and return the contents of ``<pt_test>/<site>.pickle``."""
    pickle_path = "{pt_test}/{site}.pickle".format(pt_test=pt_test, site=site)
    with open(pickle_path, "rb") as handle:
        return pickle.load(handle)

# Model Definition

Run this cell to define `create_model`, which builds and compiles the neural network.

In [None]:
def create_model(input_d):
    """Build and compile a small fully connected regression network.

    Architecture: ``Dense(w1, sigmoid) -> Dense(w2, relu) -> Dense(w1, sigmoid)
    -> Dense(1)``, where ``w1 = n_features // 2`` and ``w2 = n_features // 4``
    (each clamped to at least 1). The model is compiled with Adam
    (learning rate 0.01), MSE loss and an MSE metric.

    Parameters
    ----------
    input_d : tuple
        Shape of a single sample; only ``input_d[0]`` (the number of input
        features) is used.

    Returns
    -------
    keras.Sequential
        The compiled, untrained model.
    """
    n_features = int(input_d[0])
    # A Dense layer needs a positive unit count; without the clamp, integer
    # division yields 0 units when there are fewer than 2 (w1) or 4 (w2) features.
    w1 = max(1, n_features // 2)
    w2 = max(1, n_features // 4)

    model = keras.Sequential()
    model.add(layers.Dense(w1, activation='sigmoid', input_shape=(n_features,)))
    model.add(layers.Dense(w2, activation='relu'))
    model.add(layers.Dense(w1, activation='sigmoid'))
    model.add(layers.Dense(1))
    # `learning_rate` is the supported keyword; the legacy `lr` alias is
    # deprecated and rejected by recent Keras releases.
    model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.01),
                  loss='mse',
                  metrics=['mean_squared_error'])
    return model

# Fitting

In [None]:
#fit to the neural net
def fit(train_data, target_data, experiment_no, path_to_save, test_data,
        site=None, n_splits=10, epochs=1, random_state=None):
    """Train one model per cross-validation fold and average their test predictions.

    For every fold a fresh model from ``create_model`` is trained on the
    training split and validated on the held-out split. Its training history
    is pickled to ``{path_to_save}/{site}_{fold}_NN{experiment_no}.pickle``
    and its predictions for ``test_data`` are added to a running sum. After
    the last fold the sum is divided by the number of folds.

    Parameters
    ----------
    train_data : array-like
        Feature rows; converted to a float array.
    target_data : array-like
        One regression target per feature row; converted to a float array.
    experiment_no : str
        Tag inserted into the history file name after ``NN``.
    path_to_save : str
        Directory receiving the per-fold history pickles.
    test_data : iterable of (features, timestamp)
        Samples to predict; ``timestamp`` becomes the key of the result.
    site : str, optional
        Site identifier used in the history file name. When omitted, the
        module-level ``site`` variable is used for backward compatibility
        with callers that relied on that global.
    n_splits : int, default 10
        Number of cross-validation folds (also the averaging divisor).
    epochs : int, default 1
        Training epochs per fold. With the default of 1 the EarlyStopping
        callback (patience 10) can never trigger.
    random_state : int, optional
        Seed for the shuffled KFold split; ``None`` keeps it unseeded.

    Returns
    -------
    dict
        ``{timestamp: [mean_prediction]}``: a one-element list per test sample.

    Raises
    ------
    ValueError
        If no ``site`` is given and no module-level ``site`` exists.
    """
    if site is None:
        # Older callers never passed the site and relied on a notebook global.
        site = globals().get("site")
        if site is None:
            raise ValueError("fit() needs a `site` to name the history files")

    # The `np.float` alias was removed from NumPy; the builtin float is the
    # same float64 dtype.
    target_data = np.asarray(target_data).astype(float)
    train_data = np.asarray(train_data).astype(float)

    # Materialise the test set once so every fold predicts it in one batched
    # call instead of one model.predict call per sample.
    test_pairs = list(test_data)
    timestamps = [timestamp for _, timestamp in test_pairs]
    if test_pairs:
        test_features = np.asarray([feat for feat, _ in test_pairs]).astype(float)
        prediction_sum = np.zeros((len(test_pairs), 1), dtype=float)

    # Define the K-fold Cross Validator
    kfold = KFold(n_splits=n_splits, shuffle=True, random_state=random_state)
    # K-fold Cross Validation model evaluation
    for fold_no, (train_idx, val_idx) in enumerate(kfold.split(train_data, target_data), start=1):
        model = create_model(train_data[0].shape)
        history = model.fit(
                            train_data[train_idx],
                            target_data[train_idx],
                            batch_size=32,
                            verbose=1,
                            epochs=epochs,
                            callbacks=[keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)],
                            validation_data=(train_data[val_idx], target_data[val_idx]))
        with open('{pts}/{site}_{fold}_NN{exp_no}.pickle'.format(pts=path_to_save, site=site, fold=fold_no, exp_no=experiment_no), 'wb') as f:
            pickle.dump(history.history, f)
        if test_pairs:
            prediction_sum += np.asarray(model.predict(test_features), dtype=float).reshape(len(test_pairs), -1)

        # Release the fold's model; otherwise every fold/target/site keeps
        # adding Keras state to the session and memory grows without bound.
        del model
        keras.backend.clear_session()
        gc.collect()

    if not test_pairs:
        return dict()

    # Divide by the actual number of folds rather than a hard-coded 10.
    mean_predictions = prediction_sum / n_splits
    return {timestamp: [float(value) for value in row]
            for timestamp, row in zip(timestamps, mean_predictions)}

#fits models for x,y, and floor separately.
def fit_model_site_all_three_model(site, train_data, target_data, path_to_save, exp_no, test_data):
    """Fit separate x, y and floor models for one site and tabulate their test predictions.

    Parameters
    ----------
    site : str
        Site identifier; forwarded to ``fit`` for the history file names.
    train_data : array-like
        Feature rows shared by all three models.
    target_data : iterable
        Ground-truth rows, split into x / y / floor by ``get_x_y_floor``.
    path_to_save : str
        Directory receiving the per-fold history pickles.
    exp_no : str
        Experiment number used in the history file names.
    test_data : iterable of (features, timestamp)
        Samples to predict.

    Returns
    -------
    pandas.DataFrame
        Indexed by timestamp, with columns ``x``, ``timestamp``, ``y``, ``floor``.
    """
    train_data = np.array(train_data)
    xs, ys, floors = get_x_y_floor(target_data)
    # (name used in the history file, output column, target values)
    targets = (("xs", "x", xs), ("ys", "y", ys), ("floors", "floor", floors))

    test_df = None
    for name, column, values in targets:
        # fit() already adds the "NN" prefix and ".pickle" suffix, so only the
        # bare tag is passed; passing "NN..._.pickle" produced names such as
        # "..._NNNN03_xs.pickle.pickle".
        tag = "{exp_no}_{name}".format(exp_no=exp_no, name=name)
        preds = fit(train_data, values, tag, path_to_save, test_data, site=site)
        if test_df is None:
            # The first target defines the frame and its timestamp index.
            test_df = pd.DataFrame.from_dict(preds, orient='index', columns=[column])
            test_df["timestamp"] = test_df.index
        else:
            # Each prediction is a one-element list; keep just the scalar.
            scalar_preds = {timestamp: pred[0] for timestamp, pred in preds.items()}
            test_df[column] = test_df["timestamp"].map(scalar_preds)
    return test_df

# Actual Training

In [None]:
# Collect one prediction DataFrame per site.
sample_dfs = list()
gen = gen_for_serialisation(path_to_train)
for site, train, truth in gen:
    # Load the test pickle named after this site.
    test_data = get_data_for_test(pt_testset, site)
    # "03" is the experiment number passed through to the per-fold history file names.
    sample_dfs.append( fit_model_site_all_three_model(site,train, truth, path_to_save, "03", test_data))

# Stack the per-site frames into a single frame.
sample_df = pd.concat(sample_dfs)
# NOTE(review): get_sample_submission_index is not defined in any cell shown
# here -- confirm it is defined or imported before this cell runs.
index = get_sample_submission_index(path_to_sample)
# Align rows to that index; entries without a prediction become 0 in every column.
sample_df = sample_df.reindex(index).fillna(0)
# Write the aligned predictions (index included) as CSV.
sample_df.to_csv("./sample_submission.csv")