# Import package section...

In [None]:
import pandas as pd
import csv
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import os
import glob
from math import sqrt
from collections import defaultdict, Counter
from sys import stdout
from time import time
import pickle

print("Packages successfully imported!!")

# 1. File loaders...

In [None]:
# Directory names, relative to the current working directory, that the
# loader cells below pass to file_list / file_dict.
pre = "pre_files"      # scanned for *.csv files
norm = "norm_files"    # scanned for *.npy files
init_wt = "init" # not needed
trained = "weights"    # scanned for *.npy files (trained weights)

# load a file as a dataframe
def csv_files(loc, name):
    """Read the CSV file ``name`` located in directory ``loc``.

    The file is opened through a joined path rather than by changing the
    working directory, so the caller's cwd is never modified (the previous
    chdir(loc) / chdir("..") pair did not restore it for nested or absolute
    ``loc`` and left it changed when the read failed).

    Parameters:
        loc: directory containing the file.
        name: file name inside ``loc``.

    Returns:
        The DataFrame without its first column.
    """
    frame = pd.read_csv(os.path.join(loc, name))

    # remove extra enumerated column
    return frame.iloc[:, 1:]


# load a file as a numpy array
def npy_files(loc, name):
    """Load the ``.npy`` file ``name`` located in directory ``loc``.

    The file is opened through a joined path rather than by changing the
    working directory, so the caller's cwd is never modified, even when
    ``loc`` is nested/absolute or the load fails.

    Parameters:
        loc: directory containing the file.
        name: file name inside ``loc``.

    Returns:
        Whatever ``np.load`` returns for that file.
    """
    return np.load(os.path.join(loc, name))


# dispatch table: loader-type name -> loader function (used by file_dict)
fload_dict = {
    'csv_files': csv_files,
    'npy_files': npy_files,
}


# dictionary creation and lookup functions
def lookup(cur_dict):
    """Split a dictionary into two parallel lists.

    Returns:
        (keys, values) as two lists in the dictionary's iteration order.
    """
    keyset, valset = [], []
    for key, value in cur_dict.items():
        keyset.append(key)
        valset.append(value)

    return keyset, valset

# join up a dictionary
def join_dict(keyset, valset):
    """Build a dictionary pairing ``keyset[i]`` with ``valset[i]``.

    Every key is looked up by position in ``valset``: surplus values are
    ignored and a too-short ``valset`` raises IndexError. A repeated key
    keeps the value of its last occurrence.
    """
    return {key: valset[pos] for pos, key in enumerate(keyset)}


# load the list of file names
def file_list(direc, ext):
    """List the files in ``direc`` whose name ends in ``.ext``.

    The directory is scanned through a joined glob pattern instead of
    chdir(direc) / chdir('..'), which did not restore the working directory
    for nested or absolute paths and left it changed on errors.

    Parameters:
        direc: directory to scan.
        ext: file extension without the leading dot (e.g. "csv").

    Returns:
        Sorted list of bare file names (no directory part). Sorting makes the
        order reproducible; glob itself returns files in arbitrary order.

    Raises:
        FileNotFoundError: if ``direc`` is not an existing directory.
    """
    if not os.path.isdir(direc):
        raise FileNotFoundError(direc)

    # directory being scanned
    print(os.path.abspath(direc))

    # glob.escape keeps special characters in the directory name literal
    pattern = os.path.join(glob.escape(direc), "*." + ext)
    list_files = sorted(os.path.basename(path) for path in glob.glob(pattern))

    print(list_files)
    print(os.getcwd())
    print("*****************\n")
    return list_files


# return dictionaries of file list
def file_dict(f_list, loc, f_type):
    """Load every file in ``f_list`` and map file name -> loaded data.

    Parameters:
        f_list: file names inside ``loc``.
        loc: directory passed on to the loader.
        f_type: key into ``fload_dict`` selecting the loader function.

    Returns:
        Dictionary of file name -> object returned by the loader.
    """
    # the loader is resolved once per file, so an empty f_list never touches fload_dict
    f_store = [fload_dict[f_type](loc, fn) for fn in f_list]

    print("Num of files: ", len(f_list), "\nNum of files loaded: ", len(f_store))

    # file_names : file_data
    f_dict = dict(zip(f_list, f_store))

    print("Dictionary compiled!!", len(f_dict.keys()))
    return f_dict


# load up the index from a csv file
def load_idx(fnames, direc='mapping'):
    """Load index files into ``{(k1, k2): [values...]}`` dictionaries.

    Each CSV row is read as ``k1, k2, count, v_1, ..., v_count``; the first
    ``count`` values after the third column are appended to the list stored
    under ``(k1, k2)``. Rows with a count of zero add no key.

    Files are opened through a joined path, so the working directory is never
    changed (the previous chdir('mapping') / chdir('..') pair left the cwd
    changed when a file failed to open or parse). Blank rows are skipped
    instead of raising IndexError.

    Parameters:
        fnames: file names inside ``direc``.
        direc: directory holding the files; defaults to 'mapping'.

    Returns:
        Dictionary of file name -> defaultdict(list) index.

    Raises:
        IndexError: if a row declares more values than it contains.
        ValueError: if a key, count or value is not an integer.
    """
    # imported locally on purpose: the module-level name ``csv`` is rebound
    # to a plain string further down in this file
    import csv

    fvals = []

    for fname in fnames:
        idx = defaultdict(list)

        # newline='' is what the csv module expects for files it reads
        with open(os.path.join(direc, fname), 'rt', newline='') as f:
            for line in csv.reader(f):
                if not line:
                    continue

                count = int(line[2])
                if count <= 0:
                    continue

                # the key is the same for every value in the row: parse it once
                key = (int(line[0]), int(line[1]))
                idx[key].extend(int(line[pos]) for pos in range(3, 3 + count))

        print("File ", fname, " has been loaded")
        fvals.append(idx)

    return dict(zip(fnames, fvals))

In [None]:
os.chdir('..')
os.getcwd()

# Apply file and dictionary loaders...

In [None]:
# list of file names
# NOTE(review): the assignments to ``csv_files`` and ``npy_files`` below rebind
# the names of the loader functions defined earlier in this file. ``fload_dict``
# still holds the function objects, but calling csv_files(...) / npy_files(...)
# directly no longer works once this cell has run.

# get the list of csv files for the dataframes
csv_files = file_list(pre, "csv")

# get the list of npy files for each normalised array
npy_files = file_list(norm, "npy")

# load up the trained weights data file names
wt_files = file_list(trained, "npy")

# load up the resultant mappings
map_files = file_list("mapping", "csv")

In [None]:
# The third argument of file_dict is a key into fload_dict ("csv_files" or
# "npy_files") that selects the loader; it is not the list of file names.

# set up the dictionary of csv dataframe file names to file dataframe data
df_csv = file_dict(csv_files, pre, "csv_files")

# set up the dictionary of npy numpy array file names to file numpy data
num_npy = file_dict(npy_files, norm, "npy_files")

# load the trained weights data files 
tr_wt = file_dict(wt_files, trained, "npy_files")

# setting up all the mappings
tr_map = load_idx(map_files)

In [None]:
#os.chdir('..')
os.getcwd()

In [None]:
tr_wt['ten_re_wt.npy'].shape

In [None]:
tr_map.keys(), tr_wt.keys()

# Plotting attempts for thresholding...

In [None]:
# function to sort values and find the thresh point
def plot_sort(df_csv, field):
    """Sort every dataframe by ``field`` and plot every 100th sorted value.

    Parameters:
        df_csv: dictionary of name -> DataFrame.
        field: column to sort by and to plot.

    Returns:
        Dictionary of name -> DataFrame sorted by ``field``.
    """
    sort_di = {}

    for name, frame in df_csv.items():
        print("Data of shape: ", frame.shape, " plotting begins...")

        # sorting here
        ordered = frame.sort_values(field)
        column = ordered[field]

        print("Sorting ", name, " by ", field)
        # one red dot per 100th position of the sorted column
        for pos in range(0, column.shape[0], 100):
            plt.plot(pos, column.iloc[pos], 'r.')
        plt.show()

        sort_di[name] = ordered

    return sort_di

In [None]:
# Column names are taken from 'al.csv'; plot_sort then applies each name to
# every dataframe in df_csv.
field_li = list(df_csv['al.csv'].columns.values)
sort_k, sort_v = [], []

# starts at 1: the first column is not sorted/plotted
for i in range(1, len(field_li)):
    sort_k.append(field_li[i])
    vv = plot_sort(df_csv, field_li[i])
    sort_v.append(vv)

# field name -> {file name -> dataframe sorted by that field}
sorted_di = join_dict(sort_k, sort_v)
sorted_di.keys()

# 2. Creating labels...

In [None]:
#### helper functions

# function to observe and print shapes within each dictionary
def dict_shape(cur_dict):
    """Print the ``.shape`` of every value in ``cur_dict``; returns None."""
    for name, value in cur_dict.items():
        print("Shape of ", name, ": ", value.shape)

# function to find the number of junctions
def junc(pdx):
    """Return the row positions where the first column restarts.

    A row is a junction when its first-column value equals the value in the
    very first row (the starting time). The total number of rows is appended
    as a closing boundary.

    Parameters:
        pdx: dataframe whose first column is compared against its first value.

    Returns:
        List of row positions followed by ``pdx.shape[0]``.
    """
    n_rows = pdx.shape[0]
    if n_rows == 0:
        # nothing to compare: only the closing boundary
        return [0]

    first_col = pdx.iloc[:, 0]
    # compare using the starting time
    start = first_col.iloc[0]

    junc_pts = [row for row, value in enumerate(first_col) if value == start]
    junc_pts.append(n_rows)
    return junc_pts

# Combined label...

In [None]:
def field_lab(df_dict, field, thr1, thr2):
    """Create a binary label per dataframe by thresholding one column.

    The previous body referenced the undefined names ``pdx`` and ``thr`` and
    therefore raised NameError; it now uses the dataframe being iterated and
    ``thr1``. Keys are built by removing a trailing ".csv" suffix
    (``rstrip(".csv")`` strips any of the characters '.', 'c', 's', 'v', so
    e.g. "cs.csv" became "").

    Parameters:
        df_dict: dictionary of file name -> DataFrame.
        field: column to threshold.
        thr1: rows with ``field`` strictly greater than this get label 1.
        thr2: accepted for interface compatibility but not used.
            NOTE(review): the original body never used it either; confirm
            whether a second threshold was intended.

    Returns:
        Dictionary of "<file stem>_<field>" -> (n_rows, 1) int array of 0/1.
    """
    suffix = ".csv"
    fin_dict = {}

    for cur_k, cur_v in df_dict.items():
        # positional comparison, independent of the dataframe's index labels
        above = cur_v[field].to_numpy() > thr1
        lab = above.astype('int').reshape(-1, 1)

        print("Labels of ", field, " of ", cur_k, " has been created: ", lab.shape)

        stem = cur_k[:-len(suffix)] if cur_k.endswith(suffix) else cur_k
        fin_dict[stem + "_" + field] = lab

    return fin_dict

# Brake label! --> Brake Pressure

In [None]:
def brake_lab(pdx, df_dict, thresh=0.01):
    """Label rows by the 'Brake pressure/Axle 1/Left' column.

    Parameters:
        pdx: dataframe containing the 'Brake pressure/Axle 1/Left' column.
        df_dict: unused; present because lab_dict calls every labeler as
            ``labeler(frame, cur_dict)``.
        thresh: rows strictly above this value get label 1 (default 0.01).

    Returns:
        (n_rows, 1) int array: 1 where the column is > ``thresh``, else 0.
    """
    # pdx: only uses dataframes type of data

    # compared by position, so a non-default dataframe index does not matter
    above = pdx['Brake pressure/Axle 1/Left'].to_numpy() > thresh
    lab = above.astype('int').reshape(-1, 1)

    print("Labels of 1D created based on brake pressure...", lab.shape)
    return lab

# Speed label! --> Tangential Speed

In [None]:
df_csv['al.csv']['Tangential speed'].describe()

In [None]:
## threshold used, not for detection ##
def speed_lab(pdx, df_dict, thresh=76):
    """Label rows by the 'Tangential speed' column.

    Parameters:
        pdx: dataframe containing the 'Tangential speed' column.
        df_dict: unused; present because lab_dict calls every labeler as
            ``labeler(frame, cur_dict)``.
        thresh: rows strictly above this value get label 1 (default 76).

    Returns:
        (n_rows, 1) int array: 1 for high speed (> ``thresh``), else 0.
    """
    # pdx: only uses dataframes type of data

    # compared by position, so a non-default dataframe index does not matter
    above = pdx['Tangential speed'].to_numpy() > thresh
    lab = above.astype('int').reshape(-1, 1)

    print("Labels of 1D created based on tangential speed...", lab.shape)
    return lab

# Front/Back label! --> Tangent Acceleration X

In [None]:
df_csv['al.csv']['Tangent Acceleration/X'].describe()

In [None]:
## threshold used, not for detection ##
def froba_lab(pdx, df_dict, thresh=0):
    """Label rows by the sign of the 'Tangent Acceleration/X' column.

    Parameters:
        pdx: dataframe containing the 'Tangent Acceleration/X' column.
        df_dict: unused; present because lab_dict calls every labeler as
            ``labeler(frame, cur_dict)``.
        thresh: rows strictly above this value get label 1 (default 0).

    Returns:
        (n_rows, 1) int array: 1 for forward motion (> ``thresh``), 0 for
        backward motion.
    """
    # pdx: only uses dataframes type of data

    # compared by position, so a non-default dataframe index does not matter
    above = pdx['Tangent Acceleration/X'].to_numpy() > thresh
    lab = above.astype('int').reshape(-1, 1)

    print("Labels of 1D created based on front back acceleration...", lab.shape)
    return lab

# Left/Right label! --> Tangent Acceleration Y

In [None]:
df_csv['al.csv']['Tangent Acceleration/Y'].describe()

In [None]:
# function to return 0 as -ve and 1 as +ve for acceleration along y
def lefri_lab(pdx, df_dict, thresh=0):
    """Label rows by the sign of the 'Tangent Acceleration/Y' column.

    Parameters:
        pdx: dataframe containing the 'Tangent Acceleration/Y' column.
        df_dict: unused; present because lab_dict calls every labeler as
            ``labeler(frame, cur_dict)``.
        thresh: rows strictly above this value get label 1 (default 0).

    Returns:
        (n_rows, 1) int array: 1 for the +ve direction, 0 for the -ve one.
    """
    # pdx: only uses dataframes type of data

    # compared by position, so a non-default dataframe index does not matter
    above = pdx['Tangent Acceleration/Y'].to_numpy() > thresh
    lab = above.astype('int').reshape(-1, 1)

    print("Labels of 1D created based on left right acceleration...", lab.shape)
    return lab

# Top/Bottom label! --> Tangent Acceleration Z

In [None]:
df_csv['al.csv']['Tangent Acceleration/Z'].describe()

In [None]:
# function to return 0 as -ve and 1 as +ve for acceleration along z
def topbot_lab(pdx, df_dict, thresh=0):
    """Label rows by the sign of the 'Tangent Acceleration/Z' column.

    Parameters:
        pdx: dataframe containing the 'Tangent Acceleration/Z' column.
        df_dict: unused; present because lab_dict calls every labeler as
            ``labeler(frame, cur_dict)``.
        thresh: rows strictly above this value get label 1 (default 0).

    Returns:
        (n_rows, 1) int array: 1 where the column is > ``thresh`` (top
        direction?), else 0 (bottom direction?).
    """
    # pdx: only uses dataframes type of data

    # compared by position, so a non-default dataframe index does not matter
    above = pdx['Tangent Acceleration/Z'].to_numpy() > thresh
    lab = above.astype('int').reshape(-1, 1)

    print("Labels of 1D created based on top bottom acceleration...", lab.shape)
    return lab

# Lane Gap label?!

In [None]:
df_csv['al.csv']['Lane gap'].describe()

In [None]:
# function to return 1 when the lane gap is above a threshold, else 0
def langap_lab(pdx, df_dict, thresh=0.077):
    """Label rows by the 'Lane gap' column.

    Parameters:
        pdx: dataframe containing the 'Lane gap' column.
        df_dict: unused; present because lab_dict calls every labeler as
            ``labeler(frame, cur_dict)``.
        thresh: rows strictly above this value get label 1 (default 0.077).

    Returns:
        (n_rows, 1) int array: 1 where the lane gap is > ``thresh``, else 0.
    """
    # pdx: only uses dataframes type of data

    # compared by position, so a non-default dataframe index does not matter
    above = pdx['Lane gap'].to_numpy() > thresh
    lab = above.astype('int').reshape(-1, 1)

    print("Labels of 1D created based on lane gap...", lab.shape)
    return lab

# Split label - divide into distract & driver!

In [None]:
# function to place driver label on data
def split_lab(pdx, df_dict):
    """Number the stretches of rows between consecutive junctions 0, 1, 2, ...

    Parameters:
        pdx: dataframe whose junction points (see ``junc``) delimit the splits.
        df_dict: unused; present because lab_dict calls every labeler as
            ``labeler(frame, cur_dict)``.

    Returns:
        (n_rows, 1) int array holding the split number of every row.
    """
    # one label slot per row
    label = np.zeros(pdx.shape[0], dtype='int').reshape(-1, 1)

    # consecutive junction pairs are the [start, end) bounds of each split
    junctions = junc(pdx)
    bounds = zip(junctions[:-1], junctions[1:])

    for lab, (start, end) in enumerate(bounds):
        label[start:end] = lab

    print(junctions)
    print("Labels of 1D created based on split data...", label.shape)
    print("The labels are: ", set(label[:, 0]))
    return label

# Distraction label

In [None]:
# function to place a label on groups of consecutive splits
def distract_lab(pdx, per_label=40):
    """Give every run of ``per_label`` consecutive splits the same label.

    Splits are the stretches of rows between consecutive junctions returned
    by ``junc``. Splits 0..per_label-1 get label 0, the next ``per_label``
    splits get label 1, and so on.

    The "Testing labels" printout used to index ``label`` with every
    junction, including the closing one (``pdx.shape[0]``), which is one past
    the last row and raised IndexError; only split starts are printed now.

    Parameters:
        pdx: dataframe used to find the junction points.
        per_label: number of consecutive splits sharing one label (default 40).

    Returns:
        (n_rows, 1) int array of labels.

    Raises:
        ValueError: if ``per_label`` is not positive.
    """
    if per_label <= 0:
        raise ValueError("per_label must be a positive integer")

    # initialising the final label array
    label = np.zeros(pdx.shape[0], dtype='int').reshape(-1, 1)

    # get the junction points and create the label
    junctions = junc(pdx)
    starts = junctions[:-1]

    for split_no, (start, end) in enumerate(zip(starts, junctions[1:])):
        label[start:end] = split_no // per_label

    print(junctions)
    print("Labels of 1D created based on split data...", label.shape)
    print("Testing labels:")
    # the closing junction is not a row, so only split starts are shown
    for jj in starts:
        print(label[jj])
    print("The labels are: ", set(label[:, 0]))
    return label

# Segment label !

In [None]:
# function to segment data into parts
def seg_lab(pdx, df_dict, segments=5):
    """Cut every split of ``pdx`` into ``segments`` equal parts and label them.

    Within each split (rows between consecutive junctions from ``junc``) the
    first ``limit = split_length // segments`` rows get label 0, the next
    ``limit`` rows label 1, and so on; rows left over at the end get the last
    label (``segments - 1``). A split shorter than ``segments`` rows is
    labelled 0 throughout.

    Parameters:
        pdx: dataframe to segment.
        df_dict: unused; present because lab_dict calls every labeler as
            ``labeler(frame, cur_dict)``.
        segments: number of segments per split (default 5, the value that was
            previously hard-coded).

    Returns:
        (n_rows, 1) int array of segment labels.

    Raises:
        ValueError: if ``segments`` is smaller than 1.
    """
    if segments < 1:
        raise ValueError("segments must be at least 1")

    # initialise a label array to return
    new_lab = np.zeros(pdx.shape[0], dtype='int').reshape(-1, 1)
    # segment labels 0 .. segments-1
    seg_arr = np.arange(segments, dtype='int')
    print(type(seg_arr[0]))

    # initialise points of driver split
    splits = junc(pdx)
    print("The splits are at: ", splits)

    # segment each split on its own
    for start, end in zip(splits[:-1], splits[1:]):
        tot_pts = end - start
        limit = tot_pts // segments  # points per segment
        print("Start: ", start, " End: ", end, " Pts in b/w: ", tot_pts, "Pts per seg: ", limit)

        if limit == 0:
            # fewer points than segments: everything stays in segment 0
            continue

        # position within the split -> segment number, capped at the last one
        offsets = np.arange(tot_pts)
        new_lab[start:end, 0] = np.minimum(offsets // limit, seg_arr[-1])

    return new_lab

# Create label map dictionary...

In [None]:
# dispatch table: label-type name -> label creating function (used by lab_dict)
flab_dict = {
    'br_lab': brake_lab,
    'sd_lab': speed_lab,
    'fb_lab': froba_lab,
    'lr_lab': lefri_lab,
    'tb_lab': topbot_lab,
    'lg_lab': langap_lab,
    'sp_lab': split_lab,
    'se_lab': seg_lab,
}

# insert other functions here



# reset names between different types of files and what they are about
def reset_name(flist, rear):
    """Replace everything from the first '.' of each name with ``rear``.

    Parameters:
        flist: list of current file names (usually csv files).
        rear: text appended after the part of the name before the first '.'.

    Returns:
        List of new names, in the same order as ``flist``.
    """
    # keep the letters up to the first '.', then add the new ending
    new_f = [fi.split('.', 1)[0] + rear for fi in flist]

    print(new_f)
    return new_f


# creating functions that return a dictionary of a particular label
def lab_dict(cur_dict, rear, lab_name):
    """Apply one labeling function to every dataframe of ``cur_dict``.

    Parameters:
        cur_dict: dataframe dictionary (file name -> DataFrame).
        rear: ending given to the new keys (see ``reset_name``).
        lab_name: key into ``flab_dict`` selecting the label function.

    Returns:
        Dictionary of renamed key -> label array produced by the labeler.
    """
    frames = list(cur_dict.values())

    # next key set
    nex_k = reset_name(list(cur_dict.keys()), rear)

    # set label function
    labeler = flab_dict[lab_name]

    # every labeler is called as labeler(frame, whole_dictionary)
    nex_val = [labeler(frame, cur_dict) for frame in frames]

    print("Len of new keys created..", len(nex_k))
    print("Len of new values created..", len(nex_val))

    # pair the new keys with the new values
    return dict(zip(nex_k, nex_val))

# Applying the label functions...

In [None]:
# create brake label using only the dataframe data
br_lab = lab_dict(df_csv, "_brl", "br_lab")

In [None]:
# create speed label using only the dataframe data
sd_lab = lab_dict(df_csv, "_sdl", "sd_lab")

In [None]:
# create axis x label using only the dataframe data
fb_lab = lab_dict(df_csv, "_fbl", "fb_lab")

In [None]:
# create axis y label using only the dataframe data
lr_lab = lab_dict(df_csv, "_lrl", "lr_lab")

In [None]:
# create axis z label using only the dataframe data
tb_lab = lab_dict(df_csv, "_tbl", "tb_lab")

In [None]:
# create lane gap label using only the dataframe data
lg_lab = lab_dict(df_csv, "_lgl", "lg_lab")

In [None]:
# create split driver label using only the dataframe data
sp_lab = lab_dict(df_csv, "_spl", "sp_lab")

In [None]:
di_lab = distract_lab(df_csv['al.csv'])

In [None]:
# create segment label x2 using only the dataframe data
s2_lab = lab_dict(df_csv, "_s2l", "se_lab")

In [None]:
# create segment label x3 using only the dataframe data
s3_lab = lab_dict(df_csv, "_s3l", "se_lab")

In [None]:
# create segment label x4 using only the dataframe data
s4_lab = lab_dict(df_csv, "_s4l", "se_lab")

In [None]:
# create segment label x5 using only the dataframe data
s5_lab = lab_dict(df_csv, "_s5l", "se_lab")

In [None]:
# create segment label x6 using only the dataframe data
s6_lab = lab_dict(df_csv, "_s6l", "se_lab")

In [None]:
# create segment label x7 using only the dataframe data
s7_lab = lab_dict(df_csv, "_s7l", "se_lab")

# Further lab mod (unexpected)...

In [None]:
# do the modification here
def reset_1lab(val0, br):
    """Merge consecutive label runs of a column vector into coarser groups.

    Walks down the first column of ``val0`` counting value changes (the
    counter starts at 1 and the reference value at 0). Whenever the counter
    becomes a multiple of ``br`` the output group number is advanced and the
    counter is reset to 1.

    Parameters:
        val0: 2-D label array; only column 0 is read.
        br: counter modulus that triggers a new group.

    Returns:
        Int array with one column holding the group number of every row.
    """
    rows = val0.shape[0]
    new_lab = np.zeros(rows, dtype='int').reshape(rows, -1)

    previous = 0   # last label value seen
    changes = 1    # change counter since the last group switch
    group = 0      # current output label

    for row in range(rows):
        current = val0[row, 0]
        if current != previous:
            previous = current
            changes += 1

        if changes % br == 0:
            group += 1
            changes = 1

        new_lab[row, 0] = group

    return new_lab


# call the mod on any label
def mod_1lab(lab_dict, mod_key):
    """Return a copy of ``lab_dict`` with selected labels regrouped.

    Entries whose key equals one of ``mod_key`` are replaced by
    ``reset_1lab(value, 4)``; all other entries are carried over unchanged.
    The input dictionary itself is not modified.

    Parameters:
        lab_dict: dictionary of name -> label array (note: this parameter
            name hides the module-level ``lab_dict`` function inside here).
        mod_key: keys of the entries to regroup.
    """
    mod_dict = lab_dict.copy()

    for key, value in lab_dict.items():
        if any(key == wanted for wanted in mod_key):
            mod_dict[key] = reset_1lab(value, 4)

    return mod_dict

# 3. Mapping window labels...

In [None]:
# function to find the mid threshold
def get_mid(midded):
    """Return the middle column (index ``n_cols // 2``) of a 2-D array."""
    return midded[:, midded.shape[1] // 2]

# function to remap data in the form of a window
# !! works for both data and labels!!
def remap(data, window):
    """Slide a window over the rows of ``data`` and return the middle column.

    Row ``j`` of the intermediate matrix is rows ``j .. j+window-1`` of
    ``data`` flattened; the function returns column ``(n_cols * window) // 2``
    of that matrix (the same column ``get_mid`` selects).

    The output keeps its previous length of ``n_rows - window``, but every
    row is now filled: the old loop stopped one row short and left the last
    row as zeros.

    Parameters:
        data: 2-D array (n_rows, n_cols).
        window: number of consecutive rows per window.

    Returns:
        1-D array of length ``n_rows - window``; integer when ``data`` has a
        single column, float otherwise.

    Raises:
        ValueError: if ``window`` is larger than the number of rows.
    """
    x = data.shape[0] - window
    y = data.shape[1] * window

    if x < 0:
        raise ValueError("window is larger than the number of rows in data")

    # single-column input is treated as labels and kept integer
    dtype = 'int' if data.shape[1] == 1 else float
    remapped = np.zeros((x, y), dtype=dtype)

    for row in range(x):
        remapped[row, :] = data[row:row + window, :].reshape(-1)

    print("Label shape after window: ", remapped.shape)

    # middle column of the windowed matrix
    return remapped[:, y // 2]

# remapping is required only for labels as weights are already trained on the windowed input data

# function to quickly create a dictionary
def remap_dict(cur_dict, window):
    """Apply ``remap`` with the given window to every value of ``cur_dict``.

    Parameters:
        cur_dict: the dictionary of data to be windowed.
        window: the size of the window.

    Returns:
        Dictionary with the same keys and the windowed values.
    """
    cur_keys = list(cur_dict.keys())
    print(cur_keys)

    win_values = [remap(val, window) for val in cur_dict.values()]
    ret_dict = dict(zip(cur_keys, win_values))

    print("Len of new set of values: ", len(win_values))
    return ret_dict

# Apply remapping...

In [None]:
# creating the windowed brake dictionary
br_remap = remap_dict(br_lab, 5)

In [None]:
# creating the windowed speed dictionary
sd_remap = remap_dict(sd_lab, 5)

In [None]:
# creating the windowed axis x dictionary
fb_remap = remap_dict(fb_lab, 5)

In [None]:
# creating the windowed axis y dictionary
lr_remap = remap_dict(lr_lab, 5)

In [None]:
# creating the windowed axis z dictionary
tb_remap = remap_dict(tb_lab, 5)

In [None]:
# creating the windowed lane gap dictionary
lg_remap = remap_dict(lg_lab, 5)

In [None]:
# creating the windowed  dictionary
sp_remap = remap_dict(sp_lab, 5)

In [None]:
di_remap = remap(di_lab, 5)

In [None]:
# creating the windowed split driver dictionary
s2_remap = remap_dict(s2_lab, 5)

In [None]:
# creating the windowed split driver dictionary
s3_remap = remap_dict(s3_lab, 5)

In [None]:
# creating the windowed split driver dictionary
s4_remap = remap_dict(s4_lab, 5)

In [None]:
# creating the windowed split driver dictionary
s5_remap = remap_dict(s5_lab, 5)

In [None]:
# creating the windowed split driver dictionary
s6_remap = remap_dict(s6_lab, 5)

In [None]:
# creating the windowed split driver dictionary
s7_remap = remap_dict(s7_lab, 5)

# 4. Load up the weights...

# Pre-set file names...

In [None]:
# all required file locations
hf = "hf_res"
mus = "mus_res"
tex = "tex_res"
com = "com_res"
first = "1st_res"
second = "2nd_res"
brake = "br_res"

# sub file locations
ntime = 'no_time'
wtime = 'with_time'
mth = 'max_thresh'
nth = 'no_thresh'

# file types
# NOTE(review): ``csv`` below rebinds the name of the ``csv`` module imported
# at the top of the file; after this cell runs, module-level ``csv.reader``
# calls fail (load_idx re-imports the module locally, so it keeps working).
csv = "csv"
wt = "npy"
img = "png"

# 5. Thresholding the labels and other helpers...

# Thresholding Functions...

In [None]:
# plot the thresholded value based on the maximum count in each cluster
def plot_thresh(lab, ind, max_size, thresh_name, loc, save):
    """Plot, for every map cell, the label that occurs most among its points.

    Each key of ``ind`` is drawn at ``(key[0] + 0.5, key[1] + 0.5)`` with the
    marker/colour of its majority label. On ties the highest label wins.

    Fixes over the previous version: cells with no point inside the label
    range are skipped (they used to reuse the previous cell's result or raise
    NameError), the number of labels is computed once instead of per cell,
    marker/colour lookups wrap around instead of raising for more than 12
    labels, and the image is saved through a joined path instead of changing
    the working directory.

    Parameters:
        lab: 1d labels (integer values usable as list indices).
        ind: map of cell key -> list of point indices into ``lab``.
        max_size: upper axis limit for both axes.
        thresh_name: plot title and file name of the saved image.
        loc: directory where the image is stored.
        save: whether to save the image (True or False).
    """
    # set the markers and colors
    marker = ['o', '*', 's', 'D', '^', '+', 'X', '1', 'p', '>', 'x', 'p']
    color = ['r', 'g', 'b', 'y', 'k', 'm', 'c', 'y', 'r', 'g', 'b', 'y']

    plt.axis([0, max_size, 0, max_size])

    print("Markers: ", marker)
    print("Colours: ", color)
    print()

    n_labels = len(set(lab))
    n_points = lab.shape[0]

    # open the map dictionary
    for key, members in ind.items():
        counts = [0] * n_labels
        counted = False

        # deal only with points in the required label limit
        for pt in members:
            if pt < n_points:
                counts[lab[pt]] += 1
                counted = True

        if not counted:
            continue

        # last label holding the maximum count
        top = max(counts)
        pts = max(i for i, c in enumerate(counts) if c == top)

        plt.plot(key[0]+0.5, key[1]+0.5, marker[pts % len(marker)], markerfacecolor='None',
                 markeredgecolor=color[pts % len(color)], markersize=4, markeredgewidth=1)

    plt.title(thresh_name)

    # save the labeled map
    if save:
        plt.savefig(os.path.join(loc, thresh_name))
        print("map saved!!")

    plt.show()

    
# plot the thresholded value based on the maximum count in each cluster
def plot_thrline(lab, ind, max_size, thresh_name, loc, save):
    """Plot every map cell as a grey dot shaded by its majority label.

    Grey levels are spread evenly between 0 and 0.85 over the distinct
    labels. Each key of ``ind`` is drawn at ``(key[0] + 0.5, key[1] + 0.5)``;
    on ties the highest label wins.

    Fixes over the previous version: cells with no point inside the label
    range are skipped (they used to reuse the previous cell's result or raise
    NameError), the number of labels is computed once instead of per cell,
    and the image is saved through a joined path instead of changing the
    working directory.

    Parameters:
        lab: 1d labels (integer values usable as list indices).
        ind: map of cell key -> list of point indices into ``lab``.
        max_size: upper axis limit for both axes.
        thresh_name: plot title and file name of the saved image.
        loc: directory where the image is stored.
        save: whether to save the image (True or False).
    """
    tot_lab = len(set(lab))
    n_points = lab.shape[0]
    colorlis = np.linspace(0, 0.85, tot_lab)

    plt.axis([0, max_size, 0, max_size])

    # open the map dictionary
    for key, members in ind.items():
        counts = [0] * tot_lab
        counted = False

        # deal only with points in the required label limit
        for pt in members:
            if pt < n_points:
                counts[lab[pt]] += 1
                counted = True

        if not counted:
            continue

        # last label holding the maximum count
        top = max(counts)
        pts = max(i for i, c in enumerate(counts) if c == top)

        # matplotlib reads a numeric string as a grey level
        plt.plot(key[0]+0.5, key[1]+0.5, '.', markerfacecolor='None',
                 markeredgecolor=str(colorlis[pts]), markersize=4, markeredgewidth=1)

    plt.title(thresh_name)

    # save the labeled map
    if save:
        plt.savefig(os.path.join(loc, thresh_name))
        print("map saved!!")

    plt.show()
     
        
# plot the thresholded value based on the maximum count in each cluster
def plot_thrcolor(lab, ind, max_size, thresh_name, loc, save):
    """Plot every map cell as a dot coloured by its majority label.

    A small legend figure (one square per palette colour) is shown first.
    The majority label of each cell is folded into the 10-colour palette as
    ``(label % 40) % 10``. Each key of ``ind`` is drawn at
    ``(key[0] + 0.5, key[1] + 0.5)``; on ties the highest label wins.

    Fixes over the previous version: cells with no point inside the label
    range are skipped (they used to reuse the previous cell's result or raise
    NameError), the number of labels is computed once instead of per cell,
    and the image is saved through a joined path instead of changing the
    working directory.

    Parameters:
        lab: 1d labels (integer values usable as list indices).
        ind: map of cell key -> list of point indices into ``lab``.
        max_size: upper axis limit for both axes.
        thresh_name: plot title and file name of the saved image.
        loc: directory where the image is stored.
        save: whether to save the image (True or False).
    """
    split = 10
    print(split)

    # alternative color list for 40 labels (not in use)
    # colorlis = ['#02b058', '#042333', '#0841fa', '#11fd99', '#28f934', '#3636ee', '#3c91a7', '#3e3986', '#3e8ae0', '#419b63',
    #             '#427b34', '#5c1c3a', '#649b7c', '#65eb69', '#74470b', '#77793c', '#7fbfd4', '#806be1', '#820022', '#a07475',
    #             '#a0f8fb', '#a3f96c', '#a4c00f', '#acf837', '#bd25f4', '#c27495', '#c55a2d', '#c5c8fc', '#c9dc9b', '#d18b2c',
    #             '#dcd6e7', '#e2fa2b', '#e8390a', '#edda36', '#ee5b21', '#f06752', '#f28cf0', '#f68e2f', '#fc1cc0', '#fed3ca']

    # color list in use (10 entries)
    colorlis = ['#0841fa', '#11fd99', '#28f934', '#042333', '#74470b', '#a07475', '#a4c00f', '#bd25f4', '#d18b2c', '#e8390a']

    print("legend: ")
    for i, shade in enumerate(colorlis):
        print("Lab: ", i, " Color: ", shade)

    # legend figure: a 2-wide, 5-high grid of squares, one per colour
    cpt = 0
    for y in range(5):
        for x in range(2):
            plt.plot(x, y, 's', color=colorlis[cpt])
            cpt = cpt + 1
    plt.show()

    plt.axis([0, max_size, 0, max_size])

    n_labels = len(set(lab))
    n_points = lab.shape[0]

    # open the map dictionary
    for key, members in ind.items():
        counts = [0] * n_labels
        counted = False

        # deal only with points in the required label limit
        for pt in members:
            if pt < n_points:
                counts[lab[pt]] += 1
                counted = True

        if not counted:
            continue

        # last label holding the maximum count
        top = max(counts)
        pts = max(i for i, c in enumerate(counts) if c == top)

        # fold the label into the palette
        pts = (pts % 40) % split
        plt.plot(key[0]+0.5, key[1]+0.5, marker='.', markerfacecolor='None',
                 markeredgecolor=colorlis[pts], markersize=4, markeredgewidth=1)

    plt.title(thresh_name)

    # save the labeled map
    if save:
        plt.savefig(os.path.join(loc, thresh_name))
        print("map saved!!")

    plt.show()


# plot, one figure per label, every cluster that contains that label
def plot_throne(lab, ind, max_size, thresh_name, loc, save):
    """Draw one figure per label showing the cells that contain that label.

    A cell (key of ``ind``) is drawn at ``(key[0] + 0.5, key[1] + 0.5)`` in
    the figure of label ``s`` as soon as one of its points carries ``s``.

    Fixes over the previous version: point indices beyond the end of ``lab``
    are ignored, matching the other plotting functions (they used to raise
    IndexError here), and images are saved through a joined path instead of
    changing the working directory.

    Parameters:
        lab: 1d labels.
        ind: map of cell key -> list of point indices into ``lab``.
        max_size: upper axis limit for both axes.
        thresh_name: prefix of each title / file name (the label is appended).
        loc: directory where the images are stored.
        save: whether to save the images (True or False).

    Returns:
        One list of cell keys per label, in the order the labels are drawn.
    """
    fullset = list(set(lab))
    n_points = lab.shape[0]
    flis = []

    for s in fullset:
        plt.axis([0, max_size, 0, max_size])
        clis = []
        print("Current label: ", s)

        # open the map dictionary
        for key, members in ind.items():
            # one matching in-range point is enough to mark the cell
            if any(pt < n_points and lab[pt] == s for pt in members):
                plt.plot(key[0]+0.5, key[1]+0.5, marker='.', color='b')
                clis.append(key)

        flis.append(list(set(clis)))
        na = thresh_name + str(s)
        plt.title(na)

        # save the labeled map
        if save:
            plt.savefig(os.path.join(loc, na))
            print("map saved!!")

        plt.show()

    return flis

    
# plot, one figure per label, every cluster whose majority label is that label
def plot_thronev(lab, ind, max_size, thresh_name, loc, save):
    """Draw one figure per label showing the cells won by that label.

    The majority label of every cell is determined once (on ties the highest
    label wins); the figure of label ``s`` then shows the cells whose
    majority label equals ``s`` at ``(key[0] + 0.5, key[1] + 0.5)``.

    Fixes over the previous version: the majority vote is no longer repeated
    for every label, cells with no point inside the label range are skipped
    (they used to reuse the previous cell's result or raise NameError), and
    images are saved through a joined path instead of changing the working
    directory.

    Parameters:
        lab: 1d labels (integer values usable as list indices).
        ind: map of cell key -> list of point indices into ``lab``.
        max_size: upper axis limit for both axes.
        thresh_name: prefix of each title / file name (the label is appended).
        loc: directory where the images are stored.
        save: whether to save the images (True or False).

    Returns:
        One list of cell keys per label, in the order the labels are drawn.
    """
    fullset = list(set(lab))
    n_labels = len(fullset)
    n_points = lab.shape[0]

    # majority label per cell, computed once for all figures
    winners = {}
    for key, members in ind.items():
        counts = [0] * n_labels
        counted = False

        # deal only with points in the required label limit
        for pt in members:
            if pt < n_points:
                counts[lab[pt]] += 1
                counted = True

        if counted:
            top = max(counts)
            winners[key] = max(i for i, c in enumerate(counts) if c == top)

    fidx = []

    for s in fullset:
        plt.axis([0, max_size, 0, max_size])
        print("Current label: ", s)

        widx = [key for key, won in winners.items() if won == s]
        for key in widx:
            plt.plot(key[0]+0.5, key[1]+0.5, marker='.', color='b')

        na = thresh_name + str(s)
        plt.title(na)

        # save the labeled map
        if save:
            plt.savefig(os.path.join(loc, na))
            print("map saved!!")

        plt.show()
        fidx.append(widx)

    return fidx


# function to locate junctions between label classes
def ret_junc(label):
    """Return the positions where consecutive labels differ.

    The list starts with 0, contains every index ``i`` whose label differs
    from the one at ``i - 1``, and ends with the last index
    (``label.shape[0] - 1``). The list is also printed.
    """
    n_labels = label.shape[0]

    bounds = [0]
    bounds.extend(i for i in range(1, n_labels) if label[i - 1] != label[i])
    bounds.append(n_labels - 1)

    print(bounds)
    return bounds

In [None]:
# add more thresholding functions above

# 6. Apply the Mapping...

# To Thresh or Not to Thresh?

In [None]:
# function to work with more than one label after getting indices
def lab_thrline(label, index, size, name, loc, save):
    """Run ``plot_thrline`` with "_thrline" appended to ``name``; returns None."""
    plot_thrline(label, index, size, name + "_thrline", loc, save)


# wrapper around plot_thrcolor that tags the plot name
def lab_thrcolor(label, index, size, name, loc, save):
    """Call plot_thrcolor with "_thrcolor" appended to name; returns None."""
    plot_thrcolor(label, index, size, name + "_thrcolor", loc, save)


# wrapper around plot_thresh that tags the plot name
def lab_thresh(label, index, size, name, loc, save):
    """Call plot_thresh with "_thr" appended to name; returns None."""
    plot_thresh(label, index, size, name + "_thr", loc, save)
 

# function to work with a single label over only one driver dataset
def lab_throne(label, index, size, name, loc, save):
    """Run plot_throne and return whatever it returns.

    All arguments are forwarded unchanged, including ``name``.
    """
    # NOTE(review): a local `name + "_thr"` used to be built here but was never
    # passed on; it is removed as dead code. Confirm whether the suffix was
    # meant to reach plot_throne (doing so would rename the saved figures).
    return plot_throne(label, index, size, name, loc, save)


# function to work with a single label over only one driver dataset (one vs all)
def lab_thronev(label, index, size, name, loc, save):
    """Run plot_thronev and return whatever it returns.

    All arguments are forwarded unchanged, including ``name``.
    """
    # NOTE(review): a local `name + "_thrvs"` used to be built here but was
    # never passed on; it is removed as dead code. Confirm whether the suffix
    # was meant to reach plot_thronev (doing so would rename the saved figures).
    return plot_thronev(label, index, size, name, loc, save)


## MOST LIKELY WILL NOT BE USED?!
# function that will do the labeling by-passing the thresholding function
def lab_nothresh(label, index, size, name, loc, save):
    """Plot every label present at each map cell, without majority voting.

    For each key of ``index`` the labels of all points listed under that key
    are collected, and one hollow marker is drawn per distinct label at
    (key[0] + 0.5, key[1] + 0.5).

    Args:
        label: indexable of integer class ids, addressed by point index.
        index: dict mapping map coordinates to lists of point indices.
        size: upper limit used for both plot axes.
        name: base name; "_nothr" is appended for the title and file name.
        loc: directory the figure is written to when saving.
        save: the figure is saved only when this equals True.
    """
    # set the markers and colors
    marker = ['o', '*', 's', 'D', '^', '+']
    color = ['r', 'g', 'b', 'y', 'c', 'k']

    cn = 0

    plt.axis([0, size, 0, size])
    print("Beginning plotting - no threshold for ", name, '...')
    for key, points in index.items():

        # distinct labels found among the points of this cell
        present = set()
        for pt in points:
            present.add(label[pt])

            if cn % 10000 == 0:
                print("done with ", cn, " number of points...")
            cn = cn + 1

        for ss in present:
            # cycle through the six styles so class ids >= 6 do not raise
            style = ss % len(marker)
            plt.plot(key[0]+0.5, key[1]+0.5, marker[style], markerfacecolor='None',
                     markeredgecolor=color[style], markersize=4, markeredgewidth=1)

    print("Plotting of ", name, ' done...')
    new_n = name + '_nothr'
    plt.title(new_n)

    if save == True:
        # write through an explicit path instead of chdir/chdir('..'): a failed
        # save (or a nested loc) can no longer leave the working directory changed
        plt.savefig(os.path.join(loc, new_n))
        print("color map saved!!")

    plt.show()
    return

# Different variations of starter functions to SOMs...

# Partial runners...

In [None]:
#### NOTE: all functions below apply for only one type of label at a time (unless future modifications are made...)

# function to perform single data - single label - single weights labeling
## used as a test function in case of unknown errors
def onlyone_1lab(data, label, weight, loc, name, save):
    """Map one dataset onto one weight matrix and plot one thresholded label.

    Args:
        data: input passed to slide_lab.
        label: label array passed to lab_thresh.
        weight: weight array; weight.shape[0] is used as the plot size.
        loc: output directory forwarded to slide_lab and lab_thresh.
        name: name forwarded to slide_lab and lab_thresh.
        save: save flag forwarded to both calls.

    Returns:
        A one-entry dictionary {name: idx}, where idx is the second value
        returned by slide_lab.
    """
    # NOTE(review): slide_lab is called with (data, weight, name, loc, save) to
    # match the other runners in this file; the earlier 3-argument call did
    # not - confirm against slide_lab's definition.
    _win, idx = slide_lab(data, weight, name, loc, save)

    # lab_thresh takes (label, index, size, name, loc, save); name and loc were
    # previously passed in swapped order
    lab_thresh(label, idx, weight.shape[0], name, loc, save)

    # the previous return referenced wt_k / idx_li, which do not exist here
    return join_dict([name], [idx])


# function to perform single data - single label - multi weights labeling
## kind of a test function for SOM parameters
def single_1lab(data, label, lab_name, weight_dict, loc, thresh, save):
    """Map one dataset onto every weight set and plot one label for each.

    Args:
        data: input passed to slide_lab for every weight set.
        label: label array passed to the plotting wrapper.
        lab_name: accepted for interface compatibility; not used here.
        weight_dict: dict of weight-file name -> weight array.
        loc: output directory forwarded to slide_lab and the plot wrapper.
        thresh: when equal to True lab_thresh is used, otherwise lab_nothresh.
        save: save flag forwarded to slide_lab and the plot wrapper.

    Returns:
        Dictionary keyed by the weight_dict keys holding the index mapping
        (second value returned by slide_lab) for each weight set.
    """
    # NOTE(review): uses lookup(), like the other runners in this file, in place
    # of sep_dict, which is not defined in the visible part of the notebook.
    wt_k, wt_v = lookup(weight_dict)

    idx_li = []

    for wt_name, cur_wt in zip(wt_k, wt_v):

        # str.rstrip('.npy') strips any trailing '.', 'n', 'p', 'y' characters
        # rather than the suffix, so cut the extension explicitly
        save_n = wt_name[:-len('.npy')] if wt_name.endswith('.npy') else wt_name

        print("Starting ", wt_name, ' ...')

        # get the mapping
        _win, idx = slide_lab(data, cur_wt, save_n, loc, save)
        idx_li.append(idx)

        # plot the mapping (the plot is named after the weight file, as before;
        # an unused "plt_" + lab_name local was dropped)
        if thresh == True:
            lab_thresh(label, idx, cur_wt.shape[0], save_n, loc, save)
        else:
            lab_nothresh(label, idx, cur_wt.shape[0], save_n, loc, save)

    # create and return dictionary of mapped indices
    return join_dict(wt_k, idx_li)

# function to modify all involved dictionaries
def mod_dict(da_d, la_d, wt_d, id_d, excepts):
    """Drop the entries whose label key is listed in ``excepts``.

    The four dictionaries are paired by position (i-th data entry with the
    i-th label, weight and index entry), not by key. An entry is kept in all
    four outputs only when its label key equals none of ``excepts``.

    Args:
        da_d, la_d, wt_d, id_d: data, label, weight and index dictionaries.
        excepts: iterable of label keys to exclude.

    Returns:
        Tuple (da_m, la_m, wt_m, id_m) of the filtered dictionaries.
    """
    da_m, la_m, wt_m, id_m = dict(), dict(), dict(), dict()

    rows = zip(da_d.items(), la_d.items(), wt_d.items(), id_d.items())
    for (da_k, da_v), (la_k, la_v), (wt_k, wt_v), (id_k, id_v) in rows:
        # The old flag loop overwrote its result on every pass, so only the
        # LAST entry of excepts was ever honoured; test against all of them.
        if any(la_k == skipped for skipped in excepts):
            continue

        da_m[da_k] = da_v
        la_m[la_k] = la_v
        wt_m[wt_k] = wt_v
        id_m[id_k] = id_v

    return da_m, la_m, wt_m, id_m

In [None]:
# disabled cell: the pip-install template below is kept as an inert string literal
'''import sys
!{sys.executable} -m pip install '''

# All runners...

In [None]:
# function to perform multi data - multi label - multi weight labeling
def all_1lab(data_dict, label_dict, weight_dict, loc, thresh, save):
    """Map every dataset onto its weight set and plot its label.

    The three dictionaries are paired by position (i-th weight with the i-th
    data and i-th label entry), not by key.

    Args:
        data_dict: dict of datasets passed to slide_lab.
        label_dict: dict of label arrays passed to the plot wrapper.
        weight_dict: dict of weight-file name -> weight array.
        loc: output directory forwarded to slide_lab and the plot wrapper.
        thresh: when equal to True lab_thresh is used, otherwise lab_nothresh.
        save: save flag forwarded to slide_lab and the plot wrapper.

    Returns:
        Dictionary keyed by the weight name without its ".npy" suffix, holding
        the index mapping (second value returned by slide_lab).
    """
    wt_k, wt_v = lookup(weight_dict)
    _da_k, da_v = lookup(data_dict)
    _la_k, la_v = lookup(label_dict)

    id_k, id_v = [], []
    save_n = None

    for wt in range(len(wt_k)):

        # str.rstrip('.npy') strips any trailing '.', 'n', 'p', 'y' characters
        # rather than the suffix, so cut the extension explicitly
        wt_name = wt_k[wt]
        save_n = wt_name[:-len('.npy')] if wt_name.endswith('.npy') else wt_name

        print("Starting ", wt_name, ' ...')

        # setting values for the current iteration
        cur_wt = wt_v[wt]
        cur_da = da_v[wt]
        cur_la = la_v[wt]

        # get the mapping
        _win, idx = slide_lab(cur_da, cur_wt, save_n, loc, save)
        id_v.append(idx)
        id_k.append(save_n)

        # new name of the map
        name = "plt_" + save_n

        # plot the mapping
        if thresh == True:
            lab_thresh(cur_la, idx, cur_wt.shape[0], name, loc, save)
        else:
            lab_nothresh(cur_la, idx, cur_wt.shape[0], name, loc, save)

    # report the last processed weight set; skipped for an empty weight_dict,
    # where save_n was previously undefined and raised NameError
    if wt_k:
        print("Mapping Complete for ", save_n)

    # create and return dictionary of mapped indices
    return join_dict(id_k, id_v)


## NOTE: only after we get the mappings of the SOM can we label them

# function to perform color scale labeling without the need for mapping
def all_1lab_thrline(label_dict, weight_dict, idx_dict, loc, ext, save):
    """Run lab_thrline for every weight set using already-computed mappings.

    The three dictionaries are paired by position (i-th weight with the i-th
    mapping and i-th label entry), not by key.

    Args:
        label_dict: dict of label arrays.
        weight_dict: dict of weight-file name -> weight array; only the name
            and shape[0] of each array are used.
        idx_dict: dict of index mappings.
        loc: output directory forwarded to lab_thrline.
        ext: suffix appended to the generated plot name.
        save: save flag forwarded to lab_thrline.
    """
    wt_k, wt_v = lookup(weight_dict)
    _id_k, id_v = lookup(idx_dict)
    _la_k, la_v = lookup(label_dict)

    for wtid, wt_name in enumerate(wt_k):

        # str.rstrip('.npy') strips any trailing '.', 'n', 'p', 'y' characters
        # rather than the suffix, so cut the extension explicitly
        save_n = wt_name[:-len('.npy')] if wt_name.endswith('.npy') else wt_name

        print("Starting ", wt_name, ' ...')

        # setting values for the current iteration
        cur_wt = wt_v[wtid]
        cur_id = id_v[wtid]
        cur_la = la_v[wtid]

        # new name of the map
        name = "plt_" + save_n + ext

        # plot the mapping
        lab_thrline(cur_la, cur_id, cur_wt.shape[0], name, loc, save)

        print("Plotting done!!\n")

    return

# function to perform multi color labeling without the need for mapping
def all_1lab_thrcolor(label_dict, weight_dict, idx_dict, loc, ext, save):
    """Run lab_thrcolor for every weight set using already-computed mappings.

    The three dictionaries are paired by position (i-th weight with the i-th
    mapping and i-th label entry), not by key.

    Args:
        label_dict: dict of label arrays.
        weight_dict: dict of weight-file name -> weight array; only the name
            and shape[0] of each array are used.
        idx_dict: dict of index mappings.
        loc: output directory forwarded to lab_thrcolor.
        ext: suffix appended to the generated plot name.
        save: save flag forwarded to lab_thrcolor.
    """
    wt_k, wt_v = lookup(weight_dict)
    _id_k, id_v = lookup(idx_dict)
    _la_k, la_v = lookup(label_dict)

    for wtid, wt_name in enumerate(wt_k):

        # str.rstrip('.npy') strips any trailing '.', 'n', 'p', 'y' characters
        # rather than the suffix, so cut the extension explicitly
        save_n = wt_name[:-len('.npy')] if wt_name.endswith('.npy') else wt_name

        print("Starting ", wt_name, ' ...')

        # setting values for the current iteration
        cur_wt = wt_v[wtid]
        cur_id = id_v[wtid]
        cur_la = la_v[wtid]

        # new name of the map
        name = "plt_" + save_n + ext

        # plot the mapping
        lab_thrcolor(cur_la, cur_id, cur_wt.shape[0], name, loc, save)

        print("Plotting done!!\n")

    return


# function to perform single-entry labeling without the need for mapping
def all_1lab_throne(wtid, label_dict, weight_dict, idx_dict, loc, ext, save):
    """Run lab_throne for the entry at position ``wtid`` of the dictionaries.

    The same position is used in all three dictionaries (weights, mappings,
    labels); entries are selected by position, not by key.

    Args:
        wtid: integer position of the entry to process.
        label_dict: dict of label arrays.
        weight_dict: dict of weight-file name -> weight array; only the name
            and shape[0] of the selected array are used.
        idx_dict: dict of index mappings.
        loc: output directory forwarded to lab_throne.
        ext: suffix appended to the generated plot name.
        save: save flag forwarded to lab_throne.

    Returns:
        The value returned by lab_throne.
    """
    wt_k, wt_v = lookup(weight_dict)
    _id_k, id_v = lookup(idx_dict)
    _la_k, la_v = lookup(label_dict)

    # str.rstrip('.npy') strips any trailing '.', 'n', 'p', 'y' characters
    # rather than the suffix, so cut the extension explicitly
    wt_name = wt_k[wtid]
    save_n = wt_name[:-len('.npy')] if wt_name.endswith('.npy') else wt_name

    print("Starting ", wt_name, ' ...')

    # setting values for the selected entry
    cur_wt = wt_v[wtid]
    cur_id = id_v[wtid]
    cur_la = la_v[wtid]

    # new name of the map
    name = "plt_" + save_n + ext

    # plot the mapping
    thewins = lab_throne(cur_la, cur_id, cur_wt.shape[0], name, loc, save)

    print("Plotting done!!\n")

    return thewins


# function to perform one vs all mapping with thresholding
def all_1lab_thronev(thisid, label_dict, weight_dict, idx_dict, loc, ext, save):
    """Run lab_thronev for the entry at position ``thisid`` of the dictionaries.

    The same position is used in all three dictionaries (weights, mappings,
    labels); entries are selected by position, not by key.

    Args:
        thisid: integer position of the entry to process.
        label_dict: dict of label arrays.
        weight_dict: dict of weight-file name -> weight array; only the name
            and shape[0] of the selected array are used.
        idx_dict: dict of index mappings.
        loc: output directory forwarded to lab_thronev.
        ext: suffix appended to the generated plot name.
        save: save flag forwarded to lab_thronev.

    Returns:
        The value returned by lab_thronev.
    """
    wt_k, wt_v = lookup(weight_dict)
    _id_k, id_v = lookup(idx_dict)
    _la_k, la_v = lookup(label_dict)
    wtid = thisid

    # str.rstrip('.npy') strips any trailing '.', 'n', 'p', 'y' characters
    # rather than the suffix, so cut the extension explicitly
    wt_name = wt_k[wtid]
    save_n = wt_name[:-len('.npy')] if wt_name.endswith('.npy') else wt_name

    print("Starting ", wt_name, ' ...')

    # setting values for the selected entry
    cur_wt = wt_v[wtid]
    cur_id = id_v[wtid]
    cur_la = la_v[wtid]

    # new name of the map
    name = "plt_" + save_n + ext

    # plot the mapping
    onev = lab_thronev(cur_la, cur_id, cur_wt.shape[0], name, loc, save)

    print("Plotting done!!\n")

    return onev


# function to run all while circumventing the need to perform SOM mapping and even creating a SOM object
def all_1lab_noobj(label_dict, weight_dict, idx_dict, loc, ext, thresh, save):
    """Plot one label per weight set from already-computed index mappings.

    The three dictionaries are paired by position (i-th weight with the i-th
    label and i-th mapping entry), not by key.

    Args:
        label_dict: dict of label arrays.
        weight_dict: dict of weight-file name -> weight array; only the name
            and shape[0] of each array are used.
        idx_dict: dict of index mappings.
        loc: output directory forwarded to the plot wrapper.
        ext: suffix appended to the generated plot name.
        thresh: when equal to True lab_thresh is used, otherwise lab_nothresh.
        save: save flag forwarded to the plot wrapper.
    """
    wt_k, wt_v = lookup(weight_dict)
    _la_k, la_v = lookup(label_dict)
    _id_k, id_v = lookup(idx_dict)

    for wtid, wt_name in enumerate(wt_k):

        # str.rstrip('.npy') strips any trailing '.', 'n', 'p', 'y' characters
        # rather than the suffix, so cut the extension explicitly
        save_n = wt_name[:-len('.npy')] if wt_name.endswith('.npy') else wt_name

        print("Starting ", wt_name, ' ...')

        # setting values for the current iteration
        cur_wt = wt_v[wtid]
        cur_la = la_v[wtid]
        cur_id = id_v[wtid]

        # new name of the map
        name = "plt_" + save_n + ext

        # plot the mapping
        if thresh == True:
            lab_thresh(cur_la, cur_id, cur_wt.shape[0], name, loc, save)
        else:
            lab_nothresh(cur_la, cur_id, cur_wt.shape[0], name, loc, save)

        print("Plotting done!!\n")

    return

# Applying the SOM labeling functions...

In [None]:
# function to find max in a mapping
def maxmap(map_v):
    """Print the largest value stored in a mapping and the key holding it.

    ``map_v`` maps keys to iterables of numbers. The search starts from 0, so
    when no value is greater than 0 (for example an empty mapping) the
    reported maximum is 0 and the reported key is None.
    """
    maxi = 0
    tk = None  # stays None when nothing exceeds 0 (was unbound before)

    for kk, values in map_v.items():
        for value in values:
            if value > maxi:
                maxi = value
                tk = kk

    print("max: ", maxi)
    print("maxk: ", tk)
    return

# print every key/value pair of a mapping
def dispmap(map_v):
    """Print each key of map_v with its value, followed by a separator line."""
    for entry, content in map_v.items():
        print("Key: ", entry)
        print("Val: ", content)
        print("**********************************")

# Applying Brake Label...

In [None]:
# print the largest value (and its key) in the 'ten_re_id.csv' mapping,
# then show the shape of the 'te_brl' entry of br_remap
maxmap(tr_map['ten_re_id.csv'])
br_remap['te_brl'].shape

In [None]:
# brake label over the precomputed mappings (thresh=True, save=False)
all_1lab_noobj(br_remap, tr_wt, tr_map, "results", "_brn", True, False)

# Applying Speed Label...

In [None]:
# speed label over the precomputed mappings (thresh=True, save=True into "results")
all_1lab_noobj(sd_remap, tr_wt, tr_map, "results", "_sdl", True, True)

# Applying Forward Backward Label...

In [None]:
# forward/backward label over the precomputed mappings (thresh=True, save=True into "results")
all_1lab_noobj(fb_remap, tr_wt, tr_map, "results", "_fbl", True, True)

# Applying Left Right Label...

In [None]:
# left/right label over the precomputed mappings (thresh=True, save=True into "results")
all_1lab_noobj(lr_remap, tr_wt, tr_map, "results", "_lrl", True, True)

# Applying Top Bottom Label...

In [None]:
# top/bottom label over the precomputed mappings (thresh=True, save=True into "results")
all_1lab_noobj(tb_remap, tr_wt, tr_map, "results", "_tbl", True, True)

# Applying Lane Gap Label...

In [None]:
# lane-gap label over the precomputed mappings (thresh=True, save=True into "results")
all_1lab_noobj(lg_remap, tr_wt, tr_map, "results", "_lgl", True, True)

# Applying distract label...

In [None]:
# distraction label: wrap the label, the 'aln' weights and the 'aln' mapping in
# one-entry dictionaries so all_1lab_noobj can be reused for that single set
di_di, tr_di, di_map = dict(), dict(), dict()
di_di['distract'] = di_remap
tr_di['aln_re_wt.npy'] = tr_wt['aln_re_wt.npy']
di_map['aln_re_id.csv'] = tr_map['aln_re_id.csv']
all_1lab_noobj(di_di, tr_di, di_map, "results", "_dil", True, True)

In [None]:
# show the keys of the loaded mapping dictionary
tr_map.keys()

# Applying 3 Segment Label...

In [None]:
# 3-segment label over the precomputed mappings (thresh=True, save=False)
all_1lab_noobj(s3_remap, tr_wt, tr_map, "results", "_s3n", True, False)

# Applying 2 segment

In [None]:
# 2-segment label over the precomputed mappings (thresh=True, save=False)
all_1lab_noobj(s2_remap, tr_wt, tr_map, "results", "_s2l", True, False)

# Applying 4 segment

In [None]:
# 4-segment label over the precomputed mappings (thresh=True, save=False)
all_1lab_noobj(s4_remap, tr_wt, tr_map, "results", "_s4l", True, False)

# Applying 5 segment

In [None]:
# 5-segment label over the precomputed mappings (thresh=True, save=False)
all_1lab_noobj(s5_remap, tr_wt, tr_map, "results", "_s5l", True, False)

# Applying black-gray scale labeling...

In [None]:
# black-to-gray scale plot of the 2-segment label (save=True into "results")
all_1lab_thrline(s2_remap, tr_wt, tr_map, "results", "_btg", True)

In [None]:
# black-to-gray scale plot of the 3-segment label (save=True into "results")
all_1lab_thrline(s3_remap, tr_wt, tr_map, "results", "_b3g", True)

In [None]:
# black-to-gray scale plot of the 4-segment label (save=True into "results")
all_1lab_thrline(s4_remap, tr_wt, tr_map, "results", "_b4g", True)

In [None]:
# black-to-gray scale plot of the 5-segment label (save=True into "results")
all_1lab_thrline(s5_remap, tr_wt, tr_map, "results", "_b5g", True)

In [None]:
# black-to-gray scale plot of the 6-segment label (save=True into "results")
all_1lab_thrline(s6_remap, tr_wt, tr_map, "results", "_b6g", True)

In [None]:
# black-to-gray scale plot of the 7-segment label (save=True into "results")
all_1lab_thrline(s7_remap, tr_wt, tr_map, "results", "_b7g", True)

In [None]:
# build a "label" per dataset that is simply the running position 0..n-1,
# with the same shape as the corresponding sp_remap entry
li_di = dict()
for key in sp_remap.keys():
    alist = np.zeros(sp_remap[key].shape, dtype='int')
    for i in range(len(sp_remap[key])):
        alist[i] = i  # every point is labelled with its own position
    li_di[key] = alist

In [None]:
# black-to-gray scale plot using the positional labels in li_di (save=True into "results")
all_1lab_thrline(li_di, tr_wt, tr_map, "results", "_bg", True)

# Using multi colors...

# All Split Label..

In [None]:
# multi-colour plot of the split label held in sp_remap (3-distraction variant per
# the original note); save=True into "results"
all_1lab_thrcolor(sp_remap, tr_wt, tr_map, "results", "_spl", True)

In [None]:
# multi-colour plot of the split label held in sp_remap (all-distractions variant per
# the original note); save=True into "results"
# NOTE(review): same arguments and "_spl" suffix as the previous cell - presumably
# sp_remap is rebuilt in between; confirm so the saved figures are not overwritten
all_1lab_thrcolor(sp_remap, tr_wt, tr_map, "results", "_spl", True)

# Breakup and label...

In [None]:
## 2 divisions
# multi-colour plot of the break-up label with mapping already done (save=True)
# NOTE(review): presumably sp_remap holds the 2-division labels when this runs - confirm
all_1lab_thrcolor(sp_remap, tr_wt, tr_map, "results", "_sp2set", True)

In [None]:
## 3 divisions
# multi-colour plot of the break-up label with mapping already done (save=True)
# NOTE(review): presumably sp_remap holds the 3-division labels when this runs - confirm
all_1lab_thrcolor(sp_remap, tr_wt, tr_map, "results", "_sp3set", True)

In [None]:
## 4 divisions
# multi-colour plot of the break-up label with mapping already done (save=True)
# NOTE(review): presumably sp_remap holds the 4-division labels when this runs - confirm
all_1lab_thrcolor(sp_remap, tr_wt, tr_map, "results", "_sp4set", True)

In [None]:
## 5 divisions
# multi-colour plot of the break-up label with mapping already done (save=True)
# NOTE(review): presumably sp_remap holds the 5-division labels when this runs - confirm
all_1lab_thrcolor(sp_remap, tr_wt, tr_map, "results", "_sp5set", True)

In [None]:
## 8 divisions
# multi-colour plot of the break-up label with mapping already done (save=True)
# NOTE(review): presumably sp_remap holds the 8-division labels when this runs - confirm
all_1lab_thrcolor(sp_remap, tr_wt, tr_map, "results", "_sp8set", True)

In [None]:
## 10 divisions
# multi-colour plot of the break-up label with mapping already done (save=True)
# NOTE(review): presumably sp_remap holds the 10-division labels when this runs - confirm
all_1lab_thrcolor(sp_remap, tr_wt, tr_map, "results", "_sp10set", True)

# One driver at a time...

In [None]:
# "hf" set only: entry at position 1 of the dictionaries, save=False;
# the returned value is kept for the mapping cells further down
allhf = all_1lab_throne(1, sp_remap, tr_wt, tr_map, "results", "_one", False)

In [None]:
# "mu" set only: entry at position 2 of the dictionaries, save=False;
# the returned value is kept for the mapping cells further down
allmu = all_1lab_throne(2, sp_remap, tr_wt, tr_map, "results", "_one", False)

In [None]:
# "te" set only: entry at position 3 of the dictionaries, save=False;
# the returned value is kept for the mapping cells further down
allte = all_1lab_throne(3, sp_remap, tr_wt, tr_map, "results", "_one", False)

In [None]:
# "al" set only: entry at position 0 of the dictionaries, save=False;
# the returned value is kept for the mapping cells further down
allal = all_1lab_throne(0, sp_remap, tr_wt, tr_map, "results", "_one", False)

# Comparing one vs all drivers...

In [None]:
# one-vs-all plot for the "hf" set (position 1 of the dictionaries), save=True;
# the returned value is kept for the mapping cells further down
indhf = all_1lab_thronev(1, sp_remap, tr_wt, tr_map, "results", "_vs", True)

In [None]:
# one-vs-all plot for the "mu" set (position 2 of the dictionaries), save=True;
# the returned value is kept for the mapping cells further down
indmu = all_1lab_thronev(2, sp_remap, tr_wt, tr_map, "results", "_vs", True)

In [None]:
# one-vs-all plot for the "te" set (position 3 of the dictionaries), save=True;
# the returned value is kept for the mapping cells further down
indte = all_1lab_thronev(3, sp_remap, tr_wt, tr_map, "results", "_vs", True)

In [None]:
# one-vs-all plot for the "al" set (position 0 of the dictionaries), save=True;
# the returned value is kept for the mapping cells further down
indal = all_1lab_thronev(0, sp_remap, tr_wt, tr_map, "results", "_vs", True)

In [None]:
# helpers to understand mappings
# collect the mapped values for a chosen list of coordinates
def set_map(labeler, listofkeys):
    """Return a defaultdict(list) with the labeler values of the given keys.

    For every key in listofkeys, the values stored in labeler under
    (key[0], key[1]) are appended one by one to the result under the same
    coordinate. Coordinates whose value list is empty do not appear in the
    result.
    """
    picked = defaultdict(list)
    for coord in listofkeys:
        cell = (coord[0], coord[1])
        for member in labeler[cell]:
            picked[cell].append(member)

    return picked
        

# save the mappings obtained...
# store the index as a csv file
def store_map(fname, cur_dict):
    """Write a coordinate -> value-list mapping to mapping/<fname>.csv.

    Each row has the form ``k0,k1,count,v1,v2,...,`` (with a trailing comma),
    where (k0, k1) is the key and count is the number of values that follow.

    Args:
        fname: output file name without the ".csv" extension.
        cur_dict: dict keyed by 2-element integer keys whose values are
            lists of integers.
    """
    name = fname + '.csv'

    # Address the file by path instead of chdir('mapping')/chdir('..'): an
    # error while writing can no longer leave the working directory changed.
    with open(os.path.join('mapping', name), 'w') as f:

        for key, cur_lis in cur_dict.items():
            f.write("%d,%d,%d," % (key[0], key[1], len(cur_lis)))

            for li in cur_lis:
                f.write("%d," % (li))
            f.write("\n")

        print("File ", name, " saved!!")

    return

# load up the index from a csv file
def load_map(fname):
    """Read mapping/<fname> back into a defaultdict(list).

    Each row is expected as ``k0,k1,count,v1,...``; the ``count`` values after
    the third column are appended (as ints) under the key (k0, k1). Rows with
    a count of 0 create no entry. Blank rows are skipped.

    Args:
        fname: file name inside the "mapping" folder, including extension.

    Returns:
        defaultdict(list) mapping (k0, k1) to the list of values.
    """
    idx = defaultdict(list)

    # Address the file by path instead of chdir('mapping')/chdir('..'): a
    # missing or malformed file can no longer leave the working directory changed.
    with open(os.path.join('mapping', fname), 'rt') as f:
        for line in csv.reader(f):
            if not line:
                continue

            count = int(line[2])
            if count == 0:
                continue

            cell = (int(line[0]), int(line[1]))
            for pos in range(3, count + 3):
                idx[cell].append(int(line[pos]))

    print("File ", fname, " has been loaded")
    return idx

In [None]:
# creating the mappings of each coordinate [one vs all]
# for every coordinate list returned by the one-vs-all runs, set_map pulls the
# stored values out of the matching tr_map entry; results are keyed "<set>q_map<n>"

hfq = dict()
i = 0
for hfmap in indhf:
    namehf = "hfq_map" + str(i)
    nmap = set_map(tr_map['hfn_re_id.csv'], hfmap)
    hfq[namehf] = nmap
    i = i + 1
    

muq = dict()
j = 0
for mumap in indmu:
    namemu = "muq_map" + str(j)
    nmap = set_map(tr_map['mun_re_id.csv'], mumap)
    muq[namemu] = nmap
    j = j + 1

    
teq = dict()
k = 0
for temap in indte:
    namete = "teq_map" + str(k)
    nmap = set_map(tr_map['ten_re_id.csv'], temap)
    teq[namete] = nmap
    k = k + 1
    
    
alq = dict()
l = 0
for almap in indal:
    nameal = "alq_map" + str(l)
    nmap = set_map(tr_map['aln_re_id.csv'], almap)
    alq[nameal] = nmap
    l = l + 1

In [None]:
# storing the mappings [one vs all]
# each entry is written by store_map as <name>.csv inside the "mapping" folder

for hf in hfq.keys():
    store_map(hf, hfq[hf])
    
for mu in muq.keys():
    store_map(mu, muq[mu])
    
for te in teq.keys():
    store_map(te, teq[te])
    
for al in alq.keys():
    store_map(al, alq[al])

In [None]:
# creating the mappings of each coordinate [one only]
# for every coordinate list returned by the single-set runs, set_map pulls the
# stored values out of the matching tr_map entry; results are keyed "<set>a_map<n>"

hfa = dict()
i = 0
for hfmap in allhf:
    namehf = "hfa_map" + str(i)
    nmap = set_map(tr_map['hfn_re_id.csv'], hfmap)
    hfa[namehf] = nmap
    i = i + 1
    

mua = dict()
j = 0
for mumap in allmu:
    namemu = "mua_map" + str(j)
    nmap = set_map(tr_map['mun_re_id.csv'], mumap)
    mua[namemu] = nmap
    j = j + 1

    
tea = dict()
k = 0
for temap in allte:
    namete = "tea_map" + str(k)
    nmap = set_map(tr_map['ten_re_id.csv'], temap)
    tea[namete] = nmap
    k = k + 1
    
    
ala = dict()
l = 0
for almap in allal:
    nameal = "ala_map" + str(l)
    nmap = set_map(tr_map['aln_re_id.csv'], almap)
    ala[nameal] = nmap
    l = l + 1

In [None]:
# storing the mappings [one only]
# each entry is written by store_map as <name>.csv inside the "mapping" folder

for hf in hfa.keys():
    store_map(hf, hfa[hf])
    
for mu in mua.keys():
    store_map(mu, mua[mu])
    
for te in tea.keys():
    store_map(te, tea[te])
    
for al in ala.keys():
    store_map(al, ala[al])

In [None]:
# move the working directory up one level
# NOTE(review): presumably a manual recovery after a cell left the notebook inside a sub-folder
os.chdir('..')

In [None]:
# file_list returns every *.csv present in "mapping" at run time, so the folder
# must hold only the ala_* files for ala_map to contain just those - TODO confirm
ala_files = file_list("mapping", "csv")
# setting up all the mappings
ala_map = load_idx(ala_files)

In [None]:
# file_list returns every *.csv present in "mapping" at run time, so the folder
# must hold only the hfa_* files for hfa_map to contain just those - TODO confirm
hfa_files = file_list("mapping", "csv")
# setting up all the mappings
hfa_map = load_idx(hfa_files)

In [None]:
# file_list returns every *.csv present in "mapping" at run time, so the folder
# must hold only the mua_* files for mua_map to contain just those - TODO confirm
mua_files = file_list("mapping", "csv")
# setting up all the mappings
mua_map = load_idx(mua_files)

In [None]:
# file_list returns every *.csv present in "mapping" at run time, so the folder
# must hold only the tea_* files for tea_map to contain just those - TODO confirm
tea_files = file_list("mapping", "csv")
# setting up all the mappings
tea_map = load_idx(tea_files)

In [None]:
# file_list returns every *.csv present in "mapping" at run time, so the folder
# must hold only the alq_* files for alq_map to contain just those - TODO confirm
alq_files = file_list("mapping", "csv")
# setting up all the mappings
alq_map = load_idx(alq_files)

In [None]:
# file_list returns every *.csv present in "mapping" at run time, so the folder
# must hold only the hfq_* files for hfq_map to contain just those - TODO confirm
hfq_files = file_list("mapping", "csv")
# setting up all the mappings
hfq_map = load_idx(hfq_files)

In [None]:
# file_list returns every *.csv present in "mapping" at run time, so the folder
# must hold only the muq_* files for muq_map to contain just those - TODO confirm
muq_files = file_list("mapping", "csv")
# setting up all the mappings
muq_map = load_idx(muq_files)

In [None]:
# file_list returns every *.csv present in "mapping" at run time, so the folder
# must hold only the teq_* files for teq_map to contain just those - TODO confirm
teq_files = file_list("mapping", "csv")
# setting up all the mappings
teq_map = load_idx(teq_files)

In [None]:
# label to connect the dots
def plot_pts(mapper, size):
    """Plot the cells of every mapping as shaded squares joined by a dashed line.

    For each entry of ``mapper`` the keys of the inner mapping are drawn at
    (key[0] + 0.5, key[1] + 0.5); the gray level runs from 0 to 0.85 in key
    iteration order, and all cells are connected with a yellow dashed line.
    After all figures, the number of cells per entry is printed.

    Args:
        mapper: dict of name -> mapping keyed by 2-element coordinates.
        size: upper limit used for both plot axes.
    """
    for the in mapper.keys():
        plt.axis([0, size, 0, size])
        theset = mapper[the]
        k1, k2 = [], []
        print("Mapper: ", the)

        # one gray level per cell; previously this referenced the undefined
        # names tot_lab / bgs and never advanced the shade index
        shades = np.linspace(0, 0.85, len(theset))

        for shade, ky in zip(shades, theset.keys()):
            k1.append(ky[0]+0.5)
            k2.append(ky[1]+0.5)
            plt.plot(ky[0]+0.5, ky[1]+0.5, color=str(shade), marker='s')

        plt.plot(k1, k2, 'y--')
        plt.show()

    for the in mapper.keys():
        print("Pts size: ", len(mapper[the]))


# plot the cells of each mapping with a gray level following key order
def plot_time(mapper, size, split):
    """Draw every cell of each mapping as a circle shaded by its position.

    The gray level runs from 0 to 0.85 in key iteration order. ``split`` is
    accepted but not used. After all figures, the name and number of cells of
    each entry are printed.
    """
    for entry_name, cells in mapper.items():
        plt.axis([0, size, 0, size])
        print("Mapper: ", entry_name)

        shades = np.linspace(0, 0.85, len(cells))
        for shade, cell in zip(shades, cells.keys()):
            plt.plot(cell[0]+0.5, cell[1]+0.5, color=str(shade), marker='o')

        plt.show()

    for entry_name in mapper.keys():
        print("Pts size: ", entry_name, len(mapper[entry_name]))

In [None]:
# connected-cell plots for the alq mappings on an 80 x 80 axis
plot_pts(alq_map, 80)

In [None]:
# connected-cell plots for the hfq mappings on a 60 x 60 axis
plot_pts(hfq_map, 60)

In [None]:
# connected-cell plots for the muq mappings on a 60 x 60 axis
plot_pts(muq_map, 60)

In [None]:
# connected-cell plots for the teq mappings on a 60 x 60 axis
plot_pts(teq_map, 60)

In [None]:
# shaded-by-order plots for the hfq mappings (the third argument is unused by plot_time)
plot_time(hfq_map, 60, 3)

In [None]:
# shaded-by-order plots for the hfa mappings (the third argument is unused by plot_time)
plot_time(hfa_map, 60, 3)

In [None]:
# label to understand timings
# one figure per hfq mapping: every cell as a blue square, joined in key
# iteration order by a red dashed line
for hf in hfq_map.keys():
    plt.axis([0, 60, 0, 60])
    theset = hfq_map[hf]
    k1, k2 = [], []
    
    for ky in theset.keys():
        #print(ky)
        # shift by 0.5 so the marker sits at the centre of the cell
        k1.append(ky[0]+0.5)
        k2.append(ky[1]+0.5)
        plt.plot(ky[0]+0.5, ky[1]+0.5, 'bs')
        
    plt.plot(k1, k2, 'r--')
    plt.show()

# Mapping the demo labels...

In [None]:
# load the demo sheet from the working directory
demo = pd.read_excel('demo.xlsx')

In [None]:
# keep only the rows of demo whose second column equals 1
# DataFrame.append was removed in pandas 2.0, so the matching rows are selected
# in one step and concatenated. As before, `dual` starts with a copy of the first
# row of demo as a seed, and `fi` drops that seed row again.
keep = (demo.iloc[:, 1] == 1).values
dual = pd.concat([demo.iloc[:1, :], demo.iloc[keep, :]], ignore_index=True)

fi = dual.iloc[1:, :]

In [None]:
# examining the demo file...
# print the value counts of every column of the filtered frame `fi`
li_lab = fi.columns.values

for i in range(len(li_lab)):
    print(i, "---")
    print(fi[li_lab[i]].value_counts())
    print("*******************************************")
    print("\n")

In [None]:
# list the column names held in li_lab with their positions
def look_lab():
    """Print "<position>  ---->  <name>" for every entry of the global li_lab."""
    for position, column in enumerate(li_lab):
        print(position, " ----> ", column)

In [None]:
# print the position of every demo column name
look_lab()

# Finalizing the labels...

In [None]:
# extract a label from demo file
# modifying the existing split label by applying the new demo labels

def get_demo(lsplit, colu):
    """Turn a column of the global frame ``fi`` into per-point class labels.

    Every distinct value of ``fi[colu]`` gets an integer class id. Each entry
    of ``lsplit`` is an array of row positions into ``fi``; every position is
    replaced by the class id of the value found at that row.

    Args:
        lsplit: dict of name -> array of integer row positions.
        colu: name of the column of ``fi`` to use.

    Returns:
        Dict with the keys of ``lsplit`` and integer class-id arrays as values.
    """
    # Take the column values as they are. Pre-allocating with
    # np.zeros(..., dtype=str), as done before, creates a 1-character string
    # array and silently truncates every string value.
    nlab = np.asarray(fi[colu]).reshape(-1, 1)

    slab = set(nlab[:, 0])
    print(slab)

    # Number the distinct values in order of first appearance so the ids are
    # the same on every run; iterating the set gives an arbitrary order.
    dlab = dict()
    for value in nlab[:, 0]:
        if value not in dlab:
            dlab[value] = len(dlab)
    print(dlab)

    sp_k, sp_v = lookup(lsplit)
    new_l = []

    for cur_v in sp_v:

        new_lab = np.zeros(cur_v.shape, dtype='int').reshape(-1, 1)

        for row in range(new_lab.shape[0]):
            cur_sp = cur_v[row] # get the index of the demolab
            # NOTE(review): positions >= 40 wrap around; 40 is presumably the
            # number of rows in fi - confirm against the data
            if cur_sp >= 40:
                cur_sp = cur_sp % 40
            sp_nl = nlab[cur_sp, 0] # get the exact demolab based on index [exact label]
            new_lab[row, 0] = dlab[sp_nl] # map it to its class id and store it

        new_l.append(new_lab[:, 0])

    new_di = join_dict(sp_k, new_l)
    print("The Mapping: ", dlab)
    return new_di

# Gender Label...

In [None]:
# class labels derived from the "Intake_Gender" column, one array per sp_remap entry
gen2 = get_demo(sp_remap, "Intake_Gender")

In [None]:
# gender intake label over the precomputed mappings (thresh=True, save=True into "results")
all_1lab_noobj(gen2, tr_wt, tr_map, "results", "_gen2", True, True)

# Age Label...

In [None]:
# class labels derived from the "Intake_Age" column, one array per sp_remap entry
age3 = get_demo(sp_remap, "Intake_Age")

In [None]:
# age intake label over the precomputed mappings (thresh=True, save=True into "results")
all_1lab_noobj(age3, tr_wt, tr_map, "results", "_age3", True, True)

# Eye Problem Label...

In [None]:
# class labels derived from the "Intake_ProbEyes" column, one array per sp_remap entry
eye4 = get_demo(sp_remap, "Intake_ProbEyes")

# Applying clustering methods...

# K-means...

In [None]:
from sklearn.cluster import KMeans

In [None]:
# draw one blue dot per coordinate of a set of points
def plotter(plotset, size):
    """Plot (item[0], item[1]) for every item of plotset on a size x size axis."""
    plt.axis([0, size, 0, size])
    for point in plotset:
        x_pos, y_pos = point[0], point[1]
        plt.plot(x_pos, y_pos, 'b.')
    plt.show()
    
# turn a sequence of coordinate keys into an (n, 2) array
def convkeys(keyset):
    """Return a float array with one row [key[0], key[1]] per key in keyset."""
    coords = np.zeros(len(keyset) * 2).reshape(-1, 2)

    for row, pair in enumerate(keyset):
        coords[row, 0] = pair[0]
        coords[row, 1] = pair[1]

    return coords

# run k-means on one map and show it next to the reference map
def dispkmeans(kmap, imap, size):
    """Cluster the keys of kmap with k-means and display three plots.

    The number of clusters is the number of keys in imap. Shown in order:
    the keys of kmap, the fitted cluster centres, and the keys of imap.
    """
    reference_keys = list(imap.keys())
    all_xy = convkeys(list(kmap.keys()))
    reference_xy = convkeys(reference_keys)

    # the full map before clustering
    print("Plotting the all points map before kmeans clustering...")
    plotter(kmap, size)

    # fit one centre per reference key
    n_centres = len(reference_keys)
    model = KMeans(n_clusters=n_centres)
    model.fit(all_xy)
    centres = model.cluster_centers_

    # the fitted centres
    print("Total keys in map after kmeans: ", n_centres, len(reference_keys))
    print("Keys resulting from the Kmeans centers: ", centres)
    plt.axis([0, size, 0, size])
    plt.scatter(centres[:, 0], centres[:, 1], label='True Position')
    print("Plotting the resulting Kmeans centers map...")
    plt.show()

    # the reference keys
    print("Unique keys resulting from the SOM: ", reference_keys)
    plt.axis([0, size, 0, size])
    plt.scatter(reference_xy[:, 0], reference_xy[:, 1], label='True Position')
    print("Plotting the unique map clusters...")
    plt.show()
    print("****************************************")


In [None]:
# pair the hfa and hfq mappings by position and compare k-means centres with the hfq keys
for k1, k2 in zip(hfa_map.keys(), hfq_map.keys()):
    print(k1, k2)
    dispkmeans(hfa_map[k1], hfq_map[k2], 60)

In [None]:
# pair the mua and muq mappings by position and compare k-means centres with the muq keys
for k1, k2 in zip(mua_map.keys(), muq_map.keys()):
    print(k1, k2)
    dispkmeans(mua_map[k1], muq_map[k2], 60)

In [None]:
# pair the tea and teq mappings by position and compare k-means centres with the teq keys
for k1, k2 in zip(tea_map.keys(), teq_map.keys()):
    print(k1, k2)
    dispkmeans(tea_map[k1], teq_map[k2], 60)

In [None]:
# pair the ala and alq mappings by position and compare k-means centres with the alq keys
for k1, k2 in zip(ala_map.keys(), alq_map.keys()):
    print(k1, k2)
    dispkmeans(ala_map[k1], alq_map[k2], 80)

# Agglomerative Hierarchical

In [None]:
# import hierarchical clustering libraries
import scipy.cluster.hierarchy as sch
from sklearn.cluster import AgglomerativeClustering

In [None]:
# cluster all keys of 'hfa_map0.csv'; the number of clusters is the number of
# keys in 'hfq_map0.csv'
# NOTE(review): newer scikit-learn releases rename `affinity` to `metric` -
# confirm against the installed version
kval = list(hfa_map['hfa_map0.csv'].keys())
points = convkeys(kval)
upts = list(hfq_map['hfq_map0.csv'].keys())
# create dendrogram
dendrogram = sch.dendrogram(sch.linkage(points, method='ward'))
# create clusters
hc = AgglomerativeClustering(n_clusters=len(upts), affinity = 'euclidean', linkage = 'ward')
# save clusters for chart
y_hc = hc.fit_predict(points)

In [None]:
# cluster the keys of 'hfq_map0.csv' themselves
# NOTE(review): the dendrogram here is still built from `points` (defined in the
# previous cell), fit_predict receives the raw key list `upts`, and `cpts` is
# not used in this cell - confirm this is intended
# NOTE(review): newer scikit-learn releases rename `affinity` to `metric` -
# confirm against the installed version
upts = list(hfq_map['hfq_map0.csv'].keys())
cpts = convkeys(upts)
# create dendrogram
dendrogram = sch.dendrogram(sch.linkage(points, method='ward'))
# create clusters
hc = AgglomerativeClustering(n_clusters=len(upts), affinity = 'euclidean', linkage = 'ward')
# save clusters for chart
y_hc = hc.fit_predict(upts)

In [None]:
# same clustering on `cpts` (the converted keys); relies on `cpts` and `upts`
# from the previous cell
# NOTE(review): newer scikit-learn releases rename `affinity` to `metric` -
# confirm against the installed version
# create dendrogram
dendrogram = sch.dendrogram(sch.linkage(cpts, method='ward'))
# create clusters
hc = AgglomerativeClustering(n_clusters=len(upts), affinity = 'euclidean', linkage = 'ward')
# save clusters for chart
y_hc = hc.fit_predict(cpts)

# Testing distance metrics

In [None]:
# mean coordinate of the cells of each map
def met1(qmap):
    """Return one (mean of key[0], mean of key[1]) tuple per entry of qmap.

    Each value of qmap is a mapping keyed by 2-element coordinates; the means
    are taken over those keys. The entry name and its means are also printed.
    """
    averages = []
    for entry_name, cells in qmap.items():
        first_coords = [cell[0] for cell in cells.keys()]
        second_coords = [cell[1] for cell in cells.keys()]

        mean_first = sum(first_coords) / len(first_coords)
        mean_second = sum(second_coords) / len(second_coords)

        print("The key: ", entry_name)
        print("The metric: ", mean_first, mean_second)
        averages.append((mean_first, mean_second))
    return averages

# distance between maps euclidean
def met2():
    """Placeholder for the Euclidean distance-between-maps metric.

    The definition had no colon or body, which made the whole cell a
    SyntaxError. It is kept as an explicit stub until it is implemented.

    Raises:
        NotImplementedError: always.
    """
    raise NotImplementedError("met2 (euclidean distance between maps) is not implemented yet")

In [None]:
# mean cell coordinate of every hfq mapping
hfqav = met1(hfq_map)

In [None]:
# mean cell coordinate of every hfa mapping
hfaav = met1(hfa_map)

# Pseudo Classifier SOM

In [None]:
# load up the unique points
# NOTE(review): file_list returns every *.csv present in "mapping" at run time;
# the folder must hold only the hfq_* files for this to load just those - confirm
hfq_files = file_list("mapping", "csv")
# setting up all the mappings
hfq_map = load_idx(hfq_files)

In [None]:
# load up the unique points
# NOTE(review): file_list returns every *.csv present in "mapping" at run time;
# the folder must hold only the muq_* files for this to load just those - confirm
muq_files = file_list("mapping", "csv")
# setting up all the mappings
muq_map = load_idx(muq_files)

In [None]:
# load up the unique points
# NOTE(review): file_list returns every *.csv present in "mapping" at run time;
# the folder must hold only the teq_* files for this to load just those - confirm
teq_files = file_list("mapping", "csv")
# setting up all the mappings
teq_map = load_idx(teq_files)

In [None]:
# load up the unique points
# NOTE(review): file_list returns every *.csv present in "mapping" at run time;
# the folder must hold only the alq_* files for this to load just those - confirm
alq_files = file_list("mapping", "csv")
# setting up all the mappings
alq_map = load_idx(alq_files)

In [None]:
# load up the non-unique points
# NOTE(review): file_list returns every *.csv present in "mapping" at run time;
# confirm the folder holds the intended (non-unique) files before running
full_files = file_list("mapping", "csv")
# setting up all the mappings
full_map = load_idx(full_files)

In [None]:
# plot the labeled points
# plot the driver points
# get the driver details
# sorted drivers and unsorted drivers
def pseudo_classifier_label(inp, lab, ind_mp, ind_pt, max_size, thresh_name, loc, save):
    """Label every node of each map by majority vote and plot the result.

    inp:         not used by this function (kept for call compatibility)
    lab:         label array; ``lab.shape[0]`` bounds the usable sample
                 indices and ``lab[pt]`` is used as a list index
                 (assumes integer labels 0..k-1 -- TODO confirm)
    ind_mp:      mapping node key -> iterable of sample indices
    ind_pt:      dictionary map name -> iterable of node keys; ``key[0]``
                 and ``key[1]`` are used as the plot coordinates
    max_size:    upper bound of both plot axes
    thresh_name: suffix appended to the map name for the title / file name
    loc:         directory the figure is saved into
    save:        save the figure when truthy

    Returns a dictionary map name -> list of ``(x, y, label)`` tuples.

    Nodes that have no sample index below ``lab.shape[0]`` carry no label
    evidence and are skipped. (Previously such a node silently reused the
    label computed for the preceding node, or raised NameError when it was
    the first node.) On ties the highest label index wins, as before.
    """
    colorlis = ['r', 'g', 'b', 'y', 'k']
    map_keys, map_vals = lookup(ind_pt)
    fdi = dict()

    # loop invariants
    n_labels = len(set(lab))
    n_samples = lab.shape[0]

    for o, map_name in enumerate(map_keys):
        print("Currently mapping....", map_name, " --> ", o)
        plt.axis([0, max_size, 0, max_size])
        cur = map_vals[o]
        liv, labcol = [], []
        print("Total no. of points to label....", len(cur))

        # open the main map dictionary
        for key in cur:

            # count the labels of the samples mapped to this node
            lis = [0] * n_labels
            for pt in ind_mp[key]:
                # deal only with points in the required label limit
                if pt < n_samples:
                    lis[lab[pt]] += 1

            maxi = max(lis, default=0)
            if maxi == 0:
                # nothing countable on this node -> no label to assign
                continue

            # index of the maximum value (last one on ties)
            pts = max(i for i, count in enumerate(lis) if count == maxi)

            liv.append((key[0], key[1], pts))
            plt.plot(key[0], key[1], marker='.', color=colorlis[pts])
            labcol.append(pts)

        print("Label Analysis:-")
        print("Total no. of labels: ", len(labcol))

        # determine label differences (skipped when nothing was labeled)
        if labcol:
            cur_li = [0] * (max(labcol) + 1)
            for thept in labcol:
                cur_li[thept] += 1
            for tot, count in enumerate(cur_li):
                print("Total no. of lab ", tot+1, " = ", count)
                percent = (count/len(labcol))*100
                print("Total percentage of lab ", tot+1, " = ", percent, "%")

        # str.rstrip(".csv") strips any trailing '.', 'c', 's', 'v' characters,
        # not the suffix, so the extension is removed explicitly instead
        nmod = map_name[:-len(".csv")] if map_name.endswith(".csv") else map_name
        thn = nmod + thresh_name
        plt.title(thn)

        # save the labeled map (no chdir, so the working directory is never
        # left changed if saving fails)
        if save:
            plt.savefig(os.path.join(loc, thn))
            print("map saved!!")

        plt.show()

        fdi[map_name] = liv
        print("\n")

    return fdi

# Brake label classifier

In [None]:
hfq_cl = pseudo_classifier_label(num_npy, br_remap['hf_brl'], full_map['hfn_re_id.csv'], hfq_map, 
                                 60, "_br_cl2", "results", True)

In [None]:
muq_cl = pseudo_classifier_label(num_npy, br_remap['mu_brl'], full_map['mun_re_id.csv'], muq_map, 
                                 60, "_br_cl2", "results", True)

In [None]:
teq_cl = pseudo_classifier_label(num_npy, br_remap['te_brl'], full_map['ten_re_id.csv'], teq_map, 
                                 60, "_br_cl2", "results", True)

In [None]:
alq_cl = pseudo_classifier_label(num_npy, br_remap['al_brl'], full_map['aln_re_id.csv'], alq_map, 
                                 80, "_br_cl2", "results", True)

# Speed label classifier

In [None]:
hfq_sd = pseudo_classifier_label(num_npy, sd_remap['hf_sdl'], full_map['hfn_re_id.csv'], hfq_map, 
                                 60, "_sd_cl2", "results", True)

In [None]:
muq_sd = pseudo_classifier_label(num_npy, sd_remap['mu_sdl'], full_map['mun_re_id.csv'], muq_map, 
                                 60, "_sd_cl2", "results", True)

In [None]:
teq_sd = pseudo_classifier_label(num_npy, sd_remap['te_sdl'], full_map['ten_re_id.csv'], teq_map, 
                                 60, "_sd_cl2", "results", True)

In [None]:
alq_sd = pseudo_classifier_label(num_npy, sd_remap['al_sdl'], full_map['aln_re_id.csv'], alq_map, 
                                 80, "_sd_cl2", "results", True)

# Front back label

In [None]:
hfq_fb = pseudo_classifier_label(num_npy, fb_remap['hf_fbl'], full_map['hfn_re_id.csv'], hfq_map, 
                                 60, "_fb_cl2", "results", True)

In [None]:
muq_fb = pseudo_classifier_label(num_npy, fb_remap['mu_fbl'], full_map['mun_re_id.csv'], muq_map, 
                                 60, "_fb_cl2", "results", True)

In [None]:
teq_fb = pseudo_classifier_label(num_npy, fb_remap['te_fbl'], full_map['ten_re_id.csv'], teq_map, 
                                 60, "_fb_cl2", "results", True)

In [None]:
alq_fb = pseudo_classifier_label(num_npy, fb_remap['al_fbl'], full_map['aln_re_id.csv'], alq_map, 
                                 80, "_fb_cl2", "results", True)

# Left right label

In [None]:
hfq_lr = pseudo_classifier_label(num_npy, lr_remap['hf_lrl'], full_map['hfn_re_id.csv'], hfq_map, 
                                 60, "_lr_cl2", "results", True)

In [None]:
muq_lr = pseudo_classifier_label(num_npy, lr_remap['mu_lrl'], full_map['mun_re_id.csv'], muq_map, 
                                 60, "_lr_cl2", "results", True)

In [None]:
teq_lr = pseudo_classifier_label(num_npy, lr_remap['te_lrl'], full_map['ten_re_id.csv'], teq_map, 
                                 60, "_lr_cl2", "results", True)

In [None]:
alq_lr = pseudo_classifier_label(num_npy, lr_remap['al_lrl'], full_map['aln_re_id.csv'], alq_map, 
                                 80, "_lr_cl2", "results", True)

# Top bottom label

In [None]:
hfq_tb = pseudo_classifier_label(num_npy, tb_remap['hf_tbl'], full_map['hfn_re_id.csv'], hfq_map, 
                                 60, "_tb_cl2", "results", True)

In [None]:
muq_tb = pseudo_classifier_label(num_npy, tb_remap['mu_tbl'], full_map['mun_re_id.csv'], muq_map, 
                                 60, "_tb_cl2", "results", True)

In [None]:
teq_tb = pseudo_classifier_label(num_npy, tb_remap['te_tbl'], full_map['ten_re_id.csv'], teq_map, 
                                 60, "_tb_cl2", "results", True)

In [None]:
alq_tb = pseudo_classifier_label(num_npy, tb_remap['al_tbl'], full_map['aln_re_id.csv'], alq_map, 
                                 80, "_tb_cl2", "results", True)

# Lane gap label

In [None]:
hfq_lg = pseudo_classifier_label(num_npy, lg_remap['hf_lgl'], full_map['hfn_re_id.csv'], hfq_map, 
                                 60, "_lg_cl2", "results", True)

In [None]:
muq_lg = pseudo_classifier_label(num_npy, lg_remap['mu_lgl'], full_map['mun_re_id.csv'], muq_map, 
                                 60, "_lg_cl2", "results", True)

In [None]:
teq_lg = pseudo_classifier_label(num_npy, lg_remap['te_lgl'], full_map['ten_re_id.csv'], teq_map, 
                                 60, "_lg_cl2", "results", True)

In [None]:
alq_lg = pseudo_classifier_label(num_npy, lg_remap['al_lgl'], full_map['aln_re_id.csv'], alq_map, 
                                 80, "_lg_cl2", "results", True)

# Split 2 label

In [None]:
hfq_s2 = pseudo_classifier_label(num_npy, s2_remap['hf_s2l'], full_map['hfn_re_id.csv'], hfq_map, 
                                 60, "_s2_cl2", "results", True)

In [None]:
muq_s2 = pseudo_classifier_label(num_npy, s2_remap['mu_s2l'], full_map['mun_re_id.csv'], muq_map, 
                                 60, "_s2_cl2", "results", True)

In [None]:
teq_s2 = pseudo_classifier_label(num_npy, s2_remap['te_s2l'], full_map['ten_re_id.csv'], teq_map, 
                                 60, "_s2_cl2", "results", True)

In [None]:
alq_s2 = pseudo_classifier_label(num_npy, s2_remap['al_s2l'], full_map['aln_re_id.csv'], alq_map, 
                                 80, "_s2_cl2", "results", True)

# Split 3 label

In [None]:
hfq_s3 = pseudo_classifier_label(num_npy, s3_remap['hf_s3l'], full_map['hfn_re_id.csv'], hfq_map, 
                                 60, "_s3_cl2", "results", True)

In [None]:
muq_s3 = pseudo_classifier_label(num_npy, s3_remap['mu_s3l'], full_map['mun_re_id.csv'], muq_map, 
                                 60, "_s3_cl2", "results", True)

In [None]:
teq_s3 = pseudo_classifier_label(num_npy, s3_remap['te_s3l'], full_map['ten_re_id.csv'], teq_map, 
                                 60, "_s3_cl2", "results", True)

In [None]:
alq_s3 = pseudo_classifier_label(num_npy, s3_remap['al_s3l'], full_map['aln_re_id.csv'], alq_map, 
                                 80, "_s3_cl2", "results", True)

# Split 4 label

In [None]:
hfq_s4 = pseudo_classifier_label(num_npy, s4_remap['hf_s4l'], full_map['hfn_re_id.csv'], hfq_map, 
                                 60, "_s4_cl2", "results", True)

In [None]:
muq_s4 = pseudo_classifier_label(num_npy, s4_remap['mu_s4l'], full_map['mun_re_id.csv'], muq_map, 
                                 60, "_s4_cl2", "results", True)

In [None]:
teq_s4 = pseudo_classifier_label(num_npy, s4_remap['te_s4l'], full_map['ten_re_id.csv'], teq_map, 
                                 60, "_s4_cl2", "results", True)

In [None]:
alq_s4 = pseudo_classifier_label(num_npy, s4_remap['al_s4l'], full_map['aln_re_id.csv'], alq_map, 
                                 80, "_s4_cl2", "results", True)

# Split 5 label

In [None]:
hfq_s5 = pseudo_classifier_label(num_npy, s5_remap['hf_s5l'], full_map['hfn_re_id.csv'], hfq_map, 
                                 60, "_s5_cl2", "results", True)

In [None]:
muq_s5 = pseudo_classifier_label(num_npy, s5_remap['mu_s5l'], full_map['mun_re_id.csv'], muq_map, 
                                 60, "_s5_cl2", "results", True)

In [None]:
teq_s5 = pseudo_classifier_label(num_npy, s5_remap['te_s5l'], full_map['ten_re_id.csv'], teq_map, 
                                 60, "_s5_cl2", "results", True)

In [None]:
alq_s5 = pseudo_classifier_label(num_npy, s5_remap['al_s5l'], full_map['aln_re_id.csv'], alq_map, 
                                 80, "_s5_cl2", "results", True)

# Comparing each label with driver

In [None]:
# function to plot points
def qdr_plot(adr, names, grtitle, grloc, grsave):
    """Plot, per driver, the percentage of nodes carrying each label.

    adr:     dictionary driver name -> list of tuples whose third element
             (``tup[2]``) is an integer label used as a list index
    names:   legend text, indexed by label
    grtitle: plot title, also used as the file name when saving
    grloc:   legend location passed to ``ax.legend``
    grsave:  save the figure into the 'results' directory when truthy

    Returns ``val_li``: one list per label holding the floored percentage
    of that label for every driver, in driver order.
    """
    adrk, adrv = lookup(adr)
    colorlis = ['r', 'g', 'b', 'y', 'k']
    nameset = names

    # get the labels of *all* drivers; looking at the first driver only made
    # the counting below fail with IndexError for labels seen later on
    labset = sorted({tup[2] for cur in adrv for tup in cur})
    print("Labels are: ", labset)

    # labels index the count lists directly, so size them by the largest label
    n_labels = labset[-1] + 1 if labset else 0
    val_li = [list() for _ in range(n_labels)]

    # open each dictionary
    for n, cur in enumerate(adrv):
        cur_len = len(cur)

        # get percent of each label
        lab_li = [0] * n_labels
        for val in cur:
            lab_li[val[2]] += 1

        print("Label count of Driver: ", adrk[n], " : ", lab_li)
        for k in range(n_labels):
            # a driver without any labeled node contributes 0 % everywhere
            percent = (lab_li[k]/cur_len)*100 if cur_len else 0.0
            print("Percent of lab-", k, " : ", percent)
            val_li[k].append(np.floor(percent))

        print("Plotting ", adrk[n], " complete!!")
        print("\n")

    # plotting phase
    n_drivers = len(adrk)
    plt.figure(num=None, figsize=(5, 10), dpi=200, facecolor='w', edgecolor='k')
    x = np.arange(n_drivers)  # the label locations (one per driver)
    width = 0.2  # the width of the bars

    ax = plt.subplot(111)
    for i in range(len(val_li)):
        ax.barh(x+(width*i), val_li[i], width, label=nameset[i], color=colorlis[i], align='center')

    ax.autoscale(tight=True)
    plt.axis([0, 100, -1, n_drivers + 1])

    # Add some text for labels, title and custom x-axis tick labels
    ax.set_xlabel('Percentage')
    ax.set_ylabel('Driver ID')
    ax.set_title(grtitle)
    ax.legend(loc=grloc)

    # save without chdir so the working directory is never left changed
    if grsave:
        plt.savefig(os.path.join('results', grtitle))
        print("Map Saved!!")

    plt.show()
    return val_li

In [None]:
# legend text per label value, one list per label type
brlabs = ['Non-Brake', 'Brake']
sdlabs = ['Speed Below Avg.', 'Speed Above Avg.']
fblabs = ['Along X (Deceleration)', 'Along X (Acceleration)']  # typo "Accelertion" fixed
lrlabs = ['Acceleration along Y (Left)', 'Acceleration along Y (Right)']
tblabs = ['Along Z (Lower Altitude)', 'Along Z (Higher Altitude)']
lglabs = ['Left of lane center', 'Right of lane center']
s2labs = ['Section 1', 'Section 2']
s3labs = ['Section 1', 'Section 2', 'Section 3']
s4labs = ['Section 1', 'Section 2', 'Section 3', 'Section 4']
s5labs = ['Section 1', 'Section 2', 'Section 3', 'Section 4', 'Section 5']

# title suffixes (the plot title is also used as the saved file name)
brti = '_nonbrake_vs_brake'
sdti = '_belowavg_vs_aboveavg'
fbti = '_backward_vs_forward'
lrti = '_left_vs_right'
tbti = '_low_vs_high'
lgti = '_leftlane_vs_rightlane'
s2ti = '_1st_vs_2nd'
s3ti = '_1st_vs_2nd_vs_3rd'
s4ti = '_1st_vs_2nd_vs_3rd_vs_4th'
s5ti = '_1st_vs_2nd_vs_3rd_vs_4th_vs_5th'

In [None]:
albr = qdr_plot(alq_cl, brlabs, 'al'+brti, 'upper right', True)

In [None]:
alsd = qdr_plot(alq_sd, sdlabs, 'al'+sdti, 'upper right', True)

In [None]:
alfb = qdr_plot(alq_fb, fblabs, 'al'+fbti, 'upper right', True)

In [None]:
allr = qdr_plot(alq_lr, lrlabs, 'al'+lrti, 'upper right', True)

In [None]:
altb = qdr_plot(alq_tb, tblabs, 'al'+tbti, 'upper right', True)

In [None]:
allg = qdr_plot(alq_lg, lglabs, 'al'+lgti, 'upper right', True)

In [None]:
als2 = qdr_plot(alq_s2, s2labs, 'al'+s2ti, 'upper right', True)

In [None]:
als3 = qdr_plot(alq_s3, s3labs, 'al'+s3ti, 'upper right', True)

In [None]:
als4 = qdr_plot(alq_s4, s4labs, 'al'+s4ti, 'upper right', True)

In [None]:
als5 = qdr_plot(alq_s5, s5labs, 'al'+s5ti, 'upper right', True)

# Comparing each label and distraction

In [None]:
# function to plot points
def qdi_plot(adi, grtitle, grloc, grsave):
    """Compare the three distractions driver by driver, one figure per label.

    adi:     ``[hands_free, music, text]``; each entry is a list with one
             list of per-driver percentages per label
    grtitle: list of titles indexed by label; each is also the file name
             used when saving
    grloc:   legend location passed to ``ax.legend``
    grsave:  save each figure into the 'results' directory when truthy

    The number of drivers is taken from the data instead of being fixed
    at 40, so other group sizes plot and report correct percentages.
    Returns None.
    """
    width = 0.2

    for i in range(len(adi[0])):
        plt.figure(num=None, figsize=(10, 5), dpi=100, facecolor='w', edgecolor='k')
        ax = plt.subplot(111)
        al, bl, cl = [], [], []
        aw, bw, cw = 0, 0, 0

        for pt, (a, b, c) in enumerate(zip(adi[0][i], adi[1][i], adi[2][i]), start=1):
            al.append(a)
            bl.append(b)
            cl.append(c)

            li = [a, b, c]
            lar = max(li)
            sma = min(li)
            print(li)
            if a == lar:
                print("Driver ", pt, "most activated under distraction -Hands-Free- labeled by: ", grtitle[i])
                aw = aw + 1

            if b == lar:
                print("Driver ", pt, "most activated under distraction -Music- labeled by: ", grtitle[i])
                bw = bw + 1

            if c == lar:
                print("Driver ", pt, "most activated under distraction -Text- labeled by: ", grtitle[i])
                cw = cw + 1

            if a == sma:
                print("Driver ", pt, "least activated under distraction -Hands-Free- labeled by: ", grtitle[i])

            if b == sma:
                print("Driver ", pt, "least activated under distraction -Music- labeled by: ", grtitle[i])

            if c == sma:
                print("Driver ", pt, "least activated under distraction -Text- labeled by: ", grtitle[i])

        # one bar group per driver actually present in the data
        n_drivers = len(al)
        x = np.arange(n_drivers)

        ax.bar(x, al, width, label='Hands-Free', color='r', align='center')
        ax.bar(x+width, bl, width, label='Music', color='g', align='center')
        ax.bar(x+width*2, cl, width, label='Text', color='b', align='center')

        ax.autoscale(tight=True)
        plt.axis([-1, n_drivers + 1, 0, 100])

        # Add some text for labels, title and custom x-axis tick labels
        ax.set_xlabel('Driver ID')
        ax.set_ylabel('Percentage')
        ax.set_title(grtitle[i])
        ax.legend(loc=grloc)

        def as_percent(count):
            # share of drivers, guarding against an empty group
            return (count/n_drivers)*100 if n_drivers else 0.0

        print()
        winli = [aw, bw, cw]
        maxli = max(winli)
        minli = min(winli)
        maxper = as_percent(maxli)
        minper = as_percent(minli)
        print(winli, as_percent(aw), as_percent(bw), as_percent(cw))
        print(maxli, minli, maxper, minper)

        if aw == maxli:
            print("Most drivers tend to be activated under -Hands-Free-: ", maxli, maxper, " under label ", grtitle[i])

        if bw == maxli:
            print("Most drivers tend to be activated under -Music- ", maxli, maxper, " under label ", grtitle[i])

        if cw == maxli:
            print("Most drivers tend to be activated under -Text-:", maxli, maxper, " under label ", grtitle[i])

        if aw == minli:
            print("Least drivers tend to be activated under -Hands-Free-:", minli, minper, " under label ", grtitle[i])

        if bw == minli:
            print("Least drivers tend to be activated under -Music-:", minli, minper, " under label ", grtitle[i])

        if cw == minli:
            print("Least drivers tend to be activated under -Text-: ", minli, minper, " under label ", grtitle[i])

        # save without chdir so the working directory is never left changed
        if grsave:
            plt.savefig(os.path.join('results', grtitle[i]))
            print("Map Saved!!")

        plt.show()
    return



# combining labels and plotting individual distractions
def indi_di(adi, names, grloc, grsave):
    """Plot, for each distraction, the per-driver percentage of every label.

    adi:    list with one entry per distraction (titled 'hf', 'mu', 'te'
            in that order); each entry is a list with one list of
            per-driver percentages per label
    names:  label names, used for the legend and appended to the title
    grloc:  legend location passed to ``ax.legend``
    grsave: save each figure into the 'results' directory when truthy

    The number of drivers is taken from the data instead of being fixed
    at 40. Returns None.
    """
    plt.rcParams.update({'font.size': 12})

    colorlis = ['r', 'g', 'b', 'y', 'k']
    distract = ['hf', 'mu', 'te']
    width = 0.2

    for li, dist in enumerate(adi):
        # one bar group per driver present in the first label's list
        n_drivers = len(dist[0]) if len(dist) else 0
        x = np.arange(n_drivers)

        plt.figure(num=None, figsize=(10, 5), dpi=200, facecolor='w', edgecolor='k')
        ax = plt.subplot(111)

        for i in range(len(dist)):
            ax.bar(x+width*i, dist[i], width, label=names[i], color=colorlis[i], align='center')

        ax.autoscale(tight=True)
        plt.axis([-1, n_drivers + 1, 0, 100])

        # Add some text for labels, title and custom x-axis tick labels
        ax.set_xlabel('Driver ID')
        ax.set_ylabel('Percentage')

        # title: distraction code followed by every label name
        grtitle = '_'.join([distract[li]] + list(names))

        ax.set_title(grtitle)
        ax.legend(loc=grloc)

        # save without chdir so the working directory is never left changed
        if grsave:
            plt.savefig(os.path.join('results', grtitle))
            print("Map Saved!!")

        plt.show()
    return

In [None]:
def func_sp(alist):
    """Split every sequence of ``alist`` at positions 40 and 80.

    Returns ``[a, b, c]`` where, for each sequence in ``alist``, ``a``
    receives its first 40 items, ``b`` items 40..79 and ``c`` everything
    from position 80 onwards (each as a new list).
    """
    head, middle, tail = [], [], []

    for seq in alist:
        items = list(seq)
        head.append(items[:40])
        middle.append(items[40:80])
        tail.append(items[80:])

    return [head, middle, tail]


abr = func_sp(albr)
asd = func_sp(alsd)
afb = func_sp(alfb)
alr = func_sp(allr)
atb = func_sp(altb)
alg = func_sp(allg)
as2 = func_sp(als2)
as3 = func_sp(als3)
as4 = func_sp(als4)
as5 = func_sp(als5)

In [None]:
# Group the per-distraction results of each label type as
# [hands-free, music, text].
# NOTE(review): the hf*/mu*/te* names are not defined in the visible part of
# the notebook -- this cell presumably relies on earlier qdr_plot calls; confirm.
qbr = [hfbr, mubr, tebr]
qsd = [hfsd, musd, tesd]
qfb = [hffb, mufb, tefb]
qlr = [hflr, mulr, telr]
qtb = [hftb, mutb, tetb]
qlg = [hflg, mulg, telg]
qs2 = [hfs2, mus2, tes2]
qs3 = [hfs3, mus3, tes3]
qs4 = [hfs4, mus4, tes4]
qs5 = [hfs5, mus5, tes5]

# Per-label names handed to indi_di / qdi_plot in the cells below, where they
# are used for legends, titles and the file names of saved figures.
qbr_ti = ['Non_Brake','Brake']
qsd_ti = ['Below_Average_Speed', 'Above_Average_Speed']
qfb_ti = ['Backward_Deceleration', 'Forward_Acceleration']
qlr_ti = ['Left_Acceleration', 'Right_Acceleration']
qtb_ti = ['Lower_Altitude_Acceleration', 'Higher_Altitude_Acceleration']
qlg_ti = ['Left_Center_Lane', 'Right_Center_Lane']
qs2_ti = ['Sp2_1st', 'Sp2_2nd']
qs3_ti = ['Sp3_1st', 'Sp3_2nd', 'Sp3_3rd']
qs4_ti = ['Sp4_1st', 'Sp4_2nd', 'Sp4_3rd', 'Sp4_4th']
qs5_ti = ['Sp5_1st', 'Sp5_2nd', 'Sp5_3rd', 'Sp5_4th', 'Sp5_5th']

# Brake

In [None]:
indi_di(abr, qbr_ti, 'upper right', True)

In [None]:
qdi_plot(abr, qbr_ti, 'upper right', True)

# Speed

In [None]:
indi_di(asd, qsd_ti, 'upper right', True)

In [None]:
qdi_plot(asd, qsd_ti, 'upper right', True)

# acceleration x

In [None]:
indi_di(afb, qfb_ti, 'upper right', True)

In [None]:
qdi_plot(afb, qfb_ti, 'upper right', True)

# acceleration y

In [None]:
indi_di(alr, qlr_ti, 'upper right', True)

In [None]:
qdi_plot(alr, qlr_ti, 'upper right', True)

# acceleration z

In [None]:
indi_di(atb, qtb_ti, 'upper right', True)

In [None]:
qdi_plot(atb, qtb_ti, 'upper right', True)

# lane gap

In [None]:
indi_di(alg, qlg_ti, 'upper right', True)

In [None]:
qdi_plot(alg, qlg_ti, 'upper right', True)

# split 2

In [None]:
indi_di(as2, qs2_ti, 'upper right', True)

In [None]:
qdi_plot(as2, qs2_ti, 'upper right', True)

# split 3

In [None]:
indi_di(as3, qs3_ti, 'upper right', True)

In [None]:
qdi_plot(as3, qs3_ti, 'upper right', True)

# split 4

In [None]:
indi_di(as4, qs4_ti, 'upper right', True)

In [None]:
qdi_plot(as4, qs4_ti, 'upper right', True)

# split 5

In [None]:
indi_di(as5, qs5_ti, 'upper right', True)

In [None]:
qdi_plot(as5, qs5_ti, 'upper right', True)

In [None]:
# plot the labeled points
# plot the driver points
# get the driver details
# sorted drivers and unsorted drivers
def pseudo_classifier_data(inp, lab, ind_mp, ind_pt, max_size, thresh_name, loc, save):
    """Label the nodes of the full map by majority vote, per driver map.

    For every processed map of ``ind_pt`` the nodes of ``ind_mp`` whose
    first two coordinates also occur in that map are labeled and plotted.

    inp:         not used by this function (kept for call compatibility)
    lab:         label array; ``lab.shape[0]`` bounds the usable sample
                 indices and ``lab[pt]`` is used as a list index
                 (assumes integer labels 0..k-1 -- TODO confirm)
    ind_mp:      mapping node key -> list of sample indices
    ind_pt:      dictionary map name -> iterable of node keys
    max_size:    not used (the axis call that read it is disabled)
    thresh_name: suffix appended to the map name for the title / file name
    loc:         directory the figure is saved into
    save:        save the figure when truthy

    Returns a dictionary map name -> list of ``(x, y, label)`` tuples.

    Nodes without any sample index below ``lab.shape[0]`` are skipped.
    (Previously they reused the label of the preceding node, or raised
    NameError when no label had been computed yet.) On ties the highest
    label index wins, as before.
    """
    colorlis = ['r', 'g', 'b', 'y', 'k']
    map_keys, map_vals = lookup(ind_pt)
    fdi = dict()

    # loop invariants
    n_labels = len(set(lab))
    n_samples = lab.shape[0]

    # NOTE(review): the loop starts at index 39, so the first 39 maps are never
    # processed -- looks like a debugging leftover; kept to preserve behaviour.
    for o in range(39, len(map_keys)):
        print("Currently mapping....", map_keys[o], " --> ", o)
        cur = map_vals[o]
        liv, labcol = [], []
        print("Total no. of points to label....", len(cur))

        # mini map: coordinates present in the current map, for O(1) lookups
        # instead of a scan of `cur` per node
        wanted = {(kk[0], kk[1]) for kk in cur}

        # open the main map dictionary
        for key in ind_mp.keys():
            members = ind_mp[key]

            # deal only with key having something in the list
            if len(members) == 0:
                continue

            # deal only with nodes that also belong to the current map
            if (key[0], key[1]) not in wanted:
                continue

            # count the labels of the samples mapped to this node
            lis = [0] * n_labels
            for pt in members:
                # deal only with points in the required label limit
                if pt < n_samples:
                    lis[lab[pt]] += 1

            maxi = max(lis, default=0)
            if maxi == 0:
                # nothing countable on this node -> no label to assign
                continue

            # index of the maximum value (last one on ties)
            pts = max(i for i, count in enumerate(lis) if count == maxi)

            liv.append((key[0], key[1], pts))
            print("x = ", key[0], " y = ", key[1], " lab = ", pts)
            plt.plot(key[0], key[1], marker='.', color=colorlis[pts])
            labcol.append(pts)

        print("Label Analysis:-")
        print("Total no. of labels: ", len(labcol))

        # determine label differences (skipped when nothing was labeled)
        if labcol:
            cur_li = [0] * (max(labcol) + 1)
            for thept in labcol:
                cur_li[thept] += 1
            for tot, count in enumerate(cur_li):
                print("Total no. of lab ", tot+1, " = ", count)
                percent = (count/len(labcol))*100
                print("Total percentage of lab ", tot+1, " = ", percent, "%")

        # str.rstrip(".csv") strips any trailing '.', 'c', 's', 'v' characters,
        # not the suffix, so the extension is removed explicitly instead
        map_name = map_keys[o]
        nmod = map_name[:-len(".csv")] if map_name.endswith(".csv") else map_name
        thn = nmod + thresh_name
        plt.title(thn)

        # save the labeled map (no chdir, so the working directory is never
        # left changed if saving fails)
        if save:
            plt.savefig(os.path.join(loc, thn))
            print("map saved!!")

        plt.show()

        fdi[map_name] = liv
        print("\n")

    return fdi

In [None]:
os.chdir('..')

In [None]:
hfq_map['hfq_map9.csv'].keys()

# Examining Segmented maps...

In [None]:
# load up the resultant mappings
map_seg = file_list("mapping", "csv")

# setting up all the mappings
seg_ind = load_idx(map_seg)

In [None]:
def skip_key(keyno, thedict):
    """Return a copy of ``thedict`` without the entry at position ``keyno``.

    keyno:   zero-based position (in iteration order) of the entry to drop
    thedict: source dictionary; it is not modified

    A position that does not exist leaves every entry in place.
    """
    names, values = lookup(thedict)
    return {
        name: value
        for position, (name, value) in enumerate(zip(names, values))
        if position != keyno
    }

In [None]:
segwt = dict()
segwt['aln_wt'] = tr_wt['aln_re_wt.npy'][:40, :40, :]
segwt['hfn_wt'] = tr_wt['hfn_re_wt.npy'][:30, :30, :]
segwt['mun_wt'] = tr_wt['mun_re_wt.npy'][:30, :30, :]
segwt['ten_wt'] = tr_wt['ten_re_wt.npy'][:30, :30, :]

In [None]:
# examine the brake label
all_1lab_noobj(br_remap, segwt, seg_ind, "results_dr", "_brs", True, True)

In [None]:
# examine the speed label
all_1lab_noobj(sd_remap, segwt, seg_ind, "results_dr", "_sds", True, True)

In [None]:
# examine the forward and backward motion
all_1lab_noobj(fb_remap, segwt, seg_ind, "results_dr", "_fbs", True, True)

In [None]:
# examine the left and right motion
all_1lab_noobj(lr_remap, segwt, seg_ind, "results_dr", "_lrs", True, True)

In [None]:
#examine the top and bottom motion
all_1lab_noobj(tb_remap, segwt, seg_ind, "results_dr", "_tbs", True, True)

In [None]:
# examine the lane gap
all_1lab_noobj(lg_remap, segwt, seg_ind, "results_dr", "_lgs", True, True)

In [None]:
os.getcwd()

# Functions to analyse SOM results...

In [None]:
# functions that help in cluster analysis

# function that returns values in a certain space on the map
def clus_ret(x1, x2, y1, y2, keyset):
    '''
    Collect the entries stored on the nodes of a rectangular map region.

    x1, x2, y1, y2: x and y coordinates; the region covers
                    range(x1, x2) x range(y1, y2) (upper bounds excluded)
    keyset: set of indices -- dictionary node key (i, j) -> list of entries

    Returns (fi_win, fi):
    fi_win: defaultdict(list) node key -> entries of that node, holding only
            region nodes with at least one entry
    fi:     sorted list of all entries found in the region

    The number of entries found is printed.
    '''

    # all map points of the region; a set gives O(1) membership tests instead
    # of comparing every key against every point of the region
    region = {(i, j) for i in range(x1, x2) for j in range(y1, y2)}

    fi = []
    fi_win = defaultdict(list)
    for node in keyset.keys():
        if node not in region:
            continue
        for entry in keyset[node]:
            fi_win[node].append(entry)
            fi.append(entry)

    fi.sort()
    print(len(fi))
    return fi_win, fi



# function to return labels of a particular point
def lab_ret(id1, id2, idx, lab, name):
    """Print and return the labels of the samples stored on one map node.

    id1, id2: coordinates of the node
    idx:      dictionary map name -> dictionary (id1, id2) -> sample indices
    lab:      2-D label array; column 0 is read
    name:     name of the map to look into
    """
    members = idx[name][(id1, id2)]
    print("Labels in ", id1, " and ", id2, " of ", name, " are:")
    node_labels = lab[members, 0]
    print(node_labels)
    return node_labels




# function to return and display dictionaries of dataframes of the respective labels
def join_dflab(df_dict, lab_dict):
    """Attach each label array to its dataframe as an additional column.

    df_dict:  dictionary of dataframes
    lab_dict: dictionary of 2-D label arrays (column 0 is used); entries are
              paired with ``df_dict`` by position, and the label's dictionary
              key becomes the name of the new column

    Returns a dictionary with the keys of ``df_dict`` and the extended
    dataframes as values.

    NOTE(review): ``sep_dict`` is defined elsewhere in the notebook and is
    presumably a (keys, values) splitter -- confirm.
    """
    frame_names, frames = sep_dict(df_dict)
    label_names, labels = sep_dict(lab_dict)

    # one joined dataframe per input dataframe: the label column is first
    # wrapped in its own dataframe, then concatenated column-wise
    joined = [
        pd.concat(
            (frames[pos], pd.DataFrame({label_names[pos]: labels[pos][:, 0]})),
            axis=1,
        )
        for pos in range(len(frame_names))
    ]

    # return that dataframe dictionary
    return join_dict(frame_names, joined)


# function to show only keyset points in the labeled dataframe
def show_dflab(dflab1, keyset1):
    """Return the rows of ``dflab1`` at the positions listed in ``keyset1``.

    dflab1:  labeled dataframe
    keyset1: sequence of integer row positions

    Returns a dataframe whose first column ``og_keys`` holds the requested
    positions, followed by the columns of ``dflab1`` for those rows (in the
    order of ``keyset1``, with a fresh 0..n-1 index).

    Rows are taken as one-row slices, so a position outside the dataframe
    contributes no row instead of raising. ``DataFrame.append`` (removed in
    pandas 2.0) is replaced by a single ``pd.concat``, and an empty
    ``keyset1`` now yields an empty result instead of an IndexError.
    """
    rows = [dflab1.iloc[pos:pos + 1, :] for pos in keyset1]

    if rows:
        new_df = pd.concat(rows, ignore_index=True)
    else:
        # nothing requested: keep the columns, drop all rows
        new_df = dflab1.iloc[0:0, :]

    key_df = pd.DataFrame({"og_keys" : keyset1})

    fin_df = pd.concat((key_df, new_df), axis=1)
    return fin_df


'''# plotting two results against each other as a comparision
def plot_aga(no1, no2):
    
    #no1: larger cluster
    #no2: smaller cluster
    
    
    # keep the larger set outside
    count = 0
    plt.axis([0, 31, 0, 31])
    
    for n1 in no1.keys():
        for n2 in no2.keys():
            for l1 in no1[n1]:
                for l2 in no2[n2]:
                    if l1 == l2:
                        count = count + 1
                        plt.plot(n1[0]+0.5, n1[1]+0.5, 'r.')
                        
    print("Number of matched points....", count)
    plt.show()
    
    
# plot points after searching on the map
def search(pt, mapp):
    plt.axis([0, 31, 0, 31])
    for cc in mapp.keys():
        for li in mapp[cc]:
            for p in pt:
                if li == p:
                    print(cc)
                    plt.plot(cc[0]+0.5, cc[1]+0.5, 'b.')
    plt.show()
    

# return a list of points that need to be searched                
def c_search(start, last):
    li = []
    for i in range(start, last+1):
        li.append(i)
        
    return li

# function that joins clusters
def join(m1, m2):
    di = {**m1, **m2}
    return di'''

# 9. Piecewise Label...

In [None]:
# label only splits of data
## NOTE: SOM mapping already complete
def pieces_1lab(lab_dict, ptr_dict, weight_dict, idx_dict, name, loc, thresh, save):
    """Plot the label map of every piece (junction to junction) of each map.

    lab_dict, ptr_dict, weight_dict, idx_dict: dictionaries paired by
        position; for each weight entry the label, pointer and index
        entries at the same position are used
    name:   label tag inserted into the plot name
    loc:    location forwarded to the plotting helper
    thresh: use ``lab_thresh`` when truthy, ``lab_nothresh`` otherwise
    save:   forwarded to the plotting helper

    NOTE(review): ``sep_dict``, ``ret_junc``, ``lab_thresh`` and
    ``lab_nothresh`` are defined elsewhere in the notebook; ``sep_dict`` is
    presumably a (keys, values) splitter -- confirm.
    """
    _, la_v = sep_dict(lab_dict)
    wt_k, wt_v = sep_dict(weight_dict)
    _, id_v = sep_dict(idx_dict)
    _, pt_v = sep_dict(ptr_dict)

    # set the weights first
    for wtid in range(len(wt_k)):
        # current values:
        cur_la = la_v[wtid]
        cur_wt = wt_v[wtid]
        cur_id = id_v[wtid]
        cur_pt = pt_v[wtid]

        # str.rstrip('.npy') strips any trailing '.', 'n', 'p', 'y' characters
        # (e.g. 'aln.npy' -> 'al'), so the extension is removed explicitly
        wt_name = wt_k[wtid]
        save_n = wt_name[:-len('.npy')] if wt_name.endswith('.npy') else wt_name

        print("Starting ", wt_name, ' ...')

        # get the junctions
        junc = ret_junc(cur_pt)
        start = junc[0]

        # walk the junctions after the first one; the last junction is left
        # out, exactly as in the original range expression
        for jun in range(1, len(junc) - 1):
            end = junc[jun]

            jncs = str(start) + "x" + str(end)
            # new name of the map
            fname = "plt_" + save_n + "_pie_" + name + jncs

            # plot the mapping
            if thresh == True:
                lab_thresh(cur_la[start:end], cur_id, cur_wt.shape[0], fname, loc, save)
            else:
                # NOTE(review): this branch passes the full label array, not
                # the [start:end] piece used above -- confirm this is intended
                lab_nothresh(cur_la, cur_id, cur_wt.shape[0], fname, loc, save)

            start = end

In [None]:
# piece wise of all data for brake label
pieces_1lab(br_red, sp_red, load_wts, idd_map, "_brl", "weights", True, True)

# Testing some code...

In [None]:
# Scratch cell: draw a row of square markers shaded with evenly spaced gray
# levels to preview the available shades.
#plt.axis([0, 11, -1, 10])
# hexadecimal digit characters; not referenced by the code visible in this cell
colorss = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'}

# number of shades and their values, from 0 up to 0.85
si = 100
decvals = np.linspace(0, 0.85, si)

# NOTE: `i` stays defined after the loop and is reused by the next cell
for i in range(si):
    cint = decvals[i]
    # the float is passed as a string; matplotlib reads a string holding a
    # number in [0, 1] as a gray level
    cstr = str(cint)
    #print(cidx, ' ---> ', cstr)
    plt.plot(i, 0, marker='s', color=cstr)
plt.show()

In [None]:
plt.plot(i, 0, marker='s', color='0.85')
plt.savefig("uwu")

In [None]:
np.linspace(0, 1, 100)

# Unused code...

In [None]:
'''
### functions that plot som map cluster points

# function to load som and label values but not plot labels
def som_label_noplot(data, label, loc, wt):
    # data: dataset that will be used
    # label: 1d label to use
    # loc: name of file that stores the results and data to be loaded
    # wt: name of the weight file
    
    sig = np.random.rand(1)
    lr = np.random.rand(1)
    
    # create the som object
    mine = mySOM(size, size, data.shape[1], sig, lr)
    
    # set the weights
    mine.load_weights(loc, wt)
    
    # show the gray map (some understanding??)
    plt.bone()
    mapping = mine.distance_map().T
    plt.pcolor(mapping)
    plt.show()

    # collect map to data samples, map to data index
    win, ind = mine.win_map(data)
    
    return win, ind 


br_size = 22
# function to load som and label values but not plot labels
def som_label_brplot(data, label, loc, wt):
    # data: dataset that will be used
    # label: 1d label to use
    # loc: name of file that stores the results and data to be loaded
    # wt: name of the weight file
    
    sig = np.random.rand(1)
    lr = np.random.rand(1)
    
    size = br_size
    # create the som object
    mine = mySOM(size, size, data.shape[1], sig, lr)
    
    # set the weights
    mine.load_weights(loc, wt)
    
    # show the gray map (some understanding??)
    plt.bone()
    mapping = mine.distance_map().T
    plt.pcolor(mapping)
    plt.show()

    # collect map to data samples, map to data index
    win, ind = mine.win_map(data)
    
    return win, ind 
    
'''


# Abandoned attempt at Excel storage (kept for reference only). The code is
# wrapped in a string literal, so xlwt is never imported and store_ind is never
# defined.
#
# What the archived store_ind(cur_idx, fname) does: writes one spreadsheet row
# per dictionary entry - column 0 = key[0], column 1 = key[1], column 2 = the
# length of the entry's list, columns 3+ = the list items (all as strings) -
# then saves the workbook as fname + ".xls".
#
# NOTE(review): the author marked this as a failure without recording why.
# Possibly the legacy .xls per-sheet column limit was hit by long lists -
# unconfirmed.
'''
import xlwt
from xlwt import Workbook

# function to write all respective excel notes
def store_ind(cur_idx, fname):
    
    # create a workbook
    wb = Workbook()
    
    # create an excel sheet
    wb_sheet = wb.add_sheet('sheet_1')
    
    # excel sheet index
    idx1 = 0
    
    for key in cur_idx.keys():
        cur_key = key
        cur_lis = cur_idx[key]
        
        # col 1 --> key 1
        idx2 = 0
        # create entry for key column
        wb_sheet.write(idx1, idx2, str(cur_key[0]))
        
        
        # col 2 --> key 2
        idx2 = 1
        # create entry for key column
        wb_sheet.write(idx1, idx2, str(cur_key[1]))
        
        
        # col 3 --> len(list)
        idx2 = 2
        # length of the particlar list
        lis_si = len(cur_lis)
        # create entry for value column
        wb_sheet.write(idx1, idx2, str(lis_si))
        
        
        # col 4 ++
        idx2 = 3
        for i in range(lis_si):
            wb_sheet.write(idx1, idx2, str(cur_lis[i]))
            idx2 = idx2 + 1
            
        # go to next row
        idx1 = idx1 + 1
        
    name = fname + ".xls"
    wb.save(name)
    print("File ", name, " has been saved")
'''
# Abandoned attempt at a class-based key -> list store (kept for reference
# only). The class is wrapped in a string literal, so store_idx is never
# defined.
#
# Defects visible in the archived text, to fix before reviving it:
#   * __init__ creates `rec_col`, but add_pt, ret_keyoff and sea_li all use
#     `off_col`, which is never initialised (AttributeError on first add_pt).
#   * add_pt appends the integer counter key_cnt to off_col, yet sea_li
#     subscripts the retrieved entry (self.offsets[1]); an int cannot be
#     indexed.
#   * sea_li leaves `got_pt` unassigned when sea_key matches no stored key.
#   * sea_li opens the text file but never closes it.
#   * wrt_li writes str(list) + " " for every stored list into fname + ".txt".
#     NOTE(review): no per-list offsets are recorded there, so it is unclear
#     how sea_li was meant to find one list in that file - confirm intent.
'''class store_idx(object):
    
    def __init__(self):
        # key is from the map
        # cnt is started from 0
        self.key_cnt = 0
        self.key_col = []
        self.rec_col = []
        self.all_lis = []
    
    
    # function to create the key rec table
    def add_pt(self, key, lis):
        # key --> rec no.
        self.key_val = key
        self.key_cnt = self.key_cnt + 1
        
        # append key and value to list
        self.key_col.append(self.key_val)
        self.off_col.append(self.key_cnt)
        
        # get and set values to the all_list
        self.lis_val = lis
        self.all_lis.append(self.lis_val)
        
        
        
    
    # write up the file containing the whole list i.e the all_list
    def wrt_li(self, fname):
        # create and open file
        txtname = fname + ".txt"
        f = open(txtname, "w+")
        
        # write the list
        for li_val in self.all_lis:
            store = str(li_val) + " "
            f.write(store) 
            
        f.close()
        
        
    # 
    # call only after all points have been added!
    def ret_keyoff(self):
        return self.key_col, self.off_col
        
    # search and locate a list
    def sea_li(self, fname, sea_key):
        # get the index of the offset in the table
        for kk in range(len(self.key_col)):
            if sea_key == self.key_col[kk]:
                got_pt = kk
                
        self.offsets = self.off_col[got_pt]
        
        txtname = fname + ".txt"
        f = open(txtname, "r+")
        
        # read the file at the start and end
        val = f.readlines(self.offsets[1])
        
        return val'''