In [None]:
import os
import sys
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)
import glob
import shutil
import numpy as np
import pandas as pd
import seaborn as sns
%matplotlib inline
import matplotlib.pyplot as plt
from datetime import datetime
from statistics import mean
from random import choice
from sklearn.linear_model import LogisticRegression
from sklearn.externals import joblib
from sklearn import preprocessing
from gendis.genetic import GeneticExtractor
from tools.utils import scale_down, standardize_data, set_movements, angle_diff, get_distance

# Shared runtime settings; "split" is the number of points every chunk is
# scaled down to before it is handed to the models.
train_test_options = dict(split=25)

# Names of the movement patterns; each name selects the pickled models
# stored under models/.
movement_list = [
    "step_up_left",
    "step_up_right",
    "spiral_movement_left",
    "spiral_movement_right",
    "expanding_square_left",
    "expanding_square_right",
    "creeping_line_left",
    "creeping_line_right",
    "sector_pattern_left",
    "sector_pattern_right",
]


In [None]:
def _load_model(pattern, suffix, kind):
    """Load the pickled ``kind`` model of *pattern* from the ``models/`` folder.

    NOTE: joblib files are pickles and unpickling can execute arbitrary code;
    only load model files that come from a trusted source.
    """
    with open("models/"+pattern+suffix+"_"+kind+".pkl", 'rb') as pickle_file:
        return joblib.load(pickle_file)


def _prepare_series(series):
    """Standardise every row of *series*, then scale each row to unit norm."""
    scaled = preprocessing.scale(series, axis=1)
    return preprocessing.normalize(scaled, axis=1)


def experiments(ship,movement_list,type_exp="",mixed_models=False):
    """Run the stored per-pattern classifiers over every chunk of one vessel.

    Parameters
    ----------
    ship : dict
        Must provide ``ship["data"]``, a 2-D array with one heading series
        per row (one row per chunk).
    movement_list : list of str
        Pattern names; each one selects the model files to load.
    type_exp : str
        ``"anglediff"`` selects the ``*_anglediff_*`` models; any other value
        selects the plain heading models.  Ignored when ``mixed_models`` is
        true.
    mixed_models : bool
        When true, the distances of the heading extractor and the anglediff
        extractor are concatenated and classified by the ``*_mixed`` logistic
        regression.

    Returns
    -------
    (predictions, proba)
        Two lists with one entry per pattern, holding the output of
        ``predict`` and ``predict_proba`` for all chunks.
    """
    predictions = []
    proba = []
    use_anglediff = type_exp == "anglediff"
    suffix = "_anglediff" if use_anglediff else ""

    if not mixed_models:
        # NOTE(review): the anglediff extractor is fed angle_diff() output in
        # the mixed branch below, so the same transform is applied here; the
        # original passed raw headings to the anglediff models -- confirm this
        # matches how those models were trained.
        series = angle_diff(ship["data"]) if use_anglediff else ship["data"]
        # The input does not depend on the pattern, so prepare it once.  A
        # single chunk is already a (1, n) array and needs no special case
        # (the old special case indexed row 1 and raised IndexError).
        x_test = _prepare_series(series)
        for pattern in movement_list:
            gen_ext = _load_model(pattern, suffix, "genetic_extractor")
            lr = _load_model(pattern, suffix, "logistic_regression")
            distances_test = gen_ext.transform(x_test)
            predictions.append(lr.predict(distances_test))
            proba.append(lr.predict_proba(distances_test))
        return predictions,proba

    x_test = _prepare_series(ship["data"])
    # Previously this was overwritten with the normalised heading series, so
    # the anglediff extractor never saw the bearing-rate data.
    x_test_anglediff = _prepare_series(angle_diff(ship["data"]))
    for pattern in movement_list:
        gen_ext = _load_model(pattern, "", "genetic_extractor")
        gen_ext_anglediff = _load_model(pattern, "_anglediff", "genetic_extractor")
        lr_mixed = _load_model(pattern, "_mixed", "logistic_regression")

        distances_test = gen_ext.transform(x_test)
        distances_test_anglediff = gen_ext_anglediff.transform(x_test_anglediff)
        mixed_distances = np.concatenate((distances_test,distances_test_anglediff),axis=1)

        predictions.append(lr_mixed.predict(mixed_distances))
        proba.append(lr_mixed.predict_proba(mixed_distances))
    return predictions,proba

def results(movement_list,proba,predictions,proba_filter=False,threshold=0.75):
    """Collect the chunks in which each pattern was detected.

    A chunk counts as a detection of a pattern when its predicted class is 0.
    With ``proba_filter`` enabled the class-0 probability must additionally
    be greater than ``threshold``.

    Parameters
    ----------
    movement_list : list of str
        Pattern names, in the same order as ``proba`` and ``predictions``.
    proba : sequence
        ``proba[p][c]`` holds the two class probabilities of chunk ``c`` for
        pattern ``p``.
    predictions : sequence
        ``predictions[p][c]`` is the predicted class of chunk ``c`` for
        pattern ``p``.
    proba_filter : bool
        Enable the probability threshold.
    threshold : float
        Minimum (exclusive) class-0 probability used when filtering.

    Returns
    -------
    (proba_info, pattern_indexes)
        ``proba_info`` is a flat list of dicts with the keys ``pattern``,
        ``first_class``, ``second_class`` and ``chunk``; ``pattern_indexes``
        has one list of detected chunk indexes per pattern.
    """
    proba_info = []
    pattern_indexes = []
    for idx,x in enumerate(movement_list):
        indexes = []
        for ind,pred in enumerate(predictions[idx]):
            if pred != 0:
                continue
            first_class = proba[idx][ind][0]
            # The filter used to be bypassed by a mis-indented ``else``:
            # low-probability hits were still recorded when filtering was on,
            # and nothing at all was recorded when it was off.
            if proba_filter and not first_class > threshold:
                continue
            proba_info.append({"pattern":x,"first_class":first_class,"second_class":proba[idx][ind][1],"chunk":ind})
            indexes.append(ind)
        print(len(indexes),"times of pattern detection:",x," at indexes ",indexes)
        pattern_indexes.append(indexes)
    return proba_info,pattern_indexes

def fig_dir_creation(path,reset=False):
    """Create the directory ``path``.

    With ``reset`` false a failing ``os.mkdir`` (for instance because the
    directory already exists) is only reported on stdout.  With ``reset``
    true an existing directory is removed together with its contents and
    then created again empty.
    """
    if reset:
        # Wipe whatever is there first, then create a fresh directory.
        if glob.glob(path):
            shutil.rmtree(path)
        os.mkdir(path)
        return
    try:
        os.mkdir(path)
    except OSError:
        print ("Creation of the directory %s failed" % path)
            
def find_best_patterns(proba_info):
    """Pick one pattern for every chunk that was claimed by several patterns.

    ``proba_info`` is a list of dicts with the keys ``pattern``, ``chunk``,
    ``first_class`` and ``second_class``.  Only chunks that occur in more
    than one entry are considered; for each of them the entry whose
    ``first_class`` equals the highest ``first_class`` of that chunk is
    selected.  The choice is printed and returned as a list of
    ``[chunk, pattern]`` pairs.  Chunks with a single entry are not part of
    the result.
    """
    a=pd.DataFrame(proba_info,columns=["pattern","chunk","first_class","second_class"])
    a.set_index("chunk",inplace=True)
    # Boolean mask indexed by chunk: true where more than one pattern row
    # exists for that chunk.  The surviving rows are grouped per chunk.
    grouped_df = a[a.groupby("chunk").count()["pattern"]>1].groupby("chunk")
    best_patterns = []
    for key, item in grouped_df:
        # Keep the row(s) of this chunk whose first_class matches the group maximum.
        temp = item[grouped_df.get_group(key).loc[key]["first_class"]==grouped_df.get_group(key).max()["first_class"]]
        # NOTE(review): if two patterns tie on first_class, temp holds several
        # rows and this presumably yields a Series instead of one name -- confirm.
        best_patterns.append([key,temp.loc[key]["pattern"]])
    for x in best_patterns:
        print("For chunk no:",x[0],"the best pattern is ",x[1])   
    return best_patterns

def plots(movement_list,
          pattern_indexes,
          best_patterns,
          ship_name,
          ship_dfs,
          file="normal",
          scaled_down=True,
          rdp_data=None,
          save=False):
    """Plot every detected chunk of a vessel, one pattern at a time.

    ``pattern_indexes[k]`` lists the chunk numbers detected for
    ``movement_list[k]``.  ``best_patterns`` holds ``[chunk, pattern]`` pairs;
    a chunk listed there is plotted only under the pattern named in its
    pair, every other detected chunk is always plotted.  With ``save`` true
    the folders ``figures/<ship_name>/<file>/<pattern>`` are created when
    missing and handed to ``plot_trajectory`` as the output location.
    """
    chunks = [entry[0] for entry in best_patterns]
    patterns = [entry[1] for entry in best_patterns]

    ship_dir = "figures/"+ship_name
    base_path = ship_dir+"/"+file
    if save:
        for directory in (ship_dir, base_path):
            if not os.path.isdir(directory):
                os.mkdir(directory)

    for idx, pattern in enumerate(movement_list):
        path = base_path+"/"+pattern
        experiment = file+"\n"+pattern
        if save and not os.path.isdir(path):
            os.mkdir(path)
        for chunk_no in pattern_indexes[idx]:
            # A chunk with a designated best pattern is skipped under every
            # other pattern.
            if chunk_no in chunks and patterns[chunks.index(chunk_no)] != pattern:
                continue
            plot_trajectory(ship_dfs[chunk_no],
                            train_test_options["split"],
                            chunk_no,
                            ship_name,
                            scaled_down,
                            rdp_data,
                            experiment,
                            save,
                            path)

def epsilon_selection(real_data_len,data,size,epsilon=0):
    """Search for an rdp epsilon whose mask keeps exactly ``size`` points.

    Returns ``False`` when ``real_data_len`` is not larger than ``size`` or
    when the iteration counter exceeds 200 without a match; otherwise
    returns the mask of the last ``rdp`` call, which has ``size`` entries
    equal to ``True``.

    NOTE(review): ``rdp`` is not imported in the visible part of this file,
    and ``mask.count(True)`` requires the mask to offer ``count`` (e.g. a
    list) -- confirm both before using this function.
    """
    if real_data_len<=size:
        return False
    else:
        epsilon = epsilon
        i = 0 
        mask_len = real_data_len
        while mask_len!=size:
#             print("i:",i,mask_len,size,epsilon)
            mask = rdp(data,algo="iter", return_mask=True,epsilon=epsilon)
            mask_len = mask.count(True)
            # Give up once the iteration counter has passed 200.
            if i >200:
                return False
            if mask_len < size:
                # Too few points kept: lower epsilon, with a much smaller
                # step when only one point is missing.
                if size - mask_len <=1:
                    epsilon = epsilon - 0.0000000005
                else :
                    epsilon = epsilon - 0.000005
            else:
                # Too many points kept (or already equal, in which case the
                # loop ends right after): raise epsilon, with a smaller step
                # when off by at most one.
                if mask_len -size <=1:
                    epsilon = epsilon + 0.00000002
                else:
                    epsilon = epsilon + 0.000002
            i = i +1
        return mask

def print_results(movement_list,predictions,proba):
    """Print one line for every chunk whose predicted class is 0.

    ``predictions[p][c]`` and ``proba[p][c]`` belong to pattern
    ``movement_list[p]`` and chunk ``c``; the probability printed is the
    first entry of ``proba[p][c]``.
    """
    message = "for chunk {2} and pattern {0} a prediction is made with probability {1}"
    for pattern_pos, pattern in enumerate(movement_list):
        for chunk_pos, predicted in enumerate(predictions[pattern_pos]):
            if predicted == 0:
                print(message.format(pattern, proba[pattern_pos][chunk_pos][0], chunk_pos))

def plot_trajectory(data,
                    split,
                    chunk_no,
                    shipname,
                    scaled_down=True,
                    rdp_data=None,
                    experiment = "",
                    save=False,
                    path=""):
    """Draw the LON/LAT track of one chunk, optionally with simplified overlays.

    Parameters
    ----------
    data : pandas.DataFrame
        Chunk to plot; the columns ``LON``, ``LAT`` and ``TIMESTAMP`` are read.
    split : int
        Target length passed to ``scale_down``; the scaled track is drawn
        only when the result has exactly this many rows.
    chunk_no : int
        Chunk number shown in the title and used in the file name.
    shipname : str
        Vessel name shown in the title.
    scaled_down : bool
        Add the ``scale_down`` version of ``data`` as a second line.
    rdp_data : pandas.DataFrame or None
        Optional, non-empty simplified track drawn as a further line.
    experiment : str
        Label shown after ``Class:`` in the title.
    save : bool
        Write the figure to ``<path>/chunk_no_<chunk_no>.png``; nothing is
        written when ``path`` is empty.
    path : str
        Output directory used when ``save`` is true.
    """
    real_data=data.assign(Trajectory="Real Trajectory")
    df_list = [real_data]
    data_list = [real_data["Trajectory"].iloc[0]]
    # One style entry per plotted trajectory, in the order of data_list.
    pallete = ['r']
    sizes = [6]
    dashes = [(5,5)]
    markers = ["^"]
    if scaled_down:
        data_scaled=scale_down(data,split).assign(Trajectory="Scaled Trajectory")
        if len(data_scaled) == split:
            df_list.append(data_scaled)
            data_list.append(data_scaled["Trajectory"].iloc[0])
            pallete.append('b')
            sizes.append(3)
            dashes.append("")
            markers.append("o")

    if isinstance(rdp_data, pd.DataFrame) and len(rdp_data)>0:
        # Label the RDP frame itself; the previous code relabelled ``data``
        # and therefore drew the real trajectory a second time.
        rdp_data=rdp_data.assign(Trajectory="RDP Trajectory")
        df_list.append(rdp_data)
        data_list.append(rdp_data["Trajectory"].iloc[0])
        pallete.append('g')
        sizes.append(1)
        dashes.append((5, 10))
        markers.append("o")

    merged_data = pd.concat(df_list)

    fig, ax= plt.subplots(1, 1, figsize=(30, 20),sharex=True)
    start = datetime.strftime(data["TIMESTAMP"].iloc[0],'%Y-%m-%d %H:%M:%S')
    end = datetime.strftime(data["TIMESTAMP"].iloc[-1],'%Y-%m-%d %H:%M:%S')
    title = "Vessel:"+shipname+"\nClass:"+experiment+"\nCHUNK NO: "+str(chunk_no)+"\n"+start+"----"+end
    ax.set_title(title,fontsize=20)
    sns.lineplot(x='LON', y='LAT',
                      data=merged_data,
                      ax=ax,
                      sort=False,
                      hue = "Trajectory",
                      palette = pallete,
                      hue_order= data_list,
                      size=merged_data["Trajectory"],
                      sizes = sizes,
                      size_order = data_list,
                      style = merged_data["Trajectory"],
                      style_order = data_list,
                      dashes = dashes,
                      markers = markers,
                      legend ="full",
                     )
    plt.legend(loc="lower left", numpoints=1, fontsize=25)
    ax.set_ylabel('Latitute', fontsize = 20.0) # Y label
    ax.set_xlabel('Longitude ', fontsize = 20.0) # X label
    if save and path!="":
        plt.savefig(path+"/chunk_no_"+str(chunk_no)+".png",dpi=fig.dpi)
    
def read_ship_csv(file):
    """Read one vessel CSV and return ``(track, shipname)``.

    The file must contain the columns ``SHIPNAME``, ``TIMESTAMP``, ``LAT``,
    ``LON`` and ``HEADING``.  ``shipname`` is the ``SHIPNAME`` value of the
    first row.  ``track`` keeps only the four track columns, has
    ``TIMESTAMP`` parsed to datetimes, is sorted by it and carries a fresh
    0..n-1 index.
    """
    raw = pd.read_csv(file)
    shipname = raw.loc[0, "SHIPNAME"]
    # Work on an explicit copy: assigning into a column selection of ``raw``
    # triggers pandas' SettingWithCopyWarning.
    ship = raw[["TIMESTAMP","LAT","LON","HEADING"]].copy()
    ship['TIMESTAMP'] = pd.to_datetime(ship['TIMESTAMP'])
    ship = ship.sort_values('TIMESTAMP').reset_index(drop=True)
    return ship,shipname

In [None]:
# ship,shipname=read_ship_csv("ships/SIEM PILOT.csv")
# n = 500  #chunk row size
# ship_dfs = [ship[i:i+n] for i in range(0,ship.shape[0],n)]
# ship_data_chunked = []
# ship_data_chunked_index = [False for i in range(0,len(ship_dfs))]  
# for idx,x in enumerate(ship_dfs):
#     x = scale_down(x,train_test_options["split"])
#     x = np.array(x["HEADING"].values.astype(int))
#     if len(x) == train_test_options["split"]:
#         ship_data_chunked_index[idx] = True
#         ship_data_chunked.append(x)
# ship_data_chunked = np.array(ship_data_chunked)
# ship_dfs = [ship for (ship,index) in zip(ship_dfs,ship_data_chunked_index) if  index ]
# ship ={"shipname":shipname,"data":ship_data_chunked,"ship_dfs":ship_dfs}
# print("ERROR DIFFERENT SIZE OF DATA") if len(ship_dfs)!=ship["data"].shape[0] else None
    

In [None]:
# print("\nVessel:"+ship["shipname"]+"\nNormal Setting Results\n")
    
# predictions,proba=experiments(ship=ship,movement_list=movement_list)
# proba_info,pattern_indexes = results(movement_list=movement_list,
#                                      proba=proba,
#                                      predictions=predictions,
#                                      proba_filter=True)
# best_patterns=find_best_patterns(proba_info)
# plots(movement_list=movement_list,
#       pattern_indexes=pattern_indexes,
#       best_patterns=best_patterns,
#       ship_name=ship["shipname"],
#       ship_dfs=ship["ship_dfs"],
#       save=False)

In [None]:
# Start from an empty figures/ directory; any earlier output is deleted.
fig_dir_creation("figures",True)
# Defined up front so the following cells can iterate over it even when
# there is no ships/ directory (it used to be undefined in that case).
all_ships = []
if len(glob.glob('ships')) != 0 :
    files = glob.glob("ships/*.csv")
    n = 500  #chunk row size
    for file in files:
        ship,shipname=read_ship_csv(file)
        # Cut the track into consecutive pieces of n rows.
        ship_dfs = [ship[i:i+n] for i in range(0,ship.shape[0],n)]
        ship_data_chunked = []
        ship_data_chunked_index = [False for i in range(0,len(ship_dfs))]
        for idx,x in enumerate(ship_dfs):
            x = scale_down(x,train_test_options["split"])
            x = np.array(x["HEADING"].values.astype(int))
            # Keep only chunks that could be scaled down to the model input length.
            if len(x) == train_test_options["split"]:
                ship_data_chunked_index[idx] = True
                ship_data_chunked.append(x)
        ship_data_chunked = np.array(ship_data_chunked)
        # Drop the frames of rejected chunks so that frames and heading rows
        # stay aligned by position.
        ship_dfs = [chunk_df for (chunk_df,keep) in zip(ship_dfs,ship_data_chunked_index) if keep]
        ship ={"shipname":shipname,"data":ship_data_chunked,"ship_dfs":ship_dfs}
        if len(ship_dfs)!=ship["data"].shape[0]:
            print("ERROR DIFFERENT SIZE OF DATA")
        all_ships.append(ship)


In [None]:
# For every vessel run the three model settings in turn (plain heading,
# bearing rate, mixed) and save the plots of the detected chunks.
for ship in all_ships:
    print("\nVessel:"+ship["shipname"]+"\nNormal Setting Results\n")
    
    predictions,proba=experiments(ship=ship,movement_list=movement_list)
    proba_info,pattern_indexes = results(movement_list=movement_list,
                                         proba=proba,
                                         predictions=predictions,
                                         proba_filter=True)
    best_patterns=find_best_patterns(proba_info)
    plots(movement_list=movement_list,
          pattern_indexes=pattern_indexes,
          best_patterns=best_patterns,
          ship_name=ship["shipname"],
          ship_dfs=ship["ship_dfs"],
          save=True)
    
    print("\nVessel:"+ship["shipname"]+"\nBearing Rate Setting Results\n")

    
    predictions_agnlediff,proba_anglediff=experiments(ship=ship,
                                                      movement_list=movement_list,
                                                      type_exp="anglediff")
    proba_info_anglediff,pattern_indexes_anglediff = results(movement_list=movement_list,
                                                             proba=proba_anglediff,
                                                             predictions=predictions_agnlediff,
                                                             proba_filter=True)
    best_patterns_anglediff=find_best_patterns(proba_info_anglediff)
    plots(movement_list=movement_list,
          pattern_indexes=pattern_indexes_anglediff,
          best_patterns=best_patterns_anglediff,
          ship_name=ship["shipname"],
          ship_dfs=ship["ship_dfs"],
          file="anglediff",
          save=True)

    print("\nVessel:"+ship["shipname"]+"\nMixed Models Setting Results\n")

    predictions_mixed,proba_mixed=experiments(ship=ship,
                                              movement_list=movement_list,
                                              mixed_models=True)
    # Evaluate the mixed-model output itself; the bearing-rate results were
    # passed here before, so the mixed figures duplicated the anglediff ones.
    proba_info_mixed,pattern_indexes_mixed = results(movement_list=movement_list,
                                                     proba=proba_mixed,
                                                     predictions=predictions_mixed,
                                                     proba_filter=True)
    best_patterns_mixed=find_best_patterns(proba_info_mixed)
    plots(movement_list=movement_list,
          pattern_indexes=pattern_indexes_mixed,
          best_patterns=best_patterns_mixed,
          ship_name=ship["shipname"],
          ship_dfs=ship["ship_dfs"],
          file="mixed_models",
          save=True)