In [80]:
import os
import sys
import glob
import numpy as np
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt
import pickle
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from tools.utils import scale_down
from tools.utils import standardize_data, print_genetic_param, print_settings, set_movements, angle_diff, get_distance
from statistics import mean
from random import choice
train_test_options = {"split": 6}

In [81]:
def check_sampling(x,sampling,data_index):
    i = 1
    sampling_acc = True
    down_limit = (x[1]-sampling)
    up_limit = (x[1]+sampling)
    while i<=sampling:
        if 0 <=down_limit and up_limit < len(data_index):
            if (data_index[x[1]-i] == True) or (data_index[x[1]+i] == True):
                sampling_acc = False
                break
        i = i + 1      
    return sampling_acc


def dist_and_bearing_diff(data):
    all_distances = []
    bearing_diff = []
    data_size=len(data)
    i = 0
    while i<data_size:
        if i + 1 >=data_size:
            break
        bearing_1, bearing_2 = data["HEADING"].iloc[i], data["HEADING"].iloc[i+1] 
        bearing_diff.append([abs(bearing_2 - bearing_1),i])
        lat_1, lon_1, lat_2, lon_2 = data["LAT"].iloc[i], data["LON"].iloc[i], data["LAT"].iloc[i+1], data["LON"].iloc[i+1]
        all_distances.append(get_distance(lat_1,lon_1,lat_2,lon_2))
        i = i +1
    return bearing_diff, all_distances 


def fitting_indexes(arr,new_size):
    i = 0
    r_arr = []
    while i <len(arr):
        if i+1 >=len(arr):
            break
        r_arr.append(arr[i+1]-arr[i])
        i = i + 1
    if len(r_arr) <= 0:
        return arr
    mean_space = mean(r_arr)
    i = 0 
    while i<len(arr)<new_size:
        if i+1 >=len(arr):
            break
        diff = arr[i+1]-arr[i]
        if mean_space <= diff:
            step = int(diff/2)
            new_index = arr[i] + step
            arr.insert(i+1,new_index)
            i = i + 1
        i = i + 1
    return arr

def scalling_down_windowed(data,n_sample,turn_sensitivity=30):
    if len(data) <= n_sample :
        return data
    size_correction = int(len(data) / n_sample) * n_sample
    data=data[:size_correction]
    data_size=len(data)
    data_index = [False for i in range(data_size)]
    sampling = int((int(data_size/n_sample) * 0.25))
    labels = data.columns
    
    temp_idx = [] 
    final_data = pd.DataFrame(data,columns=labels)
    final_data.reset_index(drop=True)
    
    #find the number of bearing differance above the turn sensitivity
    bearing_diff , all_distances = dist_and_bearing_diff(final_data)
    mean_dist = mean(all_distances)
    for idx, x in enumerate(bearing_diff):
        sampling_acc = check_sampling(x,sampling,data_index)
        if (x[0] > turn_sensitivity) and (all_distances[idx] > mean_dist/2) and sampling_acc:
            data_index[x[1]] = True
            temp_idx.append(x[1])
    while n_sample > len(temp_idx) and len(temp_idx) > 1 :
        fitting_indexes(temp_idx,n_sample)

    for x in temp_idx:
        data_index[x]=True
        
    return final_data[data_index]

In [82]:
ship =  pd.read_csv("ships/SIEM PILOT.csv")
shipname = ship.loc[0]["SHIPNAME"]
ship = ship [["TIMESTAMP","LAT","LON","HEADING"]]
ship['TIMESTAMP'] = pd.to_datetime(ship['TIMESTAMP'])  
ship.sort_values('TIMESTAMP',inplace=True)
ship=ship.reset_index(drop=True)
# ship = scalling_down_windowed(ship[0:500],train_test_options["split"])
n = 500  #chunk row size
ship_dfs = [ship[i:i+n] for i in range(0,ship.shape[0],n)]
ship_data_chunked = []
for idx,x in enumerate(ship_dfs):
    x = scalling_down_windowed(x,train_test_options["split"])
    x = np.array(x["HEADING"].values.astype(int))
    if len(x) == train_test_options["split"]:
        ship_data_chunked.append(x)
ship_data_chunked = np.array(ship_data_chunked)
ship ={"shipname":shipname,"data":ship_data_chunked}
print(ship["data"].shape)


(33, 6)


In [None]:
if len(glob.glob('ships')) != 0 :
    files = glob.glob("ships/*.csv")
    all_ships = [] 
    for file in files:
        ship =  pd.read_csv(file)
        shipname = ship.loc[0]["SHIPNAME"]
        ship = ship [["TIMESTAMP","LAT","LON","HEADING"]]
        ship['TIMESTAMP'] = pd.to_datetime(ship['TIMESTAMP'])  
        ship.sort_values('TIMESTAMP',inplace=True)
        ship=ship.reset_index(drop=True)
        n = 500  #chunk row size
        ship_dfs = [ship[i:i+n] for i in range(0,ship.shape[0],n)]
        ship_data_chunked = []
        for x in ship_dfs:
            x = scale_down(x,train_test_options["split"])
            x = np.array(x["HEADING"].values.astype(int))
            ship_data_chunked.append(x)
        ship_data_chunked = np.array(ship_data_chunked)
        ship ={"shipname":shipname,"data":ship_data_chunked}
    all_ships.append(ship)
    print(all_ships)

In [87]:
movement_list = ["step_up_left"]
# ,"step_up_right",
#                  "spiral_movement_right","spiral_movement_left",
#                  "expanding_square_right","expanding_square_left",
#                  "creeping_line_left","creeping_line_right",
#                  "sector_pattern_left","sector_pattern_right"]
from sklearn.externals import joblib
from gendis.genetic import GeneticExtractor
from tools.data_extraction import DataExtractor
dex = DataExtractor()
x_train, y_train, x_test, y_test = dex.load_datasets()
for x in movement_list:
    with open(x+".pkl", 'rb') as pickle_file:
        print(pickle_file)
        gen_ext = joblib.load( pickle_file)
        print(gen_ext)
        print("Experiment for pattern",x)
        distances_train = gen_ext.transform(x_train)
        lr = LogisticRegression()
        lr.fit(distances_train, y_train)
        distances_test = gen_ext.transform(ship["data"])
        print(lr.predict(distances_test))
        pickle_file.close()


2019-05-22 23:43:54.631 | INFO     | tools.data_extraction:read_datasets:22 - Reading the data files
2019-05-22 23:43:55.008 | SUCCESS  | tools.data_extraction:read_datasets:51 - Done reading files
2019-05-22 23:43:55.016 | INFO     | tools.data_extraction:load_datasets:129 - Loading the csv files to the appropriate train and test arrays(nparrays)
2019-05-22 23:43:55.033 | SUCCESS  | tools.data_extraction:load_datasets:134 - Done


<_io.BufferedReader name='step_up_left.pkl'>
None
Experiment for pattern step_up_left


AttributeError: 'NoneType' object has no attribute 'transform'