In [1]:
import numpy as np
import pandas as pd
import scipy
from sklearn.svm import SVC
from sklearn import metrics
import itertools
from Data_Clean_Functions import *

In [2]:
# Absolute directory path ending in "Figures//".
# NOTE(review): not referenced by any of the visible cells; presumably the
# place where plots are saved — confirm before relying on it.
PATH = (
    "C://Users//cian1//Maritime Engineering//"
    "Machine-Learning Performance Predictions//Figures//"
)

In [3]:
# Read the eight labelled daily CSV files into day1 ... day8 (one frame per file,
# in file order).
labeled_files = [
    "Labeled_TP52_day1.csv",
    "Labeled_TP52_day2.csv",
    "Labeled_TP52_day3.csv",
    "Labeled_TP52_day4.csv",
    "Labeled_TP52_day5.csv",
    "Labeled_TP52_day6.csv",
    "Labeled_TP52_day7.csv",
    "Labeled_TP52_day8.csv",
]
day1, day2, day3, day4, day5, day6, day7, day8 = map(pd.read_csv, labeled_files)

In [4]:
# Collect the daily frames and parse each one's 'Utc' column into datetimes.
# The frames are modified in place, so day1 ... day8 see the converted column too.
day_arr = [day1, day2, day3, day4, day5, day6, day7, day8]

for daily_frame in day_arr:
    daily_frame["Utc"] = pd.to_datetime(daily_frame["Utc"])

In [5]:
# Combine the daily frames into one data frame.
# reset_index(drop=True) renumbers the rows 0..N-1 so that row labels repeated
# across the daily frames do not clash. Dropping the old index directly avoids
# creating a temporary 'index' column that then has to be removed again.
df = pd.concat(day_arr).reset_index(drop=True)

# Optional longitude filter, currently disabled:
#df=df[df['Lon']<-9.39]

In [6]:
# Rows without a label get the placeholder label 'NR'.
# Assign the result back to the column: calling fillna(..., inplace=True) on
# the Series returned by df.label is a chained in-place operation, which
# recent pandas versions warn about and which does not update df under
# Copy-on-Write.
df['label'] = df['label'].fillna('NR')

In [7]:
# Replace every remaining missing value in the frame with 0.
# This is a crude placeholder: NEEDS IMPROVEMENT.
df = df.fillna(0)

In [8]:
# Add an absolute-value copy ('abs_<name>') of each signed column listed below.
abs_arr = ['Awa', 'Twa', 'Leeway', 'Rudder', 'Heel']

for source_col in abs_arr:
    target_col = 'abs_' + source_col
    df[target_col] = abs(df[source_col])

In [9]:
# Combinations of features

# Small offsets added to denominators so that a zero value does not produce a
# division by zero. The heel denominator uses a larger offset than the others.
DENOM_EPS = 0.0001
HEEL_DENOM_EPS = 0.001

# Angle magnitudes multiplied by pi/180 (the degree-to-radian factor).
# They are computed from the signed source columns rather than from the
# existing abs_ columns, so re-running this cell does not apply the factor a
# second time.
df['abs_Awa'] = abs(df['Awa'])*np.pi/180
df['abs_Twa'] = abs(df['Twa'])*np.pi/180

# Ratios and products of Bsp with the wind columns.
df['Bsp_Tws'] = df['Bsp']/(df['Tws']+DENOM_EPS)
df['BspxTws'] = df['Bsp']*df['Tws']  # not much sense really
df['Bsp_Aws'] = df['Bsp']/(df['Aws']+DENOM_EPS)

# Discarded experiments, kept for reference:
#df['Leeway_Heel']=df['abs_Leeway']/(df['abs_Heel']+0.0001)
#df['LeewayxHeel']=df['abs_Leeway']*df['abs_Heel']

df['AwaxBsp'] = df['Bsp']*df['abs_Awa']
# NOTE(review): the column is named Awa_Bsp but holds Bsp / abs_Awa, whereas
# Bsp_Tws and Bsp_Aws put the first name in the numerator — confirm intent.
df['Awa_Bsp'] = df['Bsp']/(df['abs_Awa']+DENOM_EPS)

# Powers of Bsp.
df['Bsp2'] = df['Bsp']**2
df['Bsp3'] = df['Bsp']**3
df['Bsp4'] = df['Bsp']**4

# Natural log of the Tws / abs_Heel ratio; both terms are offset so the
# argument of the log stays positive when either value is 0.
df['Tws_Heel'] = np.log((df['Tws']+DENOM_EPS)/(df['abs_Heel']+HEEL_DENOM_EPS))

In [10]:
# Standardise relevant features.
# Each column is listed once; the earlier list repeated 'AwaxBsp' and
# 'Awa_Bsp', which only made the scaler process the same columns twice.
features = ['Bsp', 'abs_Awa', 'abs_Twa', 'Trim', 'Forestay', 'abs_Heel', 'Tws', 'Aws',
            'abs_Leeway', 'abs_Rudder',
            'Bsp_Tws', 'BspxTws', 'Tws_Heel', 'Bsp_Aws', 'AwaxBsp', 'Awa_Bsp',
            'Bsp2', 'Bsp3', 'Bsp4']

# Scale_df is not defined in this notebook (presumably it comes from the
# Data_Clean_Functions star import). The following cell selects
# '<name>_scaled' columns, so it presumably adds one such column per listed
# feature — confirm against its definition.
df = Scale_df(df, features, 'standard')

In [11]:
# Basic feature set: the target, the timestamp and five scaled inputs.
use_features = ['label', 'Utc', 'Bsp_scaled', 'abs_Awa_scaled', 'abs_Heel_scaled',
                'Tws_scaled', 'abs_Leeway_scaled']

# Take an explicit copy so that columns added to df afterwards are written to
# an independent frame, not to a slice of the full frame (this avoids
# SettingWithCopyWarning).
# NOTE(review): every column not listed above is dropped from df here,
# including the other *_scaled columns.
df = df[use_features].copy()

In [12]:
# Calendar date of each row as a 'YYYY-MM-DD' string, used to group rows by day.
# The vectorised .dt accessor replaces a per-row Python lambda; 'Utc' was
# parsed with pd.to_datetime when the daily files were loaded.
df['date'] = df['Utc'].dt.strftime('%Y-%m-%d')

In [16]:
# Leave-days-out evaluation of a polynomial-kernel SVC (default degree).
# For every training-set size n and every combination of n dates, the
# classifier is fitted on the rows from those dates and scored on the rows
# from all remaining dates.
date_list = df['date'].unique()

# Training-set sizes 1 .. len(date_list)-1, so at least one date is always
# left for testing. The integers are built directly instead of casting a
# float linspace to int.
n_vals = np.arange(1, len(date_list))

# Columns that are not model inputs: the target and the two time columns.
non_feature_cols = ['label', 'date', 'Utc']

# create arrays to store results in
record_dates = []
record_accuracy = []

for n in n_vals:

    # Form a list of all possible combinations of n dates from available dates to use for training data
    use_dates = list(itertools.combinations(date_list, n))

    for dates in use_dates:

        # Rows from the chosen dates train the model; every other row is test
        # data. date_list holds every date in df, so the complement of the
        # training mask is exactly the set of remaining dates.
        in_train = df['date'].isin(dates)
        df_train = df[in_train].reset_index(drop=True)
        df_test = df[~in_train].reset_index(drop=True)

        X_train = df_train.drop(non_feature_cols, axis=1)
        y_train = df_train['label']
        X_test = df_test.drop(non_feature_cols, axis=1)
        y_test = df_test['label']

        clf = SVC(kernel='poly')
        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test)

        # Fraction of held-out rows whose label was predicted correctly.
        result = metrics.accuracy_score(y_test, y_pred)

        record_accuracy.append(result)
        record_dates.append(dates)

        print('-'*100)
        print("Training on dates:")
        print(*dates, sep='\n')
        print("Average accuracy: %.3f"%(result))

----------------------------------------------------------------------------------------------------
Training on dates:
2015-09-13
Average accuracy: 0.856
----------------------------------------------------------------------------------------------------
Training on dates:
2015-09-14
Average accuracy: 0.908
----------------------------------------------------------------------------------------------------
Training on dates:
2015-09-15
Average accuracy: 0.823
----------------------------------------------------------------------------------------------------
Training on dates:
2015-09-16
Average accuracy: 0.915
----------------------------------------------------------------------------------------------------
Training on dates:
2015-09-17
Average accuracy: 0.727
----------------------------------------------------------------------------------------------------
Training on dates:
2015-09-18
Average accuracy: 0.904
----------------------------------------------------------------------

----------------------------------------------------------------------------------------------------
Training on dates:
2015-09-13
2015-09-16
2015-09-20
Average accuracy: 0.915
----------------------------------------------------------------------------------------------------
Training on dates:
2015-09-13
2015-09-17
2015-09-18
Average accuracy: 0.909
----------------------------------------------------------------------------------------------------
Training on dates:
2015-09-13
2015-09-17
2015-09-19
Average accuracy: 0.925
----------------------------------------------------------------------------------------------------
Training on dates:
2015-09-13
2015-09-17
2015-09-20
Average accuracy: 0.892
----------------------------------------------------------------------------------------------------
Training on dates:
2015-09-13
2015-09-18
2015-09-19
Average accuracy: 0.906
----------------------------------------------------------------------------------------------------
Training on da

----------------------------------------------------------------------------------------------------
Training on dates:
2015-09-13
2015-09-14
2015-09-16
2015-09-17
Average accuracy: 0.914
----------------------------------------------------------------------------------------------------
Training on dates:
2015-09-13
2015-09-14
2015-09-16
2015-09-18
Average accuracy: 0.912
----------------------------------------------------------------------------------------------------
Training on dates:
2015-09-13
2015-09-14
2015-09-16
2015-09-19
Average accuracy: 0.915
----------------------------------------------------------------------------------------------------
Training on dates:
2015-09-13
2015-09-14
2015-09-16
2015-09-20
Average accuracy: 0.919
----------------------------------------------------------------------------------------------------
Training on dates:
2015-09-13
2015-09-14
2015-09-17
2015-09-18
Average accuracy: 0.919
------------------------------------------------------------

----------------------------------------------------------------------------------------------------
Training on dates:
2015-09-14
2015-09-16
2015-09-18
2015-09-20
Average accuracy: 0.928
----------------------------------------------------------------------------------------------------
Training on dates:
2015-09-14
2015-09-16
2015-09-19
2015-09-20
Average accuracy: 0.946
----------------------------------------------------------------------------------------------------
Training on dates:
2015-09-14
2015-09-17
2015-09-18
2015-09-19
Average accuracy: 0.934
----------------------------------------------------------------------------------------------------
Training on dates:
2015-09-14
2015-09-17
2015-09-18
2015-09-20
Average accuracy: 0.940
----------------------------------------------------------------------------------------------------
Training on dates:
2015-09-14
2015-09-17
2015-09-19
2015-09-20
Average accuracy: 0.957
------------------------------------------------------------

----------------------------------------------------------------------------------------------------
Training on dates:
2015-09-13
2015-09-15
2015-09-16
2015-09-17
2015-09-20
Average accuracy: 0.914
----------------------------------------------------------------------------------------------------
Training on dates:
2015-09-13
2015-09-15
2015-09-16
2015-09-18
2015-09-19
Average accuracy: 0.891
----------------------------------------------------------------------------------------------------
Training on dates:
2015-09-13
2015-09-15
2015-09-16
2015-09-18
2015-09-20
Average accuracy: 0.902
----------------------------------------------------------------------------------------------------
Training on dates:
2015-09-13
2015-09-15
2015-09-16
2015-09-19
2015-09-20
Average accuracy: 0.921
----------------------------------------------------------------------------------------------------
Training on dates:
2015-09-13
2015-09-15
2015-09-17
2015-09-18
2015-09-19
Average accuracy: 0.908
-----

----------------------------------------------------------------------------------------------------
Training on dates:
2015-09-13
2015-09-14
2015-09-15
2015-09-17
2015-09-18
2015-09-20
Average accuracy: 0.926
----------------------------------------------------------------------------------------------------
Training on dates:
2015-09-13
2015-09-14
2015-09-15
2015-09-17
2015-09-19
2015-09-20
Average accuracy: 0.956
----------------------------------------------------------------------------------------------------
Training on dates:
2015-09-13
2015-09-14
2015-09-15
2015-09-18
2015-09-19
2015-09-20
Average accuracy: 0.919
----------------------------------------------------------------------------------------------------
Training on dates:
2015-09-13
2015-09-14
2015-09-16
2015-09-17
2015-09-18
2015-09-19
Average accuracy: 0.907
----------------------------------------------------------------------------------------------------
Training on dates:
2015-09-13
2015-09-14
2015-09-16
2015-09

In [13]:
# Enhanced feature set: names of the scaled columns intended as model inputs.
# NOTE(review): this list is only assigned here. None of the visible
# following cells select these columns from df, and df was already reduced to
# the basic columns earlier — confirm that the "enhanced" run really uses them.
use_features = [
    'Bsp_scaled',
    'abs_Awa_scaled',
    'abs_Heel_scaled',
    'Aws_scaled',
    'Tws_Heel_scaled',
    'Bsp_Aws_scaled',
    'AwaxBsp_scaled',
    'Forestay_scaled',
]

In [14]:
# Leave-days-out evaluation of a degree-5 polynomial-kernel SVC.
# For every training-set size n and every combination of n dates, the
# classifier is fitted on the rows from those dates and scored on the rows
# from all remaining dates.
# NOTE(review): the model inputs are whatever columns df holds apart from
# label/date/Utc. This cell does not select the `use_features` list, so
# compared with the default-degree run only the kernel degree differs —
# confirm that this is intended.
date_list = df['date'].unique()

# Training-set sizes 1 .. len(date_list)-1, so at least one date is always
# left for testing. The integers are built directly instead of casting a
# float linspace to int.
n_vals = np.arange(1, len(date_list))

# Columns that are not model inputs: the target and the two time columns.
non_feature_cols = ['label', 'date', 'Utc']

# create arrays to store results in
record_dates2 = []
record_accuracy2 = []

for n in n_vals:

    # Form a list of all possible combinations of n dates from available dates to use for training data
    use_dates = list(itertools.combinations(date_list, n))

    for dates in use_dates:

        # Rows from the chosen dates train the model; every other row is test
        # data. date_list holds every date in df, so the complement of the
        # training mask is exactly the set of remaining dates.
        in_train = df['date'].isin(dates)
        df_train = df[in_train].reset_index(drop=True)
        df_test = df[~in_train].reset_index(drop=True)

        X_train = df_train.drop(non_feature_cols, axis=1)
        y_train = df_train['label']
        X_test = df_test.drop(non_feature_cols, axis=1)
        y_test = df_test['label']

        clf = SVC(kernel='poly', degree=5)
        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test)

        # Fraction of held-out rows whose label was predicted correctly.
        result = metrics.accuracy_score(y_test, y_pred)

        record_accuracy2.append(result)
        record_dates2.append(dates)

        print('-'*100)
        print("Training on dates:")
        print(*dates, sep='\n')
        print("Average accuracy: %.3f"%(result))

----------------------------------------------------------------------------------------------------
Training on dates:
2015-09-13
Average accuracy: 0.772
----------------------------------------------------------------------------------------------------
Training on dates:
2015-09-14
Average accuracy: 0.734
----------------------------------------------------------------------------------------------------
Training on dates:
2015-09-15
Average accuracy: 0.691
----------------------------------------------------------------------------------------------------
Training on dates:
2015-09-16
Average accuracy: 0.712
----------------------------------------------------------------------------------------------------
Training on dates:
2015-09-17
Average accuracy: 0.780
----------------------------------------------------------------------------------------------------
Training on dates:
2015-09-18
Average accuracy: 0.666
----------------------------------------------------------------------

KeyboardInterrupt: 

In [41]:
# Side-by-side report of the two runs, one entry per training-date combination.
# Both runs enumerate the combinations in the same order, so entries at the
# same position describe the same split. zip stops at the shortest list, so a
# partially filled record_accuracy2 (for example after an interrupted run)
# no longer raises IndexError.
for dates, basic_accuracy, enhanced_accuracy in zip(record_dates, record_accuracy, record_accuracy2):
    print(dates)
    print("Basic Features accuracy: %.3f"%(basic_accuracy))
    print("Enhanced Features accuracy %.3f\n"%(enhanced_accuracy))

# Make an incomplete comparison visible instead of silently truncating it.
if len(record_accuracy2) < len(record_accuracy):
    print("Only %d of %d splits have an enhanced-features result"
          % (len(record_accuracy2), len(record_accuracy)))
    

('2015-09-13',)
Basic Features accuracy: 0.856
Enhanced Features accuracy 0.736

('2015-09-14',)
Basic Features accuracy: 0.908
Enhanced Features accuracy 0.782

('2015-09-15',)
Basic Features accuracy: 0.823
Enhanced Features accuracy 0.697

('2015-09-16',)
Basic Features accuracy: 0.915
Enhanced Features accuracy 0.767

('2015-09-17',)
Basic Features accuracy: 0.727
Enhanced Features accuracy 0.799

('2015-09-18',)
Basic Features accuracy: 0.904
Enhanced Features accuracy 0.666

('2015-09-19',)
Basic Features accuracy: 0.825
Enhanced Features accuracy 0.655

('2015-09-20',)
Basic Features accuracy: 0.816
Enhanced Features accuracy 0.734

('2015-09-13', '2015-09-14')
Basic Features accuracy: 0.901
Enhanced Features accuracy 0.773

('2015-09-13', '2015-09-15')
Basic Features accuracy: 0.898
Enhanced Features accuracy 0.692

('2015-09-13', '2015-09-16')
Basic Features accuracy: 0.917
Enhanced Features accuracy 0.812

('2015-09-13', '2015-09-17')
Basic Features accuracy: 0.869
Enhanced F

In [None]:
# Leave-days-out evaluation of a degree-10 polynomial-kernel SVC, restricted
# to training sets of a single date (n = 1). The classifier is fitted on the
# rows from that date and scored on the rows from all remaining dates.
date_list = df['date'].unique()

# Columns that are not model inputs: the target and the two time columns.
non_feature_cols = ['label', 'date', 'Utc']

# create arrays to store results in
record_dates3 = []
record_accuracy3 = []

for n in range(1,2):

    # Form a list of all possible combinations of n dates from available dates to use for training data
    use_dates = list(itertools.combinations(date_list, n))

    for dates in use_dates:

        # Rows from the chosen dates train the model; every other row is test
        # data. date_list holds every date in df, so the complement of the
        # training mask is exactly the set of remaining dates.
        in_train = df['date'].isin(dates)
        df_train = df[in_train].reset_index(drop=True)
        df_test = df[~in_train].reset_index(drop=True)

        X_train = df_train.drop(non_feature_cols, axis=1)
        y_train = df_train['label']
        X_test = df_test.drop(non_feature_cols, axis=1)
        y_test = df_test['label']

        clf = SVC(kernel='poly', degree=10)
        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test)

        # Fraction of held-out rows whose label was predicted correctly.
        result = metrics.accuracy_score(y_test, y_pred)

        record_accuracy3.append(result)
        record_dates3.append(dates)

        print('-'*100)
        print("Training on dates:")
        print(*dates, sep='\n')
        print("Average accuracy: %.3f"%(result))

In [None]:
# NOTE(review): stand-alone expression that assigns nothing; presumably a
# leftover kernel sanity check — consider deleting this cell.
1+1