In [2]:
import os
import sys
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)
import numpy as np
import pandas as pd
from statistics import mean
from random import choice
%matplotlib inline
import matplotlib.pyplot as plt

from gendis.genetic import GeneticExtractor
from sklearn.linear_model import LogisticRegression
from sklearn import preprocessing
from sklearn.metrics import accuracy_score
from tools.data_extraction import DataExtractor
from tools.trajectory_generator import TrajectoryGenerator
from tools.utils import standardize_data, print_genetic_param, print_settings, set_movements, angle_diff,scale_down, get_distance
from tools.experiments import Experiments

np.random.seed(1337)  # Random seed for reproducibility

tr_gen_options = {"samples": 25,
                  "freq": 3,
                  "reset_data": True}
dt_gen_options = {"n_test": 150}

train_test_options = {"split": 25}

df_csv_options = {"ts_class": "Bearing"}

gen_options = {"population_size": 20,
               "iterations": 20,
               "verbose": True,
               "normed": True,
               "add_noise_prob": 0.0,
               "add_shapelet_prob": 0.3,
               "wait": 10,
               "plot": None,
               "remove_shapelet_prob": 0.3,
               "crossover_prob": 0.66,
               "n_jobs": 4}

settings = {"trajectory_generator_options": tr_gen_options,
            "data_generation_options": dt_gen_options,
            "train_test_options":train_test_options,
            "define_csvs_option": df_csv_options,
            "genetic_options": gen_options}

tr_gen_options = settings["trajectory_generator_options"]
dt_gen_options = settings["data_generation_options"]
df_csvs_options = settings["define_csvs_option"]
train_test_options = settings["train_test_options"]
genetic_options = settings["genetic_options"]

first_movement = ['creeping_line_left']
second_movement = ['random']
movements = {'first_movement': first_movement,'second_movement': second_movement}
set_movements(movements)


In [3]:
# Create files if not created
tr_gen = TrajectoryGenerator(**tr_gen_options)
tr_gen.data_generation(**dt_gen_options)
# Read in the datafiles
dex = DataExtractor()
train_df, test_df = dex.train_test_dataframes(**train_test_options)
print("The train samples length is:{0}".format(len(train_df[0] * train_test_options["split"]*2)))
print("The test samples length is:{0}\n".format(len(test_df[0] * train_test_options["split"]*2)))
dex.define_csv(**df_csvs_options)

x_train, y_train, x_test, y_test = dex.load_datasets()

2019-06-18 18:45:25.486 | INFO     | tools.trajectory_generator:data_generation:623 - 
 Starting the generator with attributes: 
Original latitude: 37.295493
Original longitude: 23.824322
Initial bearing: 90
Initial speed: 10
Number of samples: 25
Starting time of measurements: 2015-02-01 12:00:00
With initial frequency of collected data: 3 min
and hard reset of data: True
2019-06-18 18:45:25.489 | INFO     | tools.trajectory_generator:data_generation:626 - Create directory 'generator_data' 
2019-06-18 18:45:25.491 | INFO     | tools.trajectory_generator:data_generation:632 - now creating data for movement: creeping_line_left
2019-06-18 18:45:34.295 | INFO     | tools.trajectory_generator:data_generation:637 - now creating data for movement: random
2019-06-18 18:46:04.721 | SUCCESS  | tools.trajectory_generator:data_generation:670 - Done with generator
2019-06-18 18:46:04.723 | INFO     | tools.data_extraction:read_datasets:22 - Reading the data files
2019-06-18 18:46:05.491 | SUCCESS 

The train samples length is:6000
The test samples length is:1500



In [4]:
x_train, y_train, x_test, y_test = dex.load_datasets()
labels = ["TIMESTAMP","LAT","LON","HEADING"]
real_data =  pd.read_csv("/home/kapadais/github/hua-thesis/data/route.csv", engine='python')
real_data = real_data [labels]
real_data.sort_values('TIMESTAMP',inplace=True)
real_data=real_data.reset_index(drop=True)
data = scale_down(real_data,train_test_options["split"])
y_test = np.array([0,1])
a=np.array(data["HEADING"].values).astype(int)
a = a.reshape(1, -1)
a=preprocessing.normalize(a,axis=1)
# b=x_test[1].reshape(1,-1)
# b=preprocessing.normalize(b,axis=1)
# x_test=np.array([a,b]).reshape(2,25)
print(x_test.shape)

2019-06-18 18:46:06.770 | INFO     | tools.data_extraction:load_datasets:129 - Loading the csv files to the appropriate train and test arrays(nparrays)
2019-06-18 18:46:06.787 | SUCCESS  | tools.data_extraction:load_datasets:134 - Done


(60, 25)


In [6]:
a.shape[0]

1

In [None]:
# x_train_anglediff=angle_diff(x_train)
# x_test_anglediff=angle_diff(x_test)


In [None]:
# # Visualize the timeseries in the train and test set
# colors = ['r', 'b', 'g', 'y', 'c']
# plt.figure(figsize=(20, 10))
# for ts, label in zip(x_train, y_train):
#     plt.plot(range(len(ts)), ts, c=colors[int(label%len(colors))])
# plt.title('The timeseries in the train set')
# plt.show()

# plt.figure(figsize=(20, 10))
# for ts, label in zip(x_test, y_test):
#     plt.plot(range(len(ts)), ts, c=colors[int(label%len(colors))])
# plt.title('The timeseries in the test set')
# plt.show()

In [26]:
print("standardized train and test data\n")
x_train, x_test = standardize_data(x_train, x_test)
genetic_extractor = GeneticExtractor(**genetic_options)
# genetic_extrator_anglediff = GeneticExtractor(**genetic_options)
print_genetic_param(genetic_extractor)
genetic_extractor.fit(x_train, y_train)
# genetic_extrator_anglediff.fit(x_train_anglediff,y_train)

standardized train and test data

it		avg		std		max		time
1		-0.3836		0.106		-0.228656	6.1576
2		-0.3199		0.069		-0.228656	11.0879
3		-0.272		0.069		-0.208877	9.2709
4		-0.2101		0.036		-0.14546	9.8628
5		-0.1785		0.034		-0.135873	10.377
6		-0.171		0.025		-0.135873	12.7805
7		-0.1573		0.033		-0.119785	15.6453
8		-0.1432		0.027		-0.119785	12.9931
9		-0.129		0.027		-0.104198	14.1547
10		-0.1192		0.026		-0.097008	18.2431
11		-0.1213		0.023		-0.097008	16.9289
12		-0.1282		0.022		-0.097008	16.3837
13		-0.1252		0.032		-0.097008	21.6572
14		-0.1443		0.038		-0.097008	20.1195
15		-0.1327		0.022		-0.097008	23.6116
16		-0.1303		0.024		-0.097008	19.2282
17		-0.1346		0.015		-0.097008	22.8631
18		-0.1184		0.025		-0.093399	23.4443
19		-0.1052		0.015		-0.092048	20.653
20		-0.1048		0.019		-0.085291	20.7716


In [27]:
distances_train = genetic_extractor.transform(x_train)
distances_test = genetic_extractor.transform(x_test)

# distances_train_anglediff = genetic_extrator_anglediff.transform(x_train_anglediff)
# distances_test_anglediff= genetic_extrator_anglediff.transform(x_test_anglediff)

# mixed_distances_train = np.concatenate((distances_train,distances_train_anglediff),axis=1)
# mixed_distances_test = np.concatenate((distances_test,distances_test_anglediff),axis=1) 
lr = LogisticRegression()
# lr.fit(mixed_distances_train, y_train)
lr.fit(distances_train,y_train)

# Print the accuracy score on the test set
# accuracy_result = accuracy_score(y_test, lr.predict(mixed_distances_test))
accuracy_result = accuracy_score(y_test, lr.predict(distances_test))
print('Accuracy = {}'.format(accuracy_result))

Accuracy = 0.5


In [28]:
lr.predict(distances_test)

array([1, 1])

In [29]:
lr.predict_proba(distances_test)

array([[0.06713114, 0.93286886],
       [0.06240788, 0.93759212]])