In [18]:
import numpy as np
from gendis.genetic import GeneticExtractor
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from data_extraction import DataExtractor
from trajectory_generator import TrajectoryGenerator
from utils import standardize_data

# Seed NumPy's global random number generator so that code drawing from
# np.random produces the same sequence on every run of the notebook.
RANDOM_SEED = 1337
np.random.seed(RANDOM_SEED)

In [19]:
# Create the data files if they have not been created yet (original author's
# description of this cell).
#
# NOTE(review): the output recorded for this cell is
#   TypeError: __init__() got an unexpected keyword argument 'reset_data'
# i.e. the TrajectoryGenerator class that was imported when the cell last ran
# did not accept a `reset_data` keyword. The constructor is not visible from
# this notebook -- confirm its signature in trajectory_generator.py before
# relying on this cell.
reset_data = False
# Alternative value kept by the author; presumably it forces the data files to
# be regenerated -- TODO confirm against TrajectoryGenerator:
#reset_data = True
tr_gen = TrajectoryGenerator(samples=100, reset_data = reset_data)
# Run the generator's data_generation step with n_test=10.
tr_gen.data_generation(n_test=10)


TypeError: __init__() got an unexpected keyword argument 'reset_data'

In [3]:
# Read in the data files and obtain the train and test dataframes.
dex = DataExtractor()
train_df, test_df = dex.train_test_dataframes()

# Export each split to its own CSV file. Both exports request the same two
# series classes; a fresh list is passed on every call, exactly as two
# separate calls would do.
for split_df, csv_name in ((train_df, "train.csv"), (test_df, "test.csv")):
    dex.define_csv(dataset=split_df, ts_class=["Bearing", "Speed"], file=csv_name)


Reading the data files
Done reading files

Creating datasets for train and test files
Success

Creating train.csv 
Success

Creating test.csv 
Success


In [13]:
# Load the train/test inputs (x_*) and targets (y_*) from the DataExtractor.
x_train, y_train, x_test, y_test = dex.load_datasets()

# Standardize the inputs only; y_train / y_test are not passed to the helper.
# The status line is printed *after* the call so that it is emitted only once
# standardization has actually completed (previously it was printed first and
# would still appear if standardize_data raised).
x_train, x_test = standardize_data(x_train, x_test)
print("standardized train and test data")


Loading the csv files to the appropriate train and test arrays(nparrays)
Done
standardized train and test data


In [14]:
# Settings for the GENDIS GeneticExtractor, gathered in one mapping so they
# are easy to scan and tweak before the extractor is built.
extractor_params = dict(
    population_size=25,
    iterations=25,
    verbose=True,
    normed=True,
    add_noise_prob=0.3,
    add_shapelet_prob=0.3,
    wait=10,
    plot=None,
    remove_shapelet_prob=0.3,
    crossover_prob=0.66,
    n_jobs=4,
)
genetic_extractor = GeneticExtractor(**extractor_params)

# Echo the settings as stored on the extractor instance (not the literals
# above) before the fit starts.
fit_banner = ("Starting fit in genetic extractor with:\n"
              "population size:{0:d}\n"
              "iterations: {1:d}\n"
              "normed: {2}\n")
print(fit_banner.format(
    genetic_extractor.population_size,
    genetic_extractor.iterations,
    genetic_extractor.normed,
))

# Fit the extractor on the training data.
genetic_extractor.fit(x_train, y_train)


Starting fit in genetic extractor with:
population size:25
iterations: 25
normed: 1

it		avg		std		max		time
1		-0.5693		0.085		-0.400223	2.4022
2		-0.4707		0.072		-0.387037	3.0968
3		-0.4585		0.092		-0.366297	2.2962
4		-0.411		0.073		-0.342757	2.3227
5		-0.3668		0.023		-0.342757	2.7741
6		-0.3467		0.011		-0.334479	3.9471
7		-0.3468		0.024		-0.30398	3.12
8		-0.3458		0.056		-0.303032	3.924
9		-0.3348		0.026		-0.299578	3.1266
10		-0.3402		0.019		-0.299578	3.3601
11		-0.3315		0.02		-0.299578	5.2551
12		-0.3335		0.03		-0.288574	4.0793
13		-0.3153		0.027		-0.282706	4.0261
14		-0.2774		0.015		-0.26184	2.9714
15		-0.2694		0.01		-0.255012	5.2215
16		-0.2581		0.012		-0.24274	4.9224
17		-0.2429		0.015		-0.22065	4.6182
18		-0.228		0.009		-0.218978	4.6701
19		-0.2265		0.006		-0.217591	4.7345
20		-0.2167		0.007		-0.209729	4.7861
21		-0.2154		0.006		-0.209729	5.7182
22		-0.2125		0.003		-0.207886	7.6518
23		-0.2143		0.003		-0.207886	6.4991
24		-0.2075		0.009		-0.191763	5.6319
25		-0.207		0.007		-0.19

In [15]:
# Transform both splits with the fitted extractor (train first, then test);
# the results are the inputs of the classifier below.
distances_train, distances_test = (
    genetic_extractor.transform(split) for split in (x_train, x_test)
)

# scikit-learn estimators return themselves from fit(), so construction and
# training can be chained into one expression.
lr = LogisticRegression().fit(distances_train, y_train)

# Print the accuracy score on the test set
test_predictions = lr.predict(distances_test)
test_accuracy = accuracy_score(y_test, test_predictions)
print('Accuracy = {}'.format(test_accuracy))

Accuracy = 0.675
