In [1]:
import pandas as pd
import numpy as np
from sklearn.externals import joblib
from utils.classifications_utils import *
from utils.data_processing_utils import *
from utils.data_visualization_utils import *
from utils.metrics_utils import *
from utils.grid_search_utils import *
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.ensemble import ExtraTreesClassifier, RandomForestClassifier, AdaBoostClassifier
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")
%matplotlib inline

In [2]:
def remove_white_noise(white_noise, data):
    """Subtract a baseline recording from every matching entry of ``data``.

    Entries are paired by key, so the result does not depend on the two
    mappings listing their keys in the same order.

    Parameters
    ----------
    white_noise : dict
        Baseline values keyed by name (the notebook passes the no-wind
        recording here).
    data : dict
        Values to correct, keyed the same way as ``white_noise``.

    Returns
    -------
    dict
        ``data[key] - white_noise[key]`` for every key of ``white_noise``
        that is also present in ``data``, in ``white_noise`` order.
    """
    # Pair by key rather than by position: zipping the two ``items()`` views
    # subtracts mismatched entries whenever the key orders differ.
    return {
        key: data[key] - baseline
        for key, baseline in white_noise.items()
        if key in data
    }

In [4]:
# Load the drone1 recordings for load_data's first argument 0, 1, 3, 6 and 8
# (presumably the wind level, going by the variable names) and pass each one
# through separate_data_based_on_apparatus. Both helpers come from the
# star-imported utils modules; the meaning of the second load_data argument (3)
# is not visible in this notebook.
(
    no_wind_data,
    level_1_wind,
    level_3_wind,
    level_6_wind,
    level_8_wind,
) = (
    separate_data_based_on_apparatus(load_data(wind_level, 3, "drone1"))
    for wind_level in (0, 1, 3, 6, 8)
)

In [5]:
# Subtract the drone1 no-wind baseline from each of the windy drone1 recordings.
(
    smoothed_level_1_data,
    smoothed_level_3_data,
    smoothed_level_6_data,
    smoothed_level_8_data,
) = (
    remove_white_noise(no_wind_data, windy_data)
    for windy_data in (level_1_wind, level_3_wind, level_6_wind, level_8_wind)
)

In [6]:
# Build the per-sensor tables for drone1. The second argument of
# transformed_all_data is presumably the class label (0..4 for the recordings
# loaded with 0/1/3/6/8) -- TODO confirm against the utils helper.
# The no-wind recording is transformed as-is; the others use the
# baseline-subtracted data. Each result unpacks into four tables:
# accelerometer, gyroscope, magnetometer and stabilizer (going by the names).
no_wind_transformed = transformed_all_data(no_wind_data, 0)
acc_no_wind, gyro_no_wind, mag_no_wind, stabilizer_no_wind = no_wind_transformed

level_1_wind_transformed = transformed_all_data(smoothed_level_1_data, 1)
acc_level_1_wind, gyro_level_1_wind, mag_level_1_wind, stabilizer_level_1_wind = level_1_wind_transformed

level_3_wind_transformed = transformed_all_data(smoothed_level_3_data, 2)
acc_level_3_wind, gyro_level_3_wind, mag_level_3_wind, stabilizer_level_3_wind = level_3_wind_transformed

level_6_wind_transformed = transformed_all_data(smoothed_level_6_data, 3)
acc_level_6_wind, gyro_level_6_wind, mag_level_6_wind, stabilizer_level_6_wind = level_6_wind_transformed

level_8_wind_transformed = transformed_all_data(smoothed_level_8_data, 4)
acc_level_8_wind, gyro_level_8_wind, mag_level_8_wind, stabilizer_level_8_wind = level_8_wind_transformed

In [7]:
# Stack the accelerometer tables of all five recordings into one frame.
# pd.concat builds the result in a single pass; DataFrame.append is deprecated
# (removed in pandas 2.0) and re-copied the growing frame on every call.
df_acc = pd.concat(
    [
        acc_no_wind,
        acc_level_1_wind,
        acc_level_3_wind,
        acc_level_6_wind,
        acc_level_8_wind,
    ],
    ignore_index=True,
)
# Features are every column except the last one.
# NOTE(review): this assumes "label" is the final column -- TODO confirm.
X_acc = df_acc.iloc[:, :-1]
y_acc = df_acc["label"]
print("Accelerometer: " + str(df_acc.shape[0]))
# Persist the combined frame; the row index is written as the first CSV column.
df_acc.to_csv("smoothed_acc_transformed_1_min.csv")

Accelerometer: 89505


In [8]:
# Stack the gyroscope tables of all five recordings into one frame.
# pd.concat builds the result in a single pass; DataFrame.append is deprecated
# (removed in pandas 2.0) and re-copied the growing frame on every call.
df_gyro = pd.concat(
    [
        gyro_no_wind,
        gyro_level_1_wind,
        gyro_level_3_wind,
        gyro_level_6_wind,
        gyro_level_8_wind,
    ],
    ignore_index=True,
)
# Features are every column except the last one.
# NOTE(review): this assumes "label" is the final column -- TODO confirm.
X_gyro = df_gyro.iloc[:, :-1]
y_gyro = df_gyro["label"]
print("Gyro: " + str(df_gyro.shape[0]))
# Persist the combined frame; the row index is written as the first CSV column.
df_gyro.to_csv("smoothed_gyro_transformed_1_min.csv")

Gyro: 89505


In [9]:
# Stack the stabilizer tables of all five recordings into one frame.
# pd.concat builds the result in a single pass; DataFrame.append is deprecated
# (removed in pandas 2.0) and re-copied the growing frame on every call.
df_sta = pd.concat(
    [
        stabilizer_no_wind,
        stabilizer_level_1_wind,
        stabilizer_level_3_wind,
        stabilizer_level_6_wind,
        stabilizer_level_8_wind,
    ],
    ignore_index=True,
)
# Features are every column except the last one.
# NOTE(review): this assumes "label" is the final column -- TODO confirm.
X_sta = df_sta.iloc[:, :-1]
y_sta = df_sta["label"]
print("Stabilizer: " + str(df_sta.shape[0]))
# Persist the combined frame; the row index is written as the first CSV column.
df_sta.to_csv("smoothed_sta_transformed_1_min.csv")

Stabilizer: 89505


In [10]:
# Load the drone2 recordings for load_data's first argument 0, 1, 3, 6 and 8
# and pass each one through separate_data_based_on_apparatus.
# NOTE: this rebinds no_wind_data and level_*_wind, which held the drone1
# recordings until now; the drone1 cells must not be re-run after this one
# without reloading the drone1 data first.
# The meaning of the second load_data argument (1 here, 3 for drone1) is not
# visible in this notebook.
(
    no_wind_data,
    level_1_wind,
    level_3_wind,
    level_6_wind,
    level_8_wind,
) = (
    separate_data_based_on_apparatus(load_data(wind_level, 1, "drone2"))
    for wind_level in (0, 1, 3, 6, 8)
)

In [11]:
# Subtract the drone2 no-wind baseline from each of the windy drone2 recordings.
(
    smoothed_level_1_data_2,
    smoothed_level_3_data_2,
    smoothed_level_6_data_2,
    smoothed_level_8_data_2,
) = (
    remove_white_noise(no_wind_data, windy_data)
    for windy_data in (level_1_wind, level_3_wind, level_6_wind, level_8_wind)
)

In [12]:
# Build the per-sensor tables for drone2, using the same second-argument values
# (0..4, presumably the class label) as for drone1.
# NOTE: this rebinds the *_transformed, acc_*, gyro_*, mag_* and stabilizer_*
# names that previously held the drone1 tables.
no_wind_transformed = transformed_all_data(no_wind_data, 0)
acc_no_wind, gyro_no_wind, mag_no_wind, stabilizer_no_wind = no_wind_transformed

level_1_wind_transformed = transformed_all_data(smoothed_level_1_data_2, 1)
acc_level_1_wind, gyro_level_1_wind, mag_level_1_wind, stabilizer_level_1_wind = level_1_wind_transformed

level_3_wind_transformed = transformed_all_data(smoothed_level_3_data_2, 2)
acc_level_3_wind, gyro_level_3_wind, mag_level_3_wind, stabilizer_level_3_wind = level_3_wind_transformed

level_6_wind_transformed = transformed_all_data(smoothed_level_6_data_2, 3)
acc_level_6_wind, gyro_level_6_wind, mag_level_6_wind, stabilizer_level_6_wind = level_6_wind_transformed

level_8_wind_transformed = transformed_all_data(smoothed_level_8_data_2, 4)
acc_level_8_wind, gyro_level_8_wind, mag_level_8_wind, stabilizer_level_8_wind = level_8_wind_transformed

In [31]:
# Stack the accelerometer tables currently in scope into one evaluation frame.
# NOTE: the acc_* names were rebound by the drone2 transform cell, so this is
# the drone2 data.
# pd.concat builds the result in a single pass; DataFrame.append is deprecated
# (removed in pandas 2.0) and re-copied the growing frame on every call.
df_acc_2 = pd.concat(
    [
        acc_no_wind,
        acc_level_1_wind,
        acc_level_3_wind,
        acc_level_6_wind,
        acc_level_8_wind,
    ],
    ignore_index=True,
)
# Features are every column except the last one.
# NOTE(review): this assumes "label" is the final column -- TODO confirm.
X_acc_2 = df_acc_2.iloc[:, :-1]
y_acc_2 = df_acc_2["label"]

In [32]:
# Display (rows, columns) of the drone2 accelerometer feature matrix.
X_acc_2.shape

(29505, 40)

In [14]:
# Hyper-parameter grid handed to model_selection (star-imported project helper).
# Only n_estimators, max_depth and max_leaf_nodes vary, giving 2 * 2 * 2 = 8
# combinations.
param_grid = dict(
    n_estimators=[10, 150],
    max_depth=[None, 20],
    max_features=[0.1],
    min_samples_split=[2],
    min_samples_leaf=[1],
    max_leaf_nodes=[None, 900],
    min_weight_fraction_leaf=[0],
)
# Fixed seed so the forest is built the same way on every run.
clf = RandomForestClassifier(random_state=0)

# Run the search on the drone1 accelerometer features and labels.
best_model_rf = model_selection(clf, param_grid, X_acc, y_acc)

In [15]:
# Persist the object returned by model_selection, then read it back so the
# following cells work with the copy that was written to disk.
acc_model_path = 'random_forest_acc_smoothed.pkl'
joblib.dump(best_model_rf, acc_model_path)
pickled_clf = joblib.load(acc_model_path)

In [16]:
# Print the search results for the accelerometer model; the saved output lists
# ranks 1 and 2, so the second argument is presumably the number of top models.
# NOTE(review): in the saved output the "Standard Deviation" equals the mean
# validation score for every model, so report() (project helper) is probably
# printing the mean twice -- verify.
# NOTE(review): grid_scores_ only exists on scikit-learn < 0.20; newer releases
# expose cv_results_ instead.
report(pickled_clf.grid_scores_, 2)

Model with rank: 1
Mean validation score: 0.5899, Standard Deviation: 0.5899
Parameters: {'max_depth': None, 'max_features': 0.1, 'max_leaf_nodes': 900, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0, 'n_estimators': 150}

Model with rank: 2
Mean validation score: 0.5890, Standard Deviation: 0.5890
Parameters: {'max_depth': 20, 'max_features': 0.1, 'max_leaf_nodes': 900, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0, 'n_estimators': 150}



In [33]:
# Cross-drone check: pickled_clf is the accelerometer model selected on the
# drone1 data (X_acc, y_acc); here it is applied to the drone2 features.
y_pred = pickled_clf.predict(X_acc_2)
# Last expression of the cell, so the score on the drone2 data is displayed.
pickled_clf.score(X_acc_2, y_acc_2)

0.40199966107439417

In [34]:
# Tabulate the accelerometer model's predictions against the drone2 labels
# (project helper; called with predictions first, true labels second).
get_confusion_matrix(y_pred, y_acc_2)

Unnamed: 0,actual_0,actual_1,actual_2,actual_3,actual_4
predicted_0,5901,0,0,0,0
predicted_1,0,46,0,1071,0
predicted_2,0,8,0,500,0
predicted_3,0,0,0,13,0
predicted_4,0,5847,5901,4317,5901


In [37]:
# Hyper-parameter grid handed to model_selection (star-imported project helper).
# Same values as the accelerometer search: only n_estimators, max_depth and
# max_leaf_nodes vary, giving 2 * 2 * 2 = 8 combinations.
param_grid = dict(
    n_estimators=[10, 150],
    max_depth=[None, 20],
    max_features=[0.1],
    min_samples_split=[2],
    min_samples_leaf=[1],
    max_leaf_nodes=[None, 900],
    min_weight_fraction_leaf=[0],
)
# Fixed seed so the forest is built the same way on every run.
clf = RandomForestClassifier(random_state=0)

# Run the search on the drone1 stabilizer features and labels.
# NOTE: this rebinds best_model_rf, which previously held the accelerometer model.
best_model_rf = model_selection(clf, param_grid, X_sta, y_sta)

In [38]:
# Persist the stabilizer search result, then read it back so the following
# cells work with the copy that was written to disk.
# NOTE: pickled_clf is rebound here; from this point on it is the stabilizer model.
sta_model_path = 'random_forest_sta_smoothed.pkl'
joblib.dump(best_model_rf, sta_model_path)
pickled_clf = joblib.load(sta_model_path)

In [39]:
# Print the search results for the stabilizer model; the saved output lists
# ranks 1 and 2, so the second argument is presumably the number of top models.
# NOTE(review): in the saved output the "Standard Deviation" equals the mean
# validation score for every model, so report() (project helper) is probably
# printing the mean twice -- verify.
# NOTE(review): grid_scores_ only exists on scikit-learn < 0.20; newer releases
# expose cv_results_ instead.
report(pickled_clf.grid_scores_, 2)

Model with rank: 1
Mean validation score: 0.4756, Standard Deviation: 0.4756
Parameters: {'max_depth': 20, 'max_features': 0.1, 'max_leaf_nodes': 900, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0, 'n_estimators': 150}

Model with rank: 2
Mean validation score: 0.4747, Standard Deviation: 0.4747
Parameters: {'max_depth': None, 'max_features': 0.1, 'max_leaf_nodes': 900, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0, 'n_estimators': 150}



In [41]:
# Stack the stabilizer tables currently in scope into one evaluation frame.
# NOTE: the stabilizer_* names were rebound by the drone2 transform cell, so
# this is the drone2 data.
# pd.concat builds the result in a single pass; DataFrame.append is deprecated
# (removed in pandas 2.0) and re-copied the growing frame on every call.
df_sta_2 = pd.concat(
    [
        stabilizer_no_wind,
        stabilizer_level_1_wind,
        stabilizer_level_3_wind,
        stabilizer_level_6_wind,
        stabilizer_level_8_wind,
    ],
    ignore_index=True,
)
# Features are every column except the last one.
# NOTE(review): this assumes "label" is the final column -- TODO confirm.
X_sta_2 = df_sta_2.iloc[:, :-1]
y_sta_2 = df_sta_2["label"]

In [42]:
# Cross-drone check: pickled_clf is now the stabilizer model selected on the
# drone1 data (X_sta, y_sta); here it is applied to the drone2 features.
y_pred = pickled_clf.predict(X_sta_2)
# Last expression of the cell, so the score on the drone2 data is displayed.
pickled_clf.score(X_sta_2, y_sta_2)

0.3938654465344857

In [43]:
# Tabulate the stabilizer model's predictions against the drone2 labels
# (project helper; called with predictions first, true labels second).
get_confusion_matrix(y_pred, y_sta_2)

Unnamed: 0,actual_0,actual_1,actual_2,actual_3,actual_4
predicted_0,5901,222,216,567,245
predicted_1,0,1264,1293,1311,1212
predicted_2,0,2155,2375,2149,2240
predicted_3,0,487,531,776,899
predicted_4,0,1773,1486,1098,1305


In [44]:
# Display (rows, columns) of the drone2 stabilizer feature matrix.
X_sta_2.shape

(29505, 40)