# Scenario 3a: 3 beacons, 16 locations, Support Vector Classifier

In [4]:
# Import modules
from sklearn import datasets 
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
from sklearn.model_selection import cross_val_score, GridSearchCV 
from sklearn.svm import SVC, LinearSVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
import pandas as pd
import numpy as np

In [5]:
# Helper functions that print the mean cross-validation accuracy for model
# comparison
def Average(lst):
    """Return the arithmetic mean of the values in ``lst``.

    ``lst`` must support ``sum()`` and ``len()``. An empty sequence raises
    ``ZeroDivisionError`` because the total is divided by a length of zero.
    """
    total = sum(lst)
    n_items = len(lst)
    return total / n_items

def display_scores(scores):
    """Print the mean of ``scores``, rounded to two decimals, between two rules.

    ``scores`` is any iterable of numeric values; in this notebook it is the
    array returned by ``cross_val_score``. The mean is computed with the
    module-level ``Average`` helper.
    """
    collected = list(scores)
    rule = "-------------------------------------"
    print("")
    print(rule)
    print(f"Average accuracy --> {round(Average(collected),2)}")
    print(rule)

In [6]:
# Directories holding the training and test CSV files (currently the same one).
# NOTE(review): these are absolute, machine-specific paths; the notebook only
# runs where this directory exists.
src = "/home/victor/Escritorio/tfg/old/tres_apes/"
src1 = "/home/victor/Escritorio/tfg/old/tres_apes/"

# Training set: the first row of the file is used as the column header
f_name = f"{src}salida.csv"
train = pd.read_csv(f_name, header=0)

# Test set: read the same way from its own file
f_name_1 = f"{src1}data_3node_test_final.csv"
test = pd.read_csv(f_name_1, header=0)

In [7]:
# Split each frame into features (every column except 'Label') and the
# target (the 'Label' column): first the training set, then the test set.
X_train, y_train = train.drop(columns=['Label']), train["Label"]
X_test, y_test = test.drop(columns=['Label']), test["Label"]

In [8]:
# Sanity check: number of rows in the training and test feature frames
print(f"Size of training set: {X_train.shape[0]}")
print(f"Size of test set: {X_test.shape[0]}")

Size of training set: 1920
Size of test set: 320


## Linear Kernel

In [9]:
# Grid search for the linear kernel: a single candidate (C=1), 3-fold CV
param_grid = {
    'C': [1],
    'kernel': ['linear'],
}
grid = GridSearchCV(SVC(), param_grid, cv=3, refit=True, verbose=0)
grid.fit(X_train, y_train)

# Regularisation strength selected by the search
Cbest = grid.best_params_['C']
print(f"Best C parameter --> {Cbest}")

# Fit a fresh linear-kernel SVC with the selected C on the whole training set,
# then predict the held-out test set
clf = SVC(
    kernel='linear',
    C=Cbest,
    decision_function_shape='ovr',
    gamma='scale',
)
clf = clf.fit(X_train, y_train)
clf_predictions = clf.predict(X_test)

# Display names D1..D16 for the 16 locations; only needed if a
# classification_report(..., target_names=target_names) is printed
target_names = [f"D{i}" for i in range(1, 17)]

# Accuracy on the test set, framed by horizontal rules
a = accuracy_score(y_test, clf_predictions)
print(
    "",
    "-------------------------------------",
    f"Accuracy --> {a}",
    "-------------------------------------",
    sep="\n",
)

# 10-fold cross-validated accuracy on the training set
scores = cross_val_score(clf, X_train, y_train, scoring='accuracy', cv=10)
display_scores(scores)

Best C parameter --> 1

-------------------------------------
Accuracy --> 0.821875
-------------------------------------

-------------------------------------
Average accuracy --> 0.91
-------------------------------------


## Radial Basis Kernel

In [10]:
# Grid search for the RBF kernel: a single candidate (C=50, gamma=0.01), 3-fold CV
param_grid = {
    'C': [50],
    'gamma': [0.01],
    'kernel': ['rbf'],
}
grid = GridSearchCV(SVC(), param_grid, cv=3, refit=True, verbose=0)
grid.fit(X_train, y_train)

# Hyper-parameters selected by the search
gamma_best = grid.best_params_['gamma']
Cbest = grid.best_params_['C']

print(f"Best C parameter --> {Cbest}")
print(f"Best gamma parameter --> {gamma_best}")

# Fit a fresh RBF-kernel SVC with the selected parameters on the whole
# training set, then predict the held-out test set
clf = SVC(
    kernel='rbf',
    C=Cbest,
    gamma=gamma_best,
    decision_function_shape='ovr',
)
clf = clf.fit(X_train, y_train)
clf_predictions = clf.predict(X_test)

# Display names D1..D16 for the 16 locations; only needed if a
# classification_report(..., target_names=target_names) is printed
target_names = [f"D{i}" for i in range(1, 17)]

# Accuracy on the test set
a = accuracy_score(y_test, clf_predictions)
print(f"Accuracy --> {a}")

# 10-fold cross-validated accuracy on the training set
scores = cross_val_score(clf, X_train, y_train, scoring='accuracy', cv=10)
display_scores(scores)

Best C parameter --> 50
Best gamma parameter --> 0.01
Accuracy --> 0.86875

-------------------------------------
Average accuracy --> 0.93
-------------------------------------


## Scenario 3b: 3 nodes, 16 locations, Decision Tree

In [16]:
# Grid search for the decision tree: both split criteria at a fixed
# max_depth of 10, evaluated with 3-fold CV
param_grid = {'criterion':['gini','entropy'],'max_depth':[10]}
grid = GridSearchCV(DecisionTreeClassifier(), param_grid,refit = True, verbose= 0, cv = 3)
grid.fit(X_train,y_train)

# Hyper-parameters selected by the search
max_best = grid.best_params_['max_depth']
criterion_best = grid.best_params_['criterion']
print(f"Best max_depth parameter --> {max_best}")
# Label corrected: this line reports the split criterion, not max_depth
print(f"Best criterion parameter --> {criterion_best}")

# Fit a fresh tree with the selected parameters on the whole training set,
# then predict the held-out test set.
# NOTE(review): no random_state is passed, so the fitted tree (and the
# accuracies below) may differ between runs - consider fixing a seed.
clf= DecisionTreeClassifier(criterion=criterion_best, max_depth=max_best).fit(X_train, y_train)
clf_predictions = clf.predict(X_test)

# Display names D1..D16 for the 16 locations; only needed if a
# classification_report(..., target_names=target_names) is printed
target_names = ['D1', 'D2', 'D3', 'D4', 'D5','D6', 'D7', 'D8', 'D9', 'D10','D11', 'D12', 'D13', 'D14', 'D15','D16']

# Accuracy on the test set
a = accuracy_score(y_test, clf_predictions)
print(f"Accuracy --> {a}")

# 10-fold cross-validated accuracy on the training set
scores = cross_val_score(clf, X_train,y_train,cv = 10, scoring='accuracy')
display_scores(scores)

Best max_depth parameter --> 10
Best max_depth parameter --> entropy
Accuracy --> 0.784375

-------------------------------------
Average accuracy --> 0.92
-------------------------------------


## Scenario 3c: 3 nodes, 16 locations, Random Forest Classifier

In [12]:
# Grid search for the random forest (3-fold CV) over leaf size, split size
# and number of trees; bootstrap, max_depth and max_features are fixed
param_grid = {
    'bootstrap': [True],
    'max_depth': [50],
    'max_features': ['sqrt'],
    'min_samples_leaf': [1, 2, 3],
    'min_samples_split': [5, 7, 9, 10],
    'n_estimators': [70, 90, 100],
}
grid = GridSearchCV(RandomForestClassifier(), param_grid, cv=3, refit=True, verbose=0)
grid.fit(X_train, y_train)

# Hyper-parameters selected by the search
n_estimator_best = grid.best_params_['n_estimators']
min_split_best = grid.best_params_['min_samples_split']
min_leaf_best = grid.best_params_['min_samples_leaf']
max_feature_best = grid.best_params_['max_features']
depth_best = grid.best_params_['max_depth']

print(f"Best max_depth parameter --> {depth_best}")
print(f"Best max_feature parameter --> {max_feature_best}")
print(f"Best min_leaf parameter --> {min_leaf_best}")
print(f"Best min_split parameter --> {min_split_best}")
print(f"Best n_estimator parameter --> {n_estimator_best}")

# Fit a fresh forest with the selected parameters on the whole training set,
# then predict the held-out test set.
# NOTE(review): no random_state is passed, so the fitted forest (and the
# accuracies below) may differ between runs - consider fixing a seed.
clf = RandomForestClassifier(
    n_estimators=n_estimator_best,
    criterion='gini',
    max_depth=depth_best,
    max_features=max_feature_best,
    min_samples_leaf=min_leaf_best,
    min_samples_split=min_split_best,
)
clf = clf.fit(X_train, y_train)
clf_predictions = clf.predict(X_test)

# Display names D1..D16 for the 16 locations; only needed if a
# classification_report(..., target_names=target_names) is printed
target_names = [f"D{i}" for i in range(1, 17)]

# Accuracy on the test set
a = accuracy_score(y_test, clf_predictions)
print(f"Accuracy --> {a}")

# 10-fold cross-validated accuracy on the training set
scores = cross_val_score(clf, X_train, y_train, scoring='accuracy', cv=10)
display_scores(scores)

Best max_depth parameter --> 50
Best max_feature parameter --> sqrt
Best min_leaf parameter --> 1
Best min_split parameter --> 7
Best n_estimator parameter --> 90
Accuracy --> 0.828125

-------------------------------------
Average accuracy --> 0.94
-------------------------------------
