In [5]:
import numpy as np
import pandas as pd

import seaborn as sns
import matplotlib.pyplot as plt

from os import listdir

%matplotlib inline

In [6]:
folder = '/Users/Cota/Documents/ML/Course_JosePortilla/Cesar/Artificial/Matrices/'

In [7]:
list_files = [f for f in listdir(folder) if f.endswith('.txt') and 'Cloud' in f]
list_files.sort()

In [8]:
Matrix = np.loadtxt(folder+list_files[0])

In [9]:
Clouds = np.zeros((len(list_files),40000))

In [10]:
for i in range(len(list_files)):
    Clouds[i,:] = np.loadtxt(folder+list_files[i]).reshape(-1) 

# Train and test data split

In [None]:
from sklearn.model_selection import train_test_split

In [84]:
X = Clouds/Clouds.max()
X = pd.DataFrame(data=X, dtype='float64')

In [12]:
y = np.loadtxt(folder+'Target.txt')

In [29]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

In [30]:
y_train.shape

(140,)

# Model selection

**Random forest**

In [44]:
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.ensemble import RandomForestClassifier

In [45]:
forest = RandomForestClassifier(n_estimators=300)

In [46]:
forest.fit(X_train, y_train)

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=None, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=300,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)

In [49]:
predictions_forest = forest.predict(X_test)

In [50]:
print (confusion_matrix(y_test, predictions_forest))
print('\n')
print (classification_report(y_test, predictions_forest))

[[18  0]
 [ 1 41]]


              precision    recall  f1-score   support

         0.0       0.95      1.00      0.97        18
         1.0       1.00      0.98      0.99        42

    accuracy                           0.98        60
   macro avg       0.97      0.99      0.98        60
weighted avg       0.98      0.98      0.98        60



**Support vector classification**

In [54]:
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV

In [55]:
svc = SVC()

In [56]:
svc.fit(X_train, y_train)

SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='rbf',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)

In [57]:
predictions_svc = svc.predict(X_test)

In [65]:
print (confusion_matrix(y_test, predictions_svc))
print('\n')
print (classification_report(y_test, predictions_svc))

[[18  0]
 [ 8 34]]


              precision    recall  f1-score   support

         0.0       0.69      1.00      0.82        18
         1.0       1.00      0.81      0.89        42

    accuracy                           0.87        60
   macro avg       0.85      0.90      0.86        60
weighted avg       0.91      0.87      0.87        60



[[18  0]
 [ 8 34]]


**Optimize the parameters**

In [79]:
param_grid1 = dict(C=[0.1,0.5,1,5,10,100,1000],gamma = [10,5,2,1,1e-1,1e-2,1e-3,1e-4])
grid = GridSearchCV(SVC(),param_grid=param_grid1)

In [80]:
grid.fit(X_train, y_train)

GridSearchCV(cv=None, error_score=nan,
             estimator=SVC(C=1.0, break_ties=False, cache_size=200,
                           class_weight=None, coef0=0.0,
                           decision_function_shape='ovr', degree=3,
                           gamma='scale', kernel='rbf', max_iter=-1,
                           probability=False, random_state=None, shrinking=True,
                           tol=0.001, verbose=False),
             iid='deprecated', n_jobs=None,
             param_grid={'C': [0.1, 0.5, 1, 5, 10, 100, 1000],
                         'gamma': [10, 5, 2, 1, 0.1, 0.01, 0.001, 0.0001]},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring=None, verbose=0)

In [81]:
predictions_grid = grid.predict(X_test)

In [82]:
grid.best_params_

{'C': 1, 'gamma': 1}

In [83]:
print (confusion_matrix(y_test, predictions_grid))
print('\n')
print (classification_report(y_test, predictions_grid))

[[18  0]
 [ 1 41]]


              precision    recall  f1-score   support

         0.0       0.95      1.00      0.97        18
         1.0       1.00      0.98      0.99        42

    accuracy                           0.98        60
   macro avg       0.97      0.99      0.98        60
weighted avg       0.98      0.98      0.98        60

