# Classifying data from samples

### Importing dependencies

In [2]:
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix

import pandas as pd
import numpy as np  

### Importing the dataset

In [16]:
samples = pd.read_csv('./samples_data.csv', sep=",", index_col=0)

qttySamples = samples.shape[0]
qttyA = samples.loc[samples['maturity'] == 'A'].shape[0]
qttyB = samples.loc[samples['maturity'] == 'B'].shape[0]
qttyC = samples.loc[samples['maturity'] == 'C'].shape[0]

print('Number of samples: {0}'.format(qttySamples))
print('Number of A samples: {0} - {1}%'.format(qttyA, 100 * qttyA / qttySamples))
print('Number of B samples: {0} - {1}%'.format(qttyB, 100 * qttyB / qttySamples))
print('Number of C samples: {0} - {1}%'.format(qttyC, 100 * qttyC / qttySamples))

samples.head()

Number of samples: 1075
Number of A samples: 771 - 71.72093023255815%
Number of B samples: 200 - 18.6046511627907%
Number of C samples: 104 - 9.674418604651162%


Unnamed: 0,b_max,a_max,a_min,L_median,maturity
2018-05-19 19:19:00,202,163,112,229.0,C
2018-05-19 19:49:08,203,163,112,229.0,C
2018-05-19 20:19:18,204,164,113,229.0,C
2018-05-19 20:49:28,206,165,113,229.0,C
2018-05-19 21:19:38,205,163,112,229.0,C


### Preparing dataset

In [11]:
x = samples.drop('maturity', axis=1)  # Attributes
y = samples['maturity']               # Labels

X_train, X_test, y_train, y_test = train_test_split(x, y, test_size = 0.20)  

### Preparing classifier

In [17]:
svclassifier = SVC(kernel='linear')  
svclassifier.fit(X_train, y_train)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='linear',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

### Classifying and verifying results

In [20]:
y_pred = svclassifier.predict(X_test)

print(confusion_matrix(y_test,y_pred))  
print(classification_report(y_test,y_pred))

[[152   0   0]
 [  7  37   0]
 [  0   1  18]]
             precision    recall  f1-score   support

          A       0.96      1.00      0.98       152
          B       0.97      0.84      0.90        44
          C       1.00      0.95      0.97        19

avg / total       0.96      0.96      0.96       215

