# Imports

In [27]:
import pandas as pd
import arff
from sklearn.preprocessing import MinMaxScaler, StandardScaler, Normalizer, Binarizer, scale, OneHotEncoder, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score



# Read data

* **English version**: This code snippet loads a file in ARFF format ('data/phpPrh7lv.arff'), reads its contents and stores them in a data structure using the arff library. It then creates a Pandas DataFrame with the data extracted from the ARFF file, using the columns specified in the attributes file.

* **Portuguese version**: Este trecho de código carrega um arquivo no formato ARFF ('data/phpPrh7lv.arff'), lê seu conteúdo e o armazena em uma estrutura de dados usando a biblioteca arff. Em seguida, ele cria um DataFrame do Pandas com os dados extraídos do arquivo ARFF, usando as colunas especificadas no arquivo de atributos.

In [28]:
with open('data/phpPrh7lv.arff', 'r') as file:
    arff_data = arff.load(file)

data = pd.DataFrame(arff_data['data'], columns=[i[0] for i in arff_data['attributes']])
data

Unnamed: 0,V1,V2,V3,V4,V5,V6,V7,Class
0,15.26,14.84,0.8710,5.763,3.312,2.221,5.220,1
1,14.88,14.57,0.8811,5.554,3.333,1.018,4.956,1
2,14.29,14.09,0.9050,5.291,3.337,2.699,4.825,1
3,13.84,13.94,0.8955,5.324,3.379,2.259,4.805,1
4,16.14,14.99,0.9034,5.658,3.562,1.355,5.175,1
...,...,...,...,...,...,...,...,...
205,12.19,13.20,0.8783,5.137,2.981,3.631,4.870,3
206,11.23,12.88,0.8511,5.140,2.795,4.325,5.003,3
207,13.20,13.66,0.8883,5.236,3.232,8.315,5.056,3
208,11.84,13.21,0.8521,5.175,2.836,3.598,5.044,3


# Pré-processamento

In [29]:
data_without_nan = data.dropna()
scaler = MinMaxScaler(feature_range=(0,1))
scaled_data = scaler.fit_transform(data_without_nan)
data_normalized = pd.DataFrame(scaled_data, columns=[i[0] for i in arff_data['attributes']])


# Serapação de dados

In [30]:

train, test = train_test_split(data_normalized, test_size=0.047, random_state=1)

# Técnicas de aprendizado

## Árvore de decisão

In [31]:
x_train = train.drop('Class', axis=1)
y_train = train['Class'].astype('int')

decision_tree = DecisionTreeClassifier()
decision_tree.fit(x_train, y_train)


x_test = test.drop('Class', axis=1)
predictions = decision_tree.predict(x_test)
# 5 - Decision Tree
predictions

array([0, 1, 0, 1, 0, 0, 0, 1, 1, 1])

In [32]:

bayes_classifier = GaussianNB()
bayes_classifier.fit(x_train, y_train)

bayes_predictions = bayes_classifier.predict(x_test)
# 5 - Naive Bayes
bayes_predictions

array([0, 1, 0, 1, 0, 0, 0, 1, 1, 1])

In [33]:
svm_classifier = SVC()
svm_classifier.fit(x_train, y_train)

svm_predictions = svm_classifier.predict(x_test)

# 5 - SVM 
svm_predictions

array([0, 1, 0, 1, 0, 0, 0, 1, 1, 1])

# Acurácia

In [43]:
answer = test['Class'].astype('int').values


tree = accuracy_score(answer, predictions)
bayes = accuracy_score(answer, bayes_predictions)
svm = accuracy_score(answer, svm_predictions)

print('Decision Tree: ', tree)
print('Naive Bayes: ', bayes)
print('SVM: ', svm)



Decision Tree:  1.0
Naive Bayes:  1.0
SVM:  1.0
