In [1]:
from perceptron import *
import pandas as pd
import numpy as np

In [2]:
# Load the iris data from the CSV file under files/ into a DataFrame.
iris_dataset = pd.read_csv('files/iris.csv')

In [3]:
# First perceptron run on the iris data: 'Species' is passed as column_x,
# 'setosa' as column_y, with test_size=0.1.
iris_validation = run_perceptron_validation_iris(
    dataset=iris_dataset,
    column_x='Species',
    column_y='setosa',
    test_size=0.1,
)
# Bare last expression so the notebook shows the returned result.
iris_validation

{'predictions': [1, -1, -1, 1, -1, 1, 1, 1, 1, 1, 1, 1, -1, 1, 1],
 'prediction_success': '100.0%'}

# Tarefa 1 - Reexecutar train/test/split

In [4]:
# Task 1: repeat the validation with exactly the same arguments as the first
# run and keep the result under its own name so both runs can be compared.
iris_validation_2 = run_perceptron_validation_iris(
    dataset=iris_dataset,
    column_x='Species',
    column_y='setosa',
    test_size=0.1,
)
# Bare last expression so the notebook shows the returned result.
iris_validation_2

{'predictions': [1, -1, -1, 1, 1, 1, 1, -1, 1, 1, 1, 1, -1, 1, 1],
 'prediction_success': '100.0%'}

# Tarefa 2 - Alterar binarização das classes (Setosa, Versicolor, Virginica)

In [5]:
# Task 2: same call as the earlier runs, but with 'virginica' passed as
# column_y instead of 'setosa'.
iris_validation_virginica = run_perceptron_validation_iris(
    dataset=iris_dataset,
    column_x='Species',
    column_y='virginica',
    test_size=0.1,
)
# Bare last expression so the notebook shows the returned result.
iris_validation_virginica

{'predictions': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
 'prediction_success': '60.0%'}

In [6]:
# Task 2 (continued): same call with 'versicolor' passed as column_y.
iris_validation_versicolor = run_perceptron_validation_iris(
    dataset=iris_dataset,
    column_x='Species',
    column_y='versicolor',
    test_size=0.1,
)
# Bare last expression so the notebook shows the returned result.
iris_validation_versicolor

{'predictions': [-1, -1, -1, 1, -1, -1, 1, -1, -1, -1, -1, 1, 1, -1, 1],
 'prediction_success': '66.66666666666666%'}

# Tarefa 3 - Alterar o tamanho do teste

In [7]:
# Task 3: back to 'setosa' as column_y, but with test_size raised from 0.1
# to 0.2.
iris_validation_with_greater_test_size = run_perceptron_validation_iris(
    dataset=iris_dataset,
    column_x='Species',
    column_y='setosa',
    test_size=0.2,
)
# Bare last expression so the notebook shows the returned result.
iris_validation_with_greater_test_size

{'predictions': [1,
  1,
  1,
  1,
  1,
  -1,
  1,
  -1,
  -1,
  1,
  1,
  1,
  1,
  -1,
  1,
  1,
  1,
  1,
  -1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  -1,
  -1,
  1,
  1],
 'prediction_success': '100.0%'}

# Tarefa 4 - Explorar dataset UCI

 - Dataset: https://archive.ics.uci.edu/ml/datasets/Credit+Approval

In [8]:
# Load the Credit Approval data from the CSV file under files/.
credit_dataset = pd.read_csv('files/credit_approval.csv')
# Bare last expression: the notebook renders the DataFrame as the cell output.
credit_dataset

Unnamed: 0,ATTR_1,ATTR_2,ATTR_3,ATTR_4,ATTR_5,ATTR_6,ATTR_7,ATTR_8,ATTR_9,ATTR_10,ATTR_11,ATTR_12,ATTR_13,ATTR_14,ATTR_15,CLASS
0,b,30.83,0.00,u,g,w,v,1.25,t,t,1,f,g,202,0,+
1,a,58.67,4.46,u,g,q,h,3.04,t,t,6,f,g,43,560,+
2,a,24.50,0.50,u,g,q,h,1.50,t,f,0,f,g,280,824,+
3,b,27.83,1.54,u,g,w,v,3.75,t,t,5,t,g,100,3,+
4,b,20.17,5625.00,u,g,w,v,1.71,t,f,0,f,s,120,0,+
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
685,b,21.08,10085.00,y,p,e,h,1.25,f,f,0,f,g,260,0,-
686,a,22.67,0.75,u,g,c,v,2.00,f,t,2,t,g,200,394,-
687,a,25.25,13.50,y,p,ff,ff,2.00,f,t,1,t,g,200,1,-
688,b,17.92,205.00,u,g,aa,v,0.04,f,f,0,f,g,280,750,-


In [9]:
# Numeric suffixes of the ATTR_<n> columns inspected and recoded in this
# notebook; the list is reused by the cells that follow.
tabulated_attrs = ['1', '4', '5', '6', '7', '9', '10', '12', '13']

# Print the distinct values of each listed column, one line per column.
for attr in tabulated_attrs:
    column_name = f'ATTR_{attr}'
    distinct_values = credit_dataset[column_name].unique()
    print(distinct_values)

['b' 'a' '?']
['u' 'y' '?' 'l']
['g' 'p' '?' 'gg']
['w' 'q' 'm' 'r' 'cc' 'k' 'c' 'd' 'x' 'i' 'e' 'aa' 'ff' 'j' '?']
['v' 'h' 'bb' 'ff' 'j' 'z' '?' 'o' 'dd' 'n']
['t' 'f']
['t' 'f']
['f' 't']
['g' 's' 'p']


# Removendo linhas com valores nulos

In [10]:
# Remove every row that carries the '?' placeholder in one of the columns
# listed in tabulated_attrs. Only those columns are checked; all other
# columns are left untouched by this cell.
for attr in tabulated_attrs:
    has_placeholder = credit_dataset[f'ATTR_{attr}'] == '?'
    placeholder_labels = credit_dataset[has_placeholder].index
    credit_dataset = credit_dataset.drop(placeholder_labels)

In [11]:
# Print the distinct values of each listed column again, to check that the
# '?' placeholder no longer appears after the rows were dropped.
for attr in tabulated_attrs:
    column_name = f'ATTR_{attr}'
    distinct_values = credit_dataset[column_name].unique()
    print(distinct_values)

['b' 'a']
['u' 'y' 'l']
['g' 'p' 'gg']
['w' 'q' 'm' 'r' 'cc' 'k' 'c' 'd' 'x' 'i' 'e' 'aa' 'ff' 'j']
['v' 'h' 'bb' 'ff' 'j' 'z' 'o' 'dd' 'n']
['t' 'f']
['t' 'f']
['f' 't']
['g' 's' 'p']


# Transformando valores CHAR em numéricos

In [12]:
# Show each column's dtype before any conversion is applied.
credit_dataset.dtypes

ATTR_1      object
ATTR_2      object
ATTR_3     float64
ATTR_4      object
ATTR_5      object
ATTR_6      object
ATTR_7      object
ATTR_8     float64
ATTR_9      object
ATTR_10     object
ATTR_11      int64
ATTR_12     object
ATTR_13     object
ATTR_14     object
ATTR_15      int64
CLASS       object
dtype: object

In [13]:
for attr in tabulated_attrs:
    credit_dataset[f'ATTR_{attr}'] = pd.factorize(credit_dataset[f'ATTR_{attr}'])[0]

In [14]:
# Show each column's dtype again after the tabulated columns were recoded.
credit_dataset.dtypes

ATTR_1       int64
ATTR_2      object
ATTR_3     float64
ATTR_4       int64
ATTR_5       int64
ATTR_6       int64
ATTR_7       int64
ATTR_8     float64
ATTR_9       int64
ATTR_10      int64
ATTR_11      int64
ATTR_12      int64
ATTR_13      int64
ATTR_14     object
ATTR_15      int64
CLASS       object
dtype: object

In [15]:
# Print the distinct values of each listed column once more, now that the
# columns hold the integer codes assigned by the recoding step.
for attr in tabulated_attrs:
    column_name = f'ATTR_{attr}'
    distinct_values = credit_dataset[column_name].unique()
    print(distinct_values)

[0 1]
[0 1 2]
[0 1 2]
[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13]
[0 1 2 3 4 5 6 7 8]
[0 1]
[0 1]
[0 1]
[0 1 2]


In [16]:
# Recode the CLASS label: "+" becomes 1, everything else becomes 0.
# The comparison also accepts "1" so that re-running this cell on the already
# recoded column is a no-op; comparing only against "+" would turn every
# label into 0 on a second execution, because 1/0 stringify to "1"/"0".
class_as_text = credit_dataset["CLASS"].astype('string')
credit_dataset["CLASS"] = np.where(class_as_text.isin(["+", "1"]), 1, 0)

In [17]:
# Show the distinct label values left in CLASS after the recoding.
credit_dataset["CLASS"].unique()

array([1, 0])

In [18]:
# Run the credit-approval validation on the prepared DataFrame.
# The second positional argument is 0.1 -- presumably the test size, mirroring
# test_size=0.1 in the iris runs; confirm against the perceptron module.
# NOTE(review): the recorded run of this cell raised
# "TypeError: can't multiply sequence by non-int of type 'float'", which is
# what Python reports when a str is multiplied by a float. Check that no
# feature column still has dtype `object` before calling this.
output = run_perceptron_validation_credit(credit_dataset, 0.1)

TypeError: can't multiply sequence by non-int of type 'float'

In [None]:
# Display the result stored in `output` by the credit validation cell.
output