# Criando a inteligência

In [1]:
import pandas as pd
import numpy as np 
from numpy.random import randint
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import LinearSVC
from sklearn.multiclass import OneVsRestClassifier
from sklearn.metrics import accuracy_score


# Criando as comparações

In [2]:
# Accuracy log: one list per (classifier kind, year), filled by the experiment loop.
# "modelo" holds the trained classifier's scores, "aleatório" the random baseline's.
comparações = {
    tipo: {ano: [] for ano in range(2013, 2021)}
    for tipo in ("modelo", "aleatório")
}

In [3]:
# Maps each year to a fitted classifier; populated by the experiment loop.
preditores = dict()

# Agora, vamos rodar os testes

Para cada ano, iremos usar 100 sementes (de 0 a 99) para dividir os dados em treino e teste e treinar um classificador para cada divisão.

Em seguida, vamos comparar as acurácias do modelo com um classificador aleatório.

In [4]:
# For every year, train one One-vs-Rest LinearSVC per seed and record its test
# accuracy next to the accuracy of a uniformly random guesser.
for ano in range(2013,2021):
    # The year's CSV does not depend on the seed: read it once per year
    # instead of once per seed.
    df = pd.read_csv(f"datasets/{ano}.csv")
    # Features are every column from the third up to (not including) the last;
    # the last column is the target.
    X, y = df.iloc[:,2:-1], df.iloc[:,-1]

    # Empty the year's lists so that re-running this cell does not stack a
    # second batch of results on top of the first. The "best seed" cell uses
    # the list position as the seed, which only holds for a single batch.
    comparações["modelo"][ano].clear()
    comparações["aleatório"][ano].clear()

    for seed in range(100):
        # The seed only controls how the rows are split into train and test.
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = seed)

        # Fit the scaler on the training rows only, then reuse it on the test rows.
        scaler = MinMaxScaler()
        X_train = scaler.fit_transform(X_train)
        X_test = scaler.transform(X_test)

        classificador = LinearSVC(dual=False, max_iter=3000)
        ova = OneVsRestClassifier(classificador)

        ova.fit(X_train, y_train)

        # NOTE(review): this entry is overwritten on every iteration, so after
        # the loop it holds the classifier trained with the last seed (99),
        # not the best-scoring one.
        preditores[ano] = ova

        acc_modelo = accuracy_score(y_test, ova.predict(X_test))

        # Random baseline: labels drawn uniformly from {-1, 0, 1}.
        # Seeded with the same seed as the split so the baseline column is
        # reproducible across runs.
        # Assumes the target is encoded as -1/0/1 -- TODO confirm against the datasets.
        rng = np.random.default_rng(seed)
        acc_rand = accuracy_score(y_test, rng.integers(-1, 2, size=y_test.shape))

        comparações["modelo"][ano].append(acc_modelo)
        comparações["aleatório"][ano].append(acc_rand)

In [5]:
def _resumir_linha(linha):
    """Summarise one year: [mean, std] of the model accuracies, then of the random baseline."""
    estatisticas = []
    for coluna in ("modelo", "aleatório"):
        acuracias = linha[coluna]
        estatisticas.extend([np.mean(acuracias), np.std(acuracias)])
    return estatisticas


# Each cell of this intermediate frame holds the full list of accuracies for a
# (year, classifier kind) pair; the row-wise apply collapses it to four numbers.
resultados = pd.DataFrame(comparações, index=range(2013, 2021)).apply(
    _resumir_linha, axis=1, result_type="expand"
)

resultados.columns = ["média modelo", "desvio modelo", "média aleatório", "desvio aleatório"]

In [6]:
# Display the per-year summary (mean and standard deviation of both accuracy series).
resultados

Unnamed: 0,média modelo,desvio modelo,média aleatório,desvio aleatório
2013,0.477368,0.05306,0.327237,0.055001
2014,0.499211,0.050167,0.332237,0.05944
2015,0.516447,0.045746,0.340263,0.056778
2016,0.537763,0.056858,0.329211,0.056532
2017,0.450789,0.05169,0.333289,0.055296
2018,0.540263,0.050579,0.340658,0.057139
2019,0.506579,0.051282,0.328289,0.045708
2020,0.427313,0.044158,0.328507,0.053752


# Descobrindo as melhores sementes

In [7]:
# Year -> position of the highest model accuracy in that year's list.
# The position equals the seed because the seeds were run in order starting at 0.
best_seed = {}

# Tab-separated report: header first, then one line per year.
print("ano", "semente", "acurácia", sep="\t")
for ano in range(2013, 2021):
    resultado = comparações["modelo"][ano]
    # argmax returns the first position of the maximum when there are ties.
    semente = np.argmax(resultado)
    best_seed[ano] = semente
    print(ano, semente, np.max(resultado), sep="\t")

ano	semente	acurácia
2013	74	0.6052631578947368
2014	58	0.618421052631579
2015	94	0.631578947368421
2016	18	0.6578947368421053
2017	66	0.5921052631578947
2018	1	0.6842105263157895
2019	42	0.6447368421052632
2020	75	0.582089552238806


In [8]:
# Display the year -> best seed mapping built in the previous cell.
best_seed

{2013: 74, 2014: 58, 2015: 94, 2016: 18, 2017: 66, 2018: 1, 2019: 42, 2020: 75}