In [None]:
import numpy
from numpy import genfromtxt

# Load the CSV file into an array and show its dimensions
data = genfromtxt(
    'dataset/data_limpo_semINTMAX.csv',
    delimiter=',',
)
print(data.shape)

In [None]:
# Keep only the inputs and the outputs.
# Row 0 is dropped from everything (presumably the CSV header line — TODO confirm).
rows = data[1:]

idade = rows[:, 163]     # age
acuracia = rows[:, 165]  # accuracy

data = rows[:, :163]     # questions (columns 0..162)

nData, nFeatures = data.shape

print('nData: ', nData)
print('nFeatures: ', nFeatures)

In [None]:
# Use only the 16 results.
# Load them from their CSV; from here on `data` refers to this array, replacing
# the question columns selected earlier.
# NOTE(review): unlike the first CSV, no leading row is dropped here — confirm that
# the number of rows still matches `idade` and `acuracia`.
data = data16 = genfromtxt('dataset/output_limpo_semINTMAX.csv', delimiter=',')
print(data16)

In [None]:
# Bucket the ages into four groups.
# NOTE: gIdade is the very same array object as idade (no copy is made), so idade is
# overwritten with the group ids as well. Re-running this cell therefore re-buckets
# the already bucketed values.
gIdade = idade

# Bin edges: age < 20 -> 0, age < 40 -> 1, age < 60 -> 2, anything else -> 3
gIdade[:nData] = numpy.digitize(idade[:nData], [20, 40, 60])

classes = [str(group) for group in range(4)]

print(gIdade)

In [None]:
# Split into training and test sets (10% of the rows go to the test set, shuffled).
# NOTE: no random_state is passed, so the split is different on every run.
from sklearn.model_selection import train_test_split

splitParts = train_test_split(data, idade, acuracia, test_size=0.1, shuffle=True)
dataTrain, dataTest, yTrain, yTest, acuraciaTrain, acuraciaTest = splitParts

print(dataTrain.shape, yTrain.shape)
print(dataTest.shape, yTest.shape)

In [None]:
# PCA: n_components=0.9 asks for enough components to explain 90% of the variance.
# The projection is fitted on the training set only and then applied to both sets.
# NOTE: dataTrain / dataTest are overwritten with their projections.
from sklearn.decomposition import PCA

pca = PCA(n_components=0.9, svd_solver='full').fit(dataTrain)
dataTrain, dataTest = pca.transform(dataTrain), pca.transform(dataTest)

print(dataTrain.shape, dataTest.shape)

In [None]:
# Linear regression without accuracy weighting
from sklearn.linear_model import LinearRegression

# `normalize` is deliberately not passed: the argument was deprecated in
# scikit-learn 1.0 and removed in 1.2, where passing it raises a TypeError.
# A least-squares fit with an intercept gives the same predictions regardless of
# feature scaling, so the R^2 scores printed below are not affected.
md = LinearRegression(
            fit_intercept=True,
            copy_X=True,
            n_jobs=-1
        )

md.fit(dataTrain, yTrain)
# score() reports the coefficient of determination (R^2)
print('Train: ', md.score(dataTrain, yTrain))
print('Test:  ', md.score(dataTest, yTest))

In [None]:
# Linear regression with each sample weighted by its accuracy
from sklearn.linear_model import LinearRegression

# `normalize` is deliberately not passed: the argument was deprecated in
# scikit-learn 1.0 and removed in 1.2, where passing it raises a TypeError.
# A (weighted) least-squares fit with an intercept gives the same predictions
# regardless of feature scaling, so the scores printed below are not affected.
md = LinearRegression(
            fit_intercept=True,
            copy_X=True,
            n_jobs=-1
        )

# The accuracy column is used as the per-sample weight for both fitting and scoring
md.fit(dataTrain, yTrain, sample_weight=acuraciaTrain)
print('Train: ', md.score(dataTrain, yTrain, sample_weight=acuraciaTrain))
print('Test:  ', md.score(dataTest, yTest, sample_weight=acuraciaTest))

In [None]:
# Linear support-vector classifier (one-vs-rest, L2 penalty, squared hinge loss)
from sklearn.svm import LinearSVC

# All hyper-parameters are spelled out explicitly and collected in one place
svcParams = dict(
    penalty='l2',
    loss='squared_hinge',
    dual=True,
    tol=1e-4,
    C=1.0,
    multi_class='ovr',
    fit_intercept=True,
    intercept_scaling=1,
    verbose=1,
    random_state=None,
    max_iter=100000,
)
md = LinearSVC(**svcParams)

md.fit(dataTrain, yTrain)

# score() reports the mean accuracy on the given set
print('Train: ', md.score(dataTrain, yTrain))
print('Test: ', md.score(dataTest, yTest))

In [None]:
# Perceptron without accuracy weighting
from sklearn.linear_model import Perceptron

md = Perceptron(
            penalty='l2',
            alpha=1e-8,
            fit_intercept=True,
            # max_iter must be an integer: the float literal 1e6 is rejected by
            # scikit-learn's parameter validation in current releases.
            max_iter=1000000,
            tol=1e-5,
            shuffle=True,
#             verbose=1,
            eta0=0.5,
            n_jobs=-1,
            random_state=None,
            class_weight=None,
            warm_start=False,
        )

md.fit(dataTrain, yTrain)
# score() reports the mean accuracy on the given set
print('Train: ', md.score(dataTrain, yTrain))
print('Test:  ', md.score(dataTest, yTest))

In [None]:
# Perceptron with each sample weighted by its accuracy
from sklearn.linear_model import Perceptron

md = Perceptron(
            penalty='l2',
            alpha=1e-8,
            fit_intercept=True,
            # max_iter must be an integer: the float literal 1e6 is rejected by
            # scikit-learn's parameter validation in current releases.
            max_iter=1000000,
            tol=1e-5,
            shuffle=True,
#             verbose=1,
            eta0=0.5,
            n_jobs=-1,
            random_state=None,
            class_weight=None,
            warm_start=False,
        )

# The accuracy column is used as the per-sample weight for both fitting and scoring
md.fit(dataTrain, yTrain, sample_weight=acuraciaTrain)
print('Train: ', md.score(dataTrain, yTrain, sample_weight=acuraciaTrain))
print('Test:  ', md.score(dataTest, yTest, sample_weight=acuraciaTest))

In [None]:
# Build and train a neural network (multi-layer perceptron classifier)
from sklearn.neural_network import MLPClassifier

md = MLPClassifier(
            # One hidden layer with 25 units. Written as a real one-element tuple:
            # `(25)` is just the integer 25, the trailing comma is what makes a tuple.
            hidden_layer_sizes = (25,),
            # 'identity' applies no non-linearity in the hidden layer
            activation = 'identity',
            solver = 'sgd',
            alpha = 1e-6,
            batch_size = 'auto',
            learning_rate = 'adaptive',
            learning_rate_init = 0.001,
            power_t = 0.5,
            max_iter = 10000,
            shuffle = True,
            random_state = None,
            tol = 1e-4,
            verbose = True,
            warm_start = False,
            momentum = 0.9,
            nesterovs_momentum = True,
            early_stopping = False,
            # Per the scikit-learn docs this is only used when early_stopping is True
            validation_fraction = 0.15
        )

md.fit(dataTrain, yTrain)
# score() reports the mean accuracy on the given set
print('Train: ', md.score(dataTrain, yTrain))
print('Test:  ', md.score(dataTest, yTest))

In [None]:
# Write, for every age, the test rows whose predicted label equals that age to a
# separate CSV file ('idades/<age>.csv').
#
# NOTE(review): the selection criterion is "predicted label == age"; yTest is never
# consulted, although the original comments spoke of "correct predictions" — confirm
# which one is intended.
# NOTE(review): if the labels were bucketed into groups 0..3 earlier in the notebook,
# no prediction can equal an age in 13..80 and every file will be empty.
import os

# Predict the labels of the test set with the most recently fitted model
yPred = md.predict(dataTest)

(nDataTest, nFeaturesTest) = dataTest.shape

# Compare labels as integers (truncating toward zero, as int() would)
yPredInt = numpy.asarray(yPred).astype(int)

# numpy.savetxt does not create missing directories
os.makedirs('idades', exist_ok=True)

# For ages 13 to 80, inclusive
for idd in range(13, 81):
    # A boolean mask visits every test row exactly once and keeps the original row
    # order; the earlier two-pointer loop stopped early and skipped rows.
    dados_copia = dataTest[yPredInt == idd]

    # Save the selected rows; the file is named after the age being processed
    numpy.savetxt('idades/' + str(idd) + '.csv', dados_copia, delimiter=',')

In [None]:
# Matriz de confusão

from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import itertools

# yTest holds the true label of every test sample;
# yPred is the matching vector of predictions from the current model.
classes = ['13-20', '20-40', '40-60', '60-']
yPred = md.predict(dataTest)

# Rows correspond to true labels, columns to predicted labels
matrix = confusion_matrix(y_true=yTest, y_pred=yPred)

def plot_confusion_matrix(cm, classes, normalize=True, title='Confusion matrix', cmap=plt.cm.Blues):
    """Print a confusion matrix and draw it as an annotated heat map.

    The drawing goes to the current matplotlib figure; nothing is returned.

    Parameters
    ----------
    cm : numpy.ndarray
        2-D matrix of counts. Rows are shown on the "True label" axis and
        columns on the "Predicted label" axis.
    classes : sequence
        Tick labels for both axes.
    normalize : bool
        When True, every row is divided by its sum before being printed and
        plotted. Rows whose sum is zero are left as zeros.
    title : str
        Title of the plot.
    cmap : matplotlib colormap
        Colormap used for the heat map.
    """
    if normalize:
        row_totals = cm.sum(axis=1)[:, numpy.newaxis].astype('float')
        # A row with no samples would divide by zero and fill the matrix with NaN
        # (which also breaks the text-colour threshold below); dividing by 1 keeps
        # such a row at zero instead.
        row_totals[row_totals == 0] = 1.0
        cm = cm.astype('float') / row_totals
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = numpy.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # Annotate every cell; switch to white text on the darker half of the colour scale
    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    # Adjust the layout last so that the axis labels are taken into account
    plt.tight_layout()

    
# Plot the row-normalized confusion matrix.
# plot_confusion_matrix defaults to normalize=True, so the figure has always shown
# normalized values; the flag is now explicit and the title says what is drawn.
plt.figure()
plot_confusion_matrix(matrix, classes=classes, normalize=True, title='Normalized confusion matrix')

