# Importando bases

In [1]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.naive_bayes import GaussianNB
from sklearn import tree
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import LinearSVC
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.neighbors import KNeighborsClassifier

In [2]:
df_bolsonaro_train = pd.read_excel('Bolsonaro_treino_sem_stop2.xlsx')  
df_lula_train = pd.read_excel('Lula_treino_sem_stop2.xlsx') 
df_simone_train = pd.read_excel('Simone_treino_sem_stop2.xlsx') 
df_ciro_train = pd.read_excel('Ciro_treino_sem_stop2.xlsx') 

In [3]:
df_bolsonaro = pd.read_excel('Bolsonaro_todos_sem_stop.xlsx', index_col=0)
df_lula = pd.read_excel('Lula_todos_sem_stop.xlsx', index_col=0)
df_simone = pd.read_excel('Simone_todos_sem_stop.xlsx', index_col=0)
df_ciro = pd.read_excel('Ciro_todos_sem_stop.xlsx', index_col=0)



In [4]:
df_lula_train = df_lula_train[df_lula_train['Sentimento']!=' ']

In [5]:
print(df_lula_train['Sentimento'].value_counts())
print(df_bolsonaro_train['Sentimento'].value_counts())
print(df_simone_train['Sentimento'].value_counts())
print(df_ciro_train['Sentimento'].value_counts())

p    10335
n     1773
x       81
Name: Sentimento, dtype: int64
p    5065
n     848
x     108
Name: Sentimento, dtype: int64
p    120
x    109
n     50
Name: Sentimento, dtype: int64
p    6807
x     105
n      81
Name: Sentimento, dtype: int64


In [6]:
def train_test_under(df):
    tamanho = df['Sentimento'].value_counts()[2]-5
    
    df_train = df[df['Sentimento']=='p'].sample(n = tamanho)
    df_train = df_train.append(df[df['Sentimento']=='n'].sample(n = tamanho))
    df_train = df_train.append(df[df['Sentimento']=='x'].sample(n = tamanho))
    
    df_teste = df.drop(df_train.index)
    
    X_bw_train, bw = Bag_of_words(df_train)
    X_tfidf_train, tfidf = tfidf(df_train)
    Y_train = df_train['Sentimento'].str.replace('p','1').replace('x','0').replace('n','-1').astype(int)
    
    X_bw_teste = Bag_of_words_teste(df_teste, bw )
    X_tfidf_teste = tfidf_teste(df_teste, tfidf)
    Y_teste = df_teste['Sentimento'].str.replace('p','1').replace('x','0').replace('n','-1').astype(int)
    
    return X_bw_train, X_tfidf_train, Y_train, X_bw_teste, X_tfidf_teste, Y_teste, bw, tfidf, df_train, df_teste


In [7]:
# Bag Of Words

def Bag_of_words(df):
    matrix = CountVectorizer()
    text = []
    for i in df.index:
        text.append("".join(df['novo_texto'][i]))
    X = matrix.fit_transform(text)
    return X, matrix

def Bag_of_words_teste(df, matrix):
    text = []
    for i in df.index:
        text.append("".join(df['novo_texto'][i]))
    X = matrix.transform(text)
    return X

def tfidf_(df):
    matrix = TfidfVectorizer()
    text = []
    for i in df.index:
        text.append("".join(df['novo_texto'][i]))
    X = matrix.fit_transform(text)
    return X, matrix

def tfidf_teste(df, matrix):
    text = []
    for i in df.index:
        text.append("".join(df['novo_texto'][i]))
    X = matrix.transform(text)
    return X

def train_test_under_bw(df):
    tamanho = df['Sentimento'].value_counts()[2]-2
    
    df_train = df[df['Sentimento']=='p'].sample(n = tamanho)
    df_train = df_train.append(df[df['Sentimento']=='n'].sample(n = tamanho))
    df_train = df_train.append(df[df['Sentimento']=='x'].sample(n = tamanho))
    
    df_teste = df.drop(df_train.index)
    
    X_bw_train, bw = Bag_of_words(df_train)
    Y_train = df_train['Sentimento'].str.replace('p','1').replace('x','0').replace('n','-1').astype(int)
    
    X_bw_teste = Bag_of_words_teste(df_teste, bw )
    
    Y_teste = df_teste['Sentimento'].str.replace('p','1').replace('x','0').replace('n','-1').astype(int)
    
    return X_bw_train, Y_train, X_bw_teste, Y_teste, bw, df_train, df_teste

def train_test_under_tfidf(df):
    tamanho = df['Sentimento'].value_counts()[2]-2
    
    df_train = df[df['Sentimento']=='p'].sample(n = tamanho)
    df_train = df_train.append(df[df['Sentimento']=='n'].sample(n = tamanho))
    df_train = df_train.append(df[df['Sentimento']=='x'].sample(n = tamanho))
    
    df_teste = df.drop(df_train.index)
    
    X_tfidf_train, tfidf = tfidf_(df_train)
    Y_train = df_train['Sentimento'].str.replace('p','1').replace('x','0').replace('n','-1').astype(int)
    
    X_tfidf_teste = tfidf_teste(df_teste, tfidf)
    Y_teste = df_teste['Sentimento'].str.replace('p','1').replace('x','0').replace('n','-1').astype(int)
    
    return X_tfidf_train, Y_train, X_tfidf_teste, Y_teste, tfidf, df_train, df_teste

def train_model_bw(model, df_train_tudo, num_tests):
    X_train, Y_train, X_teste, Y_teste, bw, df_train, df_teste = train_test_under_bw(df_train_tudo)
    
    model2 = model.fit(X_train.toarray(), Y_train)
    predict = model2.predict(X_teste.toarray())
    accuracy = accuracy_score(Y_teste, predict)
    precision = precision_score(Y_teste, predict, average='macro')
    recall = recall_score(Y_teste, predict, average='macro')
    f1 = f1_score(Y_teste, predict, average='macro')
    score = accuracy + precision + recall + f1
    
    matrix_confusion = confusion_matrix(Y_teste, predict,labels=[-1, 0, 1])
    
    errados = matrix_confusion[0][2]+ matrix_confusion[2][0]
    
    model2_fim, bw_fim, df_train_fim, df_teste_fim, score_fim, errados_fim = model2, bw, df_train, df_teste, score, errados
    for i in range(0,num_tests):
        X_train, Y_train, X_teste, Y_teste, bw, df_train, df_teste = train_test_under_bw(df_train_tudo)

        model2 = model.fit(X_train.toarray(), Y_train)
        predict = model2.predict(X_teste.toarray())
        accuracy = accuracy_score(Y_teste, predict)
        precision = precision_score(Y_teste, predict, average='macro')
        recall = recall_score(Y_teste, predict, average='macro')
        f1 = f1_score(Y_teste, predict, average='macro')
        score = accuracy + precision + recall + f1

        matrix_confusion = confusion_matrix(Y_teste, predict,labels=[-1, 0, 1])
        accuracy_fim, precision_fim,recall_fim, f1_fim, matrix_confusion_fim = accuracy, precision,recall,f1,matrix_confusion 
        errados = matrix_confusion[0][2]+ matrix_confusion[2][0]
        
        if((score_fim<score) & (errados_fim>errados)):
            accuracy_fim, precision_fim,recall_fim, f1_fim, matrix_confusion_fim = accuracy, precision,recall,f1,matrix_confusion 
            model2_fim, bw_fim, df_train_fim, df_teste_fim, score_fim, errados_fim = model2, bw, df_train, df_teste, score, errados
    
    print('accuracy:')
    print(accuracy_fim)
    print('precision:')
    print(precision_fim)
    print('recall:')
    print(recall_fim)
    print('f1:')
    print(f1_fim)
    print('matrix_confusion:')
    print(matrix_confusion_fim)
    print(score_fim/4)
    print(errados_fim)
    return model2_fim, bw_fim, df_train_fim, df_teste_fim     
    
def train_model_tfidf(model, df_train_tudo, num_tests):
    X_train, Y_train, X_teste, Y_teste, tfidf, df_train, df_teste = train_test_under_tfidf(df_train_tudo)
    
    model2 = model.fit(X_train.toarray(), Y_train)
    predict = model2.predict(X_teste.toarray())
    accuracy = accuracy_score(Y_teste, predict)
    precision = precision_score(Y_teste, predict, average='macro')
    recall = recall_score(Y_teste, predict, average='macro')
    f1 = f1_score(Y_teste, predict, average='macro')
    score = accuracy + precision + recall + f1
    
    matrix_confusion = confusion_matrix(Y_teste, predict,labels=[-1, 0, 1])
    accuracy_fim, precision_fim,recall_fim, f1_fim, matrix_confusion_fim = accuracy, precision,recall,f1,matrix_confusion 
    errados = matrix_confusion[0][2]+ matrix_confusion[2][0]
    
    model2_fim, tfidf_fim, df_train_fim, df_teste_fim, score_fim, errados_fim = model2, tfidf, df_train, df_teste, score, errados
    for i in range(0,num_tests):
        X_train, Y_train, X_teste, Y_teste, tfidf, df_train, df_teste = train_test_under_tfidf(df_train_tudo)

        model2 = model.fit(X_train.toarray(), Y_train)
        predict = model2.predict(X_teste.toarray())
        accuracy = accuracy_score(Y_teste, predict)
        precision = precision_score(Y_teste, predict, average='macro')
        recall = recall_score(Y_teste, predict, average='macro')
        f1 = f1_score(Y_teste, predict, average='macro')
        score = accuracy + precision + recall + f1

        matrix_confusion = confusion_matrix(Y_teste, predict,labels=[-1, 0, 1])

        errados = matrix_confusion[0][2]+ matrix_confusion[2][0]
        
        if((score_fim<score) & (errados_fim>errados)):
            accuracy_fim, precision_fim,recall_fim, f1_fim, matrix_confusion_fim = accuracy, precision,recall,f1,matrix_confusion 
            model2_fim, tfidf_fim, df_train_fim, df_teste_fim, score_fim, errados_fim = model2, tfidf, df_train, df_teste, score, errados
    
    print('accuracy:')
    print(accuracy_fim)
    print('precision:')
    print(precision_fim)
    print('recall:')
    print(recall_fim)
    print('f1:')
    print(f1_fim)
    print('matrix_confusion:')
    print(matrix_confusion_fim)
    print(score_fim/4)
    print(errados_fim)
    return model2_fim, tfidf_fim, df_train_fim, df_teste_fim  

def AnaliseSentimentos_bw(df, bw,model, name_colum):
    i1=0
    for i in range(5000,len(df),5000):
        if (i==5000):
            predict = model.predict_proba(Bag_of_words_teste(df[i1:i], bw).toarray())
        else:
            predict = np.vstack((predict, model.predict_proba(Bag_of_words_teste(df[i1:i], bw).toarray())))
        i1=i
        
    predict = np.vstack((predict,model.predict_proba(Bag_of_words_teste(df[i1:], bw).toarray())))
    df_predict = pd.DataFrame(predict)
    df[name_colum+'-1'] = df_predict[0]
    df[name_colum+'0'] = df_predict[1]
    df[name_colum+'1'] = df_predict[2]
    
    print(np.unique(predict, return_counts=True))
    return df

def AnaliseSentimentos_tfidf(df, tfidf,model, name_colum):
    i1=0
    for i in range(5000,len(df),5000):
        if (i==5000):
            predict = model.predict_proba(tfidf_teste(df[i1:i], tfidf).toarray())
        else:
            predict = np.vstack((predict, model.predict_proba(tfidf_teste(df[i1:i], tfidf).toarray())))
        i1=i
        
    predict = np.vstack((predict,model.predict_proba(tfidf_teste(df[i1:], tfidf).toarray())))

    df[name_colum+'-1'] = pd.DataFrame(predict)[0]
    df[name_colum+'0'] = pd.DataFrame(predict)[1]
    df[name_colum+'1'] = pd.DataFrame(predict)[2]

    print(np.unique(predict, return_counts=True))
    return df

## Bolsonaro

### Bolsonaro - Arvore de Decisao - bw

In [8]:
%%time
model_tree_bolsonaro_bw, tree_bolsonaro_bw, df_train_tree_bolsonaro_bw, df_teste_tree_bolsonaro_bw = \
train_model_bw(tree.DecisionTreeClassifier(), df_bolsonaro_train, 100)
model_tree_bolsonaro_bw = model_tree_bolsonaro_bw.fit(Bag_of_words_teste(df_train_tree_bolsonaro_bw, tree_bolsonaro_bw), df_train_tree_bolsonaro_bw['Sentimento'].str.replace('p','1').replace('x','0').replace('n','-1').astype(int))

df_bolsonaro = AnaliseSentimentos_bw(df_bolsonaro, tree_bolsonaro_bw,model_tree_bolsonaro_bw, 'predict_bw_ArvoreDeDecisao')


accuracy:
0.37699456426442224
precision:
0.3956288034393241
recall:
0.5854464832652005
f1:
0.2823154229477229
matrix_confusion:
[[ 282  305  155]
 [   0    2    0]
 [ 791 2302 1866]]
0.49561039199809714
850
(array([0., 1.]), array([1892876,  946438], dtype=int64))
Wall time: 1min 9s


### Bolsonaro - Arvore de Decisao - tfidf

In [10]:
%%time
model_tree_bolsonaro_tfidf, tree_bolsonaro_tfidf, df_train_tree_bolsonaro_tfidf, df_teste_tree_bolsonaro_tfidf = \
train_model_tfidf(tree.DecisionTreeClassifier(), df_bolsonaro_train, 100)
model_tree_bolsonaro_tfidf = model_tree_bolsonaro_tfidf.fit(tfidf_teste(df_train_tree_bolsonaro_tfidf, tree_bolsonaro_tfidf), df_train_tree_bolsonaro_tfidf['Sentimento'].str.replace('p','1').replace('x','0').replace('n','-1').astype(int))

df_bolsonaro = AnaliseSentimentos_tfidf(df_bolsonaro, tree_bolsonaro_tfidf,model_tree_bolsonaro_tfidf, 'predict_tfidf_ArvoreDeDecisao')


accuracy:
0.534806242328599
precision:
0.3939394441254092
recall:
0.48087117598811596
f1:
0.3374284060477679
matrix_confusion:
[[ 286  171  285]
 [   0    1    1]
 [ 754 1442 2763]]
0.436761317122473
1039
(array([0., 1.]), array([1892876,  946438], dtype=int64))
Wall time: 1min 9s


### Bolsonaro - Naive Bayes - bw

In [11]:
%%time
model_nb_bolsonaro_bw, nb_bolsonaro_bw, df_train_nb_bolsonaro_bw, df_teste_nb_bolsonaro_bw = \
train_model_bw(GaussianNB(), df_bolsonaro_train, 100)
model_nb_bolsonaro_bw = model_nb_bolsonaro_bw.fit(Bag_of_words_teste(df_train_nb_bolsonaro_bw, nb_bolsonaro_bw).toarray(), df_train_nb_bolsonaro_bw['Sentimento'].str.replace('p','1').replace('x','0').replace('n','-1').astype(int))

df_bolsonaro = AnaliseSentimentos_bw(df_bolsonaro, nb_bolsonaro_bw,model_nb_bolsonaro_bw, 'predict_bw_NaiveBayes')


accuracy:
0.3752411011748203
precision:
0.3738575536909448
recall:
0.6248862414838513
f1:
0.265000658179916
matrix_confusion:
[[ 387  219  136]
 [   0    2    0]
 [1623 1585 1751]]
0.4379753693373837
1247
(array([0.e+000, 1.e-323, 2.e-323, ..., 1.e+000, 1.e+000, 1.e+000]), array([1798330,       6,       2, ...,     951,     324,  928476],
      dtype=int64))
Wall time: 2min 34s


### Bolsonaro - Naive Bayes - tfidf

In [12]:
%%time
model_nb_bolsonaro_tfidf, nb_bolsonaro_tfidf, df_train_nb_bolsonaro_tfidf, df_teste_nb_bolsonaro_tfidf = \
train_model_tfidf(GaussianNB(), df_bolsonaro_train, 100)
model_nb_bolsonaro_tfidf = model_nb_bolsonaro_tfidf.fit(tfidf_teste(df_train_nb_bolsonaro_tfidf, nb_bolsonaro_tfidf).toarray(), df_train_nb_bolsonaro_tfidf['Sentimento'].str.replace('p','1').replace('x','0').replace('n','-1').astype(int))

df_bolsonaro = AnaliseSentimentos_tfidf(df_bolsonaro, nb_bolsonaro_tfidf,model_nb_bolsonaro_tfidf, 'predict_tfidf_NaiveBayes')


accuracy:
0.4625635630369981
precision:
0.3680495117544142
recall:
0.6316194411424353
f1:
0.29544089380895405
matrix_confusion:
[[ 317  195  230]
 [   0    2    0]
 [1325 1315 2319]]
0.4394183524357004
1555
(array([0.0e+000, 9.9e-324, 1.5e-323, ..., 1.0e+000, 1.0e+000, 1.0e+000]), array([1668667,       7,       5, ...,      10,     967,  939961],
      dtype=int64))
Wall time: 2min 30s


### Bolsonaro - Random Forest - bw

In [13]:
%%time
model_RandomForest_bolsonaro_bw, RandomForest_bolsonaro_bw, df_train_RandomForest_bolsonaro_bw, df_teste_RandomForest_bolsonaro_bw = \
train_model_bw(RandomForestClassifier(), df_bolsonaro_train, 100)
model_RandomForest_bolsonaro_bw = model_RandomForest_bolsonaro_bw.fit(Bag_of_words_teste(df_train_RandomForest_bolsonaro_bw, RandomForest_bolsonaro_bw).toarray(), df_train_RandomForest_bolsonaro_bw['Sentimento'].str.replace('p','1').replace('x','0').replace('n','-1').astype(int))

df_bolsonaro = AnaliseSentimentos_bw(df_bolsonaro, RandomForest_bolsonaro_bw,model_RandomForest_bolsonaro_bw, 'predict_bw_RandomForest')


accuracy:
0.5639137296159916
precision:
0.39651575508813713
recall:
0.6712353971026025
f1:
0.3492575596547973
matrix_confusion:
[[ 319  169  254]
 [   0    2    0]
 [ 868 1196 2895]]
0.521417691807179
705
(array([0.  , 0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1 ,
       0.11, 0.12, 0.13, 0.14, 0.15, 0.16, 0.17, 0.18, 0.19, 0.2 , 0.21,
       0.22, 0.23, 0.24, 0.25, 0.26, 0.27, 0.28, 0.29, 0.3 , 0.31, 0.32,
       0.33, 0.34, 0.35, 0.36, 0.37, 0.38, 0.39, 0.4 , 0.41, 0.42, 0.43,
       0.44, 0.45, 0.46, 0.47, 0.48, 0.49, 0.5 , 0.51, 0.52, 0.53, 0.54,
       0.55, 0.56, 0.57, 0.58, 0.59, 0.6 , 0.61, 0.62, 0.63, 0.64, 0.65,
       0.66, 0.67, 0.68, 0.69, 0.7 , 0.71, 0.72, 0.73, 0.74, 0.75, 0.76,
       0.77, 0.78, 0.79, 0.8 , 0.81, 0.82, 0.83, 0.84, 0.85, 0.86, 0.87,
       0.88, 0.89, 0.9 , 0.91, 0.92, 0.93, 0.94, 0.95, 0.96, 0.97, 0.98,
       0.99, 1.  ]), array([ 6624, 14257, 53487, 18562, 28170, 31998, 30082, 31346, 35421,
       36303, 32541, 42863, 42436, 44135, 4160

### Bolsonaro - Random Forest - tfidf

In [14]:
%%time
model_RandomForest_bolsonaro_tfidf, RandomForest_bolsonaro_tfidf, df_train_RandomForest_bolsonaro_tfidf, df_teste_RandomForest_bolsonaro_tfidf = \
train_model_tfidf(RandomForestClassifier(), df_bolsonaro_train, 100)
model_RandomForest_bolsonaro_tfidf = model_RandomForest_bolsonaro_tfidf.fit(tfidf_teste(df_train_RandomForest_bolsonaro_tfidf, RandomForest_bolsonaro_tfidf).toarray(), df_train_RandomForest_bolsonaro_tfidf['Sentimento'].str.replace('p','1').replace('x','0').replace('n','-1').astype(int))

df_bolsonaro = AnaliseSentimentos_tfidf(df_bolsonaro, RandomForest_bolsonaro_tfidf,model_RandomForest_bolsonaro_tfidf, 'predict_tfidf_RandomForest')


accuracy:
0.638435910924075
precision:
0.4061924899964308
recall:
0.5079903184549968
f1:
0.3686335149198128
matrix_confusion:
[[ 253  147  342]
 [   0    1    1]
 [ 564 1008 3387]]
0.48031305857382883
906
(array([0.  , 0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1 ,
       0.11, 0.12, 0.13, 0.14, 0.15, 0.16, 0.17, 0.18, 0.19, 0.2 , 0.21,
       0.22, 0.23, 0.24, 0.25, 0.26, 0.27, 0.28, 0.29, 0.3 , 0.31, 0.32,
       0.33, 0.34, 0.35, 0.36, 0.37, 0.38, 0.39, 0.4 , 0.41, 0.42, 0.43,
       0.44, 0.45, 0.46, 0.47, 0.48, 0.49, 0.5 , 0.51, 0.52, 0.53, 0.54,
       0.55, 0.56, 0.57, 0.58, 0.59, 0.6 , 0.61, 0.62, 0.63, 0.64, 0.65,
       0.66, 0.67, 0.68, 0.69, 0.7 , 0.71, 0.72, 0.73, 0.74, 0.75, 0.76,
       0.77, 0.78, 0.79, 0.8 , 0.81, 0.82, 0.83, 0.84, 0.85, 0.86, 0.87,
       0.88, 0.89, 0.9 , 0.91, 0.92, 0.93, 0.94, 0.95, 0.96, 0.97, 0.98,
       0.99, 1.  ]), array([ 3479,  2383,  3532,  9545, 13493, 33863, 24220, 27864, 35972,
       39536, 43894, 40646, 46331, 43449, 4666

### Bolsonaro - SVM - bw

In [15]:
%%time
model_svc_bolsonaro_bw, svc_bolsonaro_bw, df_train_svc_bolsonaro_bw, df_teste_svc_bolsonaro_bw = \
train_model_bw(SVC(probability=True), df_bolsonaro_train, 100)
model_svc_bolsonaro_bw = model_svc_bolsonaro_bw.fit(Bag_of_words_teste(df_train_svc_bolsonaro_bw, svc_bolsonaro_bw).toarray(), df_train_svc_bolsonaro_bw['Sentimento'].str.replace('p','1').replace('x','0').replace('n','-1').astype(int))

df_bolsonaro = AnaliseSentimentos_bw(df_bolsonaro, svc_bolsonaro_bw,model_svc_bolsonaro_bw, 'predict_bw_SVM')

accuracy:
0.6037173417499562
precision:
0.3821183130648977
recall:
0.5275347698386428
f1:
0.3508499044977406
matrix_confusion:
[[ 339  109  294]
 [   1    1    0]
 [1123  733 3103]]
0.45721702033993916
782
(array([0.07101874, 0.07750577, 0.08272359, ..., 0.81559389, 0.81972194,
       0.82835011]), array([1, 1, 2, ..., 1, 1, 1], dtype=int64))
Wall time: 16min 11s


### Bolsonaro - SVM - tfidf

In [16]:
%%time
model_svc_bolsonaro_tfidf, svc_bolsonaro_tfidf, df_train_svc_bolsonaro_tfidf, df_teste_svc_bolsonaro_tfidf = \
train_model_tfidf(SVC(probability=True), df_bolsonaro_train, 100)
model_svc_bolsonaro_tfidf = model_svc_bolsonaro_tfidf.fit(tfidf_teste(df_train_svc_bolsonaro_tfidf, svc_bolsonaro_tfidf).toarray(), df_train_svc_bolsonaro_tfidf['Sentimento'].str.replace('p','1').replace('x','0').replace('n','-1').astype(int))

df_bolsonaro = AnaliseSentimentos_tfidf(df_bolsonaro, svc_bolsonaro_tfidf,model_svc_bolsonaro_tfidf, 'predict_tfidf_SVM')

accuracy:
0.6468525337541645
precision:
0.38786355525485966
recall:
0.7114338473959062
f1:
0.3678637437075329
matrix_confusion:
[[ 341   84  317]
 [   0    2    0]
 [1039  574 3346]]
0.5285034200281158
1356
(array([0.01150762, 0.01226914, 0.01277185, ..., 0.94636407, 0.94878761,
       0.95074723]), array([  1,  63, 261, ...,  63,   1,   4], dtype=int64))
Wall time: 15min 29s


### Bolsonaro - MLP - bw

In [17]:
%%time
model_mlp_bolsonaro_bw, mlp_bolsonaro_bw, df_train_mlp_bolsonaro_bw, df_teste_mlp_bolsonaro_bw = \
train_model_bw(MLPClassifier(max_iter=400), df_bolsonaro_train, 100)
model_mlp_bolsonaro_bw = model_mlp_bolsonaro_bw.fit(Bag_of_words_teste(df_train_mlp_bolsonaro_bw, mlp_bolsonaro_bw).toarray(), df_train_mlp_bolsonaro_bw['Sentimento'].str.replace('p','1').replace('x','0').replace('n','-1').astype(int))

df_bolsonaro = AnaliseSentimentos_bw(df_bolsonaro, mlp_bolsonaro_bw,model_mlp_bolsonaro_bw, 'predict_bw_MLP')

accuracy:
0.4746624583552516
precision:
0.36910767845195086
recall:
0.481882614437489
f1:
0.30025437956969436
matrix_confusion:
[[ 349  170  223]
 [   0    1    1]
 [1456 1146 2357]]
0.4747423977756179
1296
(array([3.87192721e-14, 1.25882172e-12, 2.11590942e-12, ...,
       9.99999993e-01, 1.00000000e+00, 1.00000000e+00]), array([1, 1, 1, ..., 1, 1, 1], dtype=int64))
Wall time: 8min 45s


### Bolsonaro - MLP - tfidf

In [18]:
%%time
model_mlp_bolsonaro_tfidf, mlp_bolsonaro_tfidf, df_train_mlp_bolsonaro_tfidf, df_teste_mlp_bolsonaro_tfidf = \
train_model_tfidf(MLPClassifier(max_iter=400), df_bolsonaro_train, 100)
model_mlp_bolsonaro_tfidf = model_mlp_bolsonaro_tfidf.fit(Bag_of_words_teste(df_train_mlp_bolsonaro_tfidf, mlp_bolsonaro_tfidf).toarray(), df_train_mlp_bolsonaro_tfidf['Sentimento'].str.replace('p','1').replace('x','0').replace('n','-1').astype(int))

df_bolsonaro = AnaliseSentimentos_tfidf(df_bolsonaro, mlp_bolsonaro_tfidf,model_mlp_bolsonaro_tfidf, 'predict_tfidf_MLP')

accuracy:
0.48921620199894794
precision:
0.3766446110568409
recall:
0.648330868376754
f1:
0.310504407529125
matrix_confusion:
[[ 334  194  214]
 [   0    2    0]
 [1266 1239 2454]]
0.4561740222404169
1480
(array([7.42121238e-05, 8.81140140e-05, 9.52289951e-05, ...,
       9.99719752e-01, 9.99786401e-01, 9.99837674e-01]), array([1, 1, 1, ..., 1, 1, 1], dtype=int64))
Wall time: 10min 20s


### Bolsonaro - KNN - bw

In [19]:
%%time
model_knn_bolsonaro_bw, knn_bolsonaro_bw, df_train_knn_bolsonaro_bw, df_teste_knn_bolsonaro_bw = \
train_model_bw(KNeighborsClassifier(), df_bolsonaro_train, 100)
model_knn_bolsonaro_bw = model_knn_bolsonaro_bw.fit(Bag_of_words_teste(df_train_knn_bolsonaro_bw, knn_bolsonaro_bw).toarray(), df_train_knn_bolsonaro_bw['Sentimento'].str.replace('p','1').replace('x','0').replace('n','-1').astype(int))

df_bolsonaro = AnaliseSentimentos_bw(df_bolsonaro, knn_bolsonaro_bw,model_knn_bolsonaro_bw, 'predict_bw_KNN')

accuracy:
0.15851306330001755
precision:
0.35619769767789583
recall:
0.5647260818133674
f1:
0.12998464602230822
matrix_confusion:
[[ 447  262   33]
 [   0    2    0]
 [2860 1644  455]]
0.4610769931297578
762
(array([0. , 0.2, 0.4, 0.6, 0.8, 1. ]), array([580725, 823215, 592848, 648959, 191433,   2134], dtype=int64))
Wall time: 2min 9s


### Bolsonaro - KNN - tfidf

In [20]:
%%time
model_knn_bolsonaro_tfidf, knn_bolsonaro_tfidf, df_train_knn_bolsonaro_tfidf, df_teste_knn_bolsonaro_tfidf = \
train_model_tfidf(KNeighborsClassifier(), df_bolsonaro_train, 100)
model_knn_bolsonaro_tfidf = model_knn_bolsonaro_tfidf.fit(Bag_of_words_teste(df_train_knn_bolsonaro_tfidf, knn_bolsonaro_tfidf).toarray(), df_train_knn_bolsonaro_tfidf['Sentimento'].str.replace('p','1').replace('x','0').replace('n','-1').astype(int))

df_bolsonaro = AnaliseSentimentos_tfidf(df_bolsonaro, knn_bolsonaro_tfidf,model_knn_bolsonaro_tfidf, 'predict_tfidf_KNN')

accuracy:
0.399088199193407
precision:
0.3684702429698051
recall:
0.6298256666027101
f1:
0.2706680177352723
matrix_confusion:
[[ 376  204  162]
 [   0    2    0]
 [1681 1380 1898]]
0.4170130316252986
1843
(array([0. , 0.2, 0.4, 0.6, 0.8, 1. ]), array([405761, 912558, 915392, 451664, 135839,  18100], dtype=int64))
Wall time: 2min 37s


# Lula

### Lula - Arvore de Decisao - bw

In [21]:
%%time
model_tree_lula_bw, tree_lula_bw, df_train_tree_lula_bw, df_teste_tree_lula_bw = \
train_model_bw(tree.DecisionTreeClassifier(), df_lula_train, 100)
model_tree_lula_bw = model_tree_lula_bw.fit(Bag_of_words_teste(df_train_tree_lula_bw, tree_lula_bw), df_train_tree_lula_bw['Sentimento'].str.replace('p','1').replace('x','0').replace('n','-1').astype(int))

df_lula = AnaliseSentimentos_bw(df_lula, tree_lula_bw,model_tree_lula_bw, 'predict_bw_ArvoreDeDecisao')


accuracy:
0.4245314591700134
precision:
0.38250788333414537
recall:
0.6115273477527058
f1:
0.29276802356754267
matrix_confusion:
[[ 690  604  400]
 [   0    2    0]
 [2301 3573 4382]]
0.3992221701353972
1501
(array([0., 1.]), array([1863460,  931730], dtype=int64))
Wall time: 1min 20s


### Lula - Arvore de Decisao - tfidf

In [22]:
%%time
model_tree_lula_tfidf, tree_lula_tfidf, df_train_tree_lula_tfidf, df_teste_tree_lula_tfidf = \
train_model_tfidf(tree.DecisionTreeClassifier(), df_lula_train, 100)
model_tree_lula_tfidf = model_tree_lula_tfidf.fit(tfidf_teste(df_train_tree_lula_tfidf, tree_lula_tfidf), df_train_tree_lula_tfidf['Sentimento'].str.replace('p','1').replace('x','0').replace('n','-1').astype(int))

df_lula = AnaliseSentimentos_tfidf(df_lula, tree_lula_tfidf,model_tree_lula_tfidf, 'predict_tfidf_ArvoreDeDecisao')


accuracy:
0.4943942436412316
precision:
0.3976839200014866
recall:
0.625852746624623
f1:
0.32485465690762244
matrix_confusion:
[[ 612  598  484]
 [   0    2    0]
 [1603 3358 5295]]
0.46069639179374094
2087
(array([0., 1.]), array([1863460,  931730], dtype=int64))
Wall time: 1min 22s


### Lula - Naive Bayes - bw

In [23]:
%%time
model_nb_lula_bw, nb_lula_bw, df_train_nb_lula_bw, df_teste_nb_lula_bw = \
train_model_bw(GaussianNB(), df_lula_train, 100)
model_nb_lula_bw = model_nb_lula_bw.fit(Bag_of_words_teste(df_train_nb_lula_bw, nb_lula_bw).toarray(), df_train_nb_lula_bw['Sentimento'].str.replace('p','1').replace('x','0').replace('n','-1').astype(int))

df_lula = AnaliseSentimentos_bw(df_lula, nb_lula_bw,model_nb_lula_bw, 'predict_bw_NaiveBayes')


accuracy:
0.3543340026773762
precision:
0.36191231986528877
recall:
0.5737453730734826
f1:
0.25383699967939305
matrix_confusion:
[[ 626  616  452]
 [   0    2    0]
 [2557 4092 3607]]
0.4110667979923319
2398
(array([0.00000000e+000, 9.88131292e-324, 1.48219694e-323, ...,
       9.99999046e-001, 1.00000000e+000, 1.00000000e+000]), array([1761306,      16,       8, ...,       1,       1,  931715],
      dtype=int64))
Wall time: 2min 56s


### Lula - Naive Bayes - tfidf

In [24]:
%%time
model_nb_lula_tfidf, nb_lula_tfidf, df_train_nb_lula_tfidf, df_teste_nb_lula_tfidf = \
train_model_tfidf(GaussianNB(), df_lula_train, 100)
model_nb_lula_tfidf = model_nb_lula_tfidf.fit(tfidf_teste(df_train_nb_lula_tfidf, nb_lula_tfidf).toarray(), df_train_nb_lula_tfidf['Sentimento'].str.replace('p','1').replace('x','0').replace('n','-1').astype(int))

df_lula = AnaliseSentimentos_tfidf(df_lula, nb_lula_tfidf,model_nb_lula_tfidf, 'predict_tfidf_NaiveBayes')


accuracy:
0.5086178045515395
precision:
0.363374724385262
recall:
0.6231643864337808
f1:
0.3073011848614248
matrix_confusion:
[[ 562  436  696]
 [   0    2    0]
 [2227 2514 5515]]
0.4506145250580018
2923
(array([0.0e+000, 9.9e-324, 1.5e-323, ..., 1.0e+000, 1.0e+000, 1.0e+000]), array([1728387,      27,      56, ...,      40,     887,  927436],
      dtype=int64))
Wall time: 2min 56s


### Lula - Random Forest - bw

In [25]:
%%time
model_RandomForest_lula_bw, RandomForest_lula_bw, df_train_RandomForest_lula_bw, df_teste_RandomForest_lula_bw = \
train_model_bw(RandomForestClassifier(), df_lula_train, 100)
model_RandomForest_lula_bw = model_RandomForest_lula_bw.fit(Bag_of_words_teste(df_train_RandomForest_lula_bw, RandomForest_lula_bw).toarray(), df_train_RandomForest_lula_bw['Sentimento'].str.replace('p','1').replace('x','0').replace('n','-1').astype(int))

df_lula = AnaliseSentimentos_bw(df_lula, RandomForest_lula_bw,model_RandomForest_lula_bw, 'predict_bw_RandomForest')


accuracy:
0.47431392235609104
precision:
0.4117632903389441
recall:
0.43203421761427724
f1:
0.31781702079873964
matrix_confusion:
[[ 494  793  407]
 [   0    1    1]
 [1109 3973 5174]]
0.46142314426004566
966
(array([0.  , 0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1 ,
       0.11, 0.12, 0.13, 0.14, 0.15, 0.16, 0.17, 0.18, 0.19, 0.2 , 0.21,
       0.22, 0.23, 0.24, 0.25, 0.26, 0.27, 0.28, 0.29, 0.3 , 0.31, 0.32,
       0.33, 0.34, 0.35, 0.36, 0.37, 0.38, 0.39, 0.4 , 0.41, 0.42, 0.43,
       0.44, 0.45, 0.46, 0.47, 0.48, 0.49, 0.5 , 0.51, 0.52, 0.53, 0.54,
       0.55, 0.56, 0.57, 0.58, 0.59, 0.6 , 0.61, 0.62, 0.63, 0.64, 0.65,
       0.66, 0.67, 0.68, 0.69, 0.7 , 0.71, 0.72, 0.73, 0.74, 0.75, 0.76,
       0.77, 0.78, 0.79, 0.8 , 0.81, 0.82, 0.83, 0.84, 0.85, 0.86, 0.87,
       0.88, 0.89, 0.9 , 0.91, 0.92, 0.93, 0.94, 0.95, 0.96, 0.97, 0.98,
       0.99, 1.  ]), array([ 5963, 21004,  9401, 12879, 15144, 20599, 23788, 28747, 39434,
       68162, 57892, 75700, 54594, 59475, 

### Lula - Random Forest - tfidf

In [26]:
%%time
model_RandomForest_lula_tfidf, RandomForest_lula_tfidf, df_train_RandomForest_lula_tfidf, df_teste_RandomForest_lula_tfidf = \
train_model_tfidf(RandomForestClassifier(), df_lula_train, 100)
model_RandomForest_lula_tfidf = model_RandomForest_lula_tfidf.fit(tfidf_teste(df_train_RandomForest_lula_tfidf, RandomForest_lula_tfidf).toarray(), df_train_RandomForest_lula_tfidf['Sentimento'].str.replace('p','1').replace('x','0').replace('n','-1').astype(int))

df_lula = AnaliseSentimentos_tfidf(df_lula, RandomForest_lula_tfidf,model_RandomForest_lula_tfidf, 'predict_tfidf_RandomForest')


accuracy:
0.5135542168674698
precision:
0.4099318951744024
recall:
0.628367395616722
f1:
0.3356627067018357
matrix_confusion:
[[ 582  659  453]
 [   0    2    0]
 [1328 3374 5554]]
0.47187905359010746
1781
(array([0.        , 0.0025    , 0.00666667, ..., 0.99      , 0.99333333,
       1.        ]), array([4536,    1,   11, ...,    7,    1,    1], dtype=int64))
Wall time: 3min 16s


### lula - SVM - bw

In [27]:
%%time
model_svc_lula_bw, svc_lula_bw, df_train_svc_lula_bw, df_teste_svc_lula_bw = \
train_model_bw(SVC(probability=True), df_lula_train, 100)
model_svc_lula_bw = model_svc_lula_bw.fit(Bag_of_words_teste(df_train_svc_lula_bw, svc_lula_bw).toarray(), df_train_svc_lula_bw['Sentimento'].str.replace('p','1').replace('x','0').replace('n','-1').astype(int))

df_lula = AnaliseSentimentos_bw(df_lula, svc_lula_bw,model_svc_lula_bw, 'predict_bw_SVM')

accuracy:
0.46151271753681394
precision:
0.39795402856510625
recall:
0.5966525733048212
f1:
0.3090850663312706
matrix_confusion:
[[ 512  712  470]
 [   0    2    0]
 [1321 3933 5002]]
0.44702741475556207
1287
(array([0.04446149, 0.04793937, 0.04824298, ..., 0.81785305, 0.82354507,
       0.82905018]), array([ 1,  2,  1, ..., 25,  1,  1], dtype=int64))
Wall time: 13min 6s


### lula - SVM - tfidf

In [28]:
%%time
model_svc_lula_tfidf, svc_lula_tfidf, df_train_svc_lula_tfidf, df_teste_svc_lula_tfidf = \
train_model_tfidf(SVC(probability=True), df_lula_train, 100)
model_svc_lula_tfidf = model_svc_lula_tfidf.fit(tfidf_teste(df_train_svc_lula_tfidf, svc_lula_tfidf).toarray(), df_train_svc_lula_tfidf['Sentimento'].str.replace('p','1').replace('x','0').replace('n','-1').astype(int))

df_lula = AnaliseSentimentos_tfidf(df_lula, svc_lula_tfidf,model_svc_lula_tfidf, 'predict_tfidf_SVM')

accuracy:
0.5484437751004017
precision:
0.4100319555159723
recall:
0.6317355970508006
f1:
0.34234321683340224
matrix_confusion:
[[ 520  647  527]
 [   0    2    0]
 [1158 3065 6033]]
0.48313863612514424
1685
(array([0.00310619, 0.00343198, 0.00365581, ..., 0.9883467 , 0.98971772,
       0.98984457]), array([ 1,  5, 34, ..., 61, 34,  1], dtype=int64))
Wall time: 10min 21s


### Lula - MLP - bw

In [29]:
%%time
model_mlp_lula_bw, mlp_lula_bw, df_train_mlp_lula_bw, df_teste_mlp_lula_bw = \
train_model_bw(MLPClassifier(max_iter=400), df_lula_train, 100)
model_mlp_lula_bw = model_mlp_lula_bw.fit(Bag_of_words_teste(df_train_mlp_lula_bw, mlp_lula_bw).toarray(), df_train_mlp_lula_bw['Sentimento'].str.replace('p','1').replace('x','0').replace('n','-1').astype(int))

df_lula = AnaliseSentimentos_bw(df_lula, mlp_lula_bw,model_mlp_lula_bw, 'predict_bw_MLP')

accuracy:
0.5030120481927711
precision:
0.3843124689511305
recall:
0.47537940183486915
f1:
0.3208598707622176
matrix_confusion:
[[ 690  515  489]
 [   0    1    1]
 [2222 2713 5321]]
0.46087460150739384
2108
(array([4.40526440e-28, 2.74796553e-27, 5.86774776e-27, ...,
       1.00000000e+00, 1.00000000e+00, 1.00000000e+00]), array([  1,   1,   1, ...,   1,   8, 210], dtype=int64))
Wall time: 8min 8s


### Lula - MLP - tfidf

In [30]:
%%time
model_mlp_lula_tfidf, mlp_lula_tfidf, df_train_mlp_lula_tfidf, df_teste_mlp_lula_tfidf = \
train_model_tfidf(MLPClassifier(max_iter=400), df_lula_train, 100)
model_mlp_lula_tfidf = model_mlp_lula_tfidf.fit(Bag_of_words_teste(df_train_mlp_lula_tfidf, mlp_lula_tfidf).toarray(), df_train_mlp_lula_tfidf['Sentimento'].str.replace('p','1').replace('x','0').replace('n','-1').astype(int))

df_lula = AnaliseSentimentos_tfidf(df_lula, mlp_lula_tfidf,model_mlp_lula_tfidf, 'predict_tfidf_MLP')

accuracy:
0.5311244979919679
precision:
0.386700495906289
recall:
0.6435705214513185
f1:
0.3305280406054398
matrix_confusion:
[[ 633  495  566]
 [   0    2    0]
 [1903 2640 5713]]
0.47298088898875384
2469
(array([5.23077590e-05, 9.41820067e-05, 1.01606001e-04, ...,
       9.99596643e-01, 9.99710549e-01, 9.99734884e-01]), array([1, 1, 1, ..., 1, 1, 1], dtype=int64))
Wall time: 9min 4s


### Lula - KNN - bw

In [31]:
%%time
model_knn_lula_bw, knn_lula_bw, df_train_knn_lula_bw, df_teste_knn_lula_bw = \
train_model_bw(KNeighborsClassifier(), df_lula_train, 100)
model_knn_lula_bw = model_knn_lula_bw.fit(Bag_of_words_teste(df_train_knn_lula_bw, knn_lula_bw).toarray(), df_train_knn_lula_bw['Sentimento'].str.replace('p','1').replace('x','0').replace('n','-1').astype(int))

df_lula = AnaliseSentimentos_bw(df_lula, knn_lula_bw,model_knn_lula_bw, 'predict_bw_KNN')

accuracy:
0.2978580990629183
precision:
0.36911411945845973
recall:
0.5197740288596197
f1:
0.22281456256501642
matrix_confusion:
[[ 431 1029  234]
 [   0    2    0]
 [2009 5120 3127]]
0.38537640145170676
429
(array([0. , 0.2, 0.4, 0.6, 0.8, 1. ]), array([915078, 257327, 730910, 686135, 147602,  58138], dtype=int64))
Wall time: 2min 47s


### Lula - KNN - tfidf

In [32]:
%%time
model_knn_lula_tfidf, knn_lula_tfidf, df_train_knn_lula_tfidf, df_teste_knn_lula_tfidf = \
train_model_tfidf(KNeighborsClassifier(), df_lula_train, 100)
model_knn_lula_tfidf = model_knn_lula_tfidf.fit(Bag_of_words_teste(df_train_knn_lula_tfidf, knn_lula_tfidf).toarray(), df_train_knn_lula_tfidf['Sentimento'].str.replace('p','1').replace('x','0').replace('n','-1').astype(int))

df_lula = AnaliseSentimentos_tfidf(df_lula, knn_lula_tfidf,model_knn_lula_tfidf, 'predict_tfidf_KNN')

accuracy:
0.5036813922356091
precision:
0.3863291597165987
recall:
0.6600149129932139
f1:
0.32501800223193045
matrix_confusion:
[[ 798  424  472]
 [   0    2    0]
 [2510 2526 5220]]
0.4687608667943381
2982
(array([0. , 0.2, 0.4, 0.6, 0.8, 1. ]), array([542990, 837086, 763165, 390278, 183955,  77716], dtype=int64))
Wall time: 3min 5s


# Simone

### Simone - Arvore de Decisao - bw

In [33]:
%%time
model_tree_simone_bw, tree_simone_bw, df_train_tree_simone_bw, df_teste_tree_simone_bw = \
train_model_bw(tree.DecisionTreeClassifier(), df_simone_train, 100)
model_tree_simone_bw = model_tree_simone_bw.fit(Bag_of_words_teste(df_train_tree_simone_bw, tree_simone_bw), df_train_tree_simone_bw['Sentimento'].str.replace('p','1').replace('x','0').replace('n','-1').astype(int))

df_simone = AnaliseSentimentos_bw(df_simone, tree_simone_bw,model_tree_simone_bw, 'predict_bw_ArvoreDeDecisao')


accuracy:
0.4
precision:
0.5192376516606431
recall:
0.6041287188828172
f1:
0.35624123422159887
matrix_confusion:
[[ 2  0  0]
 [20 36  5]
 [45 11 16]]
0.5853356921579117
5
(array([0., 1.]), array([262324, 131162], dtype=int64))
Wall time: 6.57 s


### Simone - Arvore de Decisao - tfidf

In [34]:
%%time
model_tree_simone_tfidf, tree_simone_tfidf, df_train_tree_simone_tfidf, df_teste_tree_simone_tfidf = \
train_model_tfidf(tree.DecisionTreeClassifier(), df_simone_train, 100)
model_tree_simone_tfidf = model_tree_simone_tfidf.fit(tfidf_teste(df_train_tree_simone_tfidf, tree_simone_tfidf), df_train_tree_simone_tfidf['Sentimento'].str.replace('p','1').replace('x','0').replace('n','-1').astype(int))

df_simone = AnaliseSentimentos_tfidf(df_simone, tree_simone_tfidf,model_tree_simone_tfidf, 'predict_tfidf_ArvoreDeDecisao')


accuracy:
0.6592592592592592
precision:
0.5274768267726014
recall:
0.7670006071645415
f1:
0.546627413884051
matrix_confusion:
[[ 2  0  0]
 [ 3 37 21]
 [ 7 15 50]]
0.6250910267701133
7
(array([0., 1.]), array([262324, 131162], dtype=int64))
Wall time: 6.77 s


### Simone - Naive Bayes - bw

In [35]:
%%time
model_nb_simone_bw, nb_simone_bw, df_train_nb_simone_bw, df_teste_nb_simone_bw = \
train_model_bw(GaussianNB(), df_simone_train, 100)
model_nb_simone_bw = model_nb_simone_bw.fit(Bag_of_words_teste(df_train_nb_simone_bw, nb_simone_bw).toarray(), df_train_nb_simone_bw['Sentimento'].str.replace('p','1').replace('x','0').replace('n','-1').astype(int))

df_simone = AnaliseSentimentos_bw(df_simone, nb_simone_bw,model_nb_simone_bw, 'predict_bw_NaiveBayes')


accuracy:
0.6148148148148148
precision:
0.5055770887166237
recall:
0.5855343047965998
f1:
0.4687905534587456
matrix_confusion:
[[ 1  1  0]
 [ 6 47  8]
 [13 24 35]]
0.6196861099153903
9
(array([0.00000000e+000, 9.88131292e-324, 1.48219694e-323, ...,
       9.60997685e-088, 2.92225429e-080, 1.00000000e+000]), array([241888,      8,     11, ...,      1,      1, 131162], dtype=int64))
Wall time: 9.81 s


### Simone - Naive Bayes - tfidf

In [36]:
%%time
model_nb_simone_tfidf, nb_simone_tfidf, df_train_nb_simone_tfidf, df_teste_nb_simone_tfidf = \
train_model_tfidf(GaussianNB(), df_simone_train, 100)
model_nb_simone_tfidf = model_nb_simone_tfidf.fit(tfidf_teste(df_train_nb_simone_tfidf, nb_simone_tfidf).toarray(), df_train_nb_simone_tfidf['Sentimento'].str.replace('p','1').replace('x','0').replace('n','-1').astype(int))

df_simone = AnaliseSentimentos_tfidf(df_simone, nb_simone_tfidf,model_nb_simone_tfidf, 'predict_tfidf_NaiveBayes')


accuracy:
0.6074074074074074
precision:
0.5270735524256651
recall:
0.74210686095932
f1:
0.4859862359862359
matrix_confusion:
[[ 2  0  0]
 [ 9 46  6]
 [13 25 34]]
0.5906435141946571
13
(array([0.00000000e+000, 9.88131292e-324, 1.97626258e-323, 2.96439388e-323,
       4.44659081e-323, 1.28457068e-322, 2.76676762e-322, 4.69362364e-322,
       1.63535729e-321, 2.98909716e-321, 3.36458705e-321, 3.88335598e-321,
       1.45452926e-320, 2.34038896e-320, 2.53011017e-320, 5.17187918e-320,
       8.10267659e-320, 8.83241155e-320, 1.46515167e-319, 2.67477259e-319,
       4.74342545e-319, 8.53261252e-319, 2.88466650e-318, 4.44490111e-318,
       4.64536330e-318, 2.95709257e-317, 4.25820753e-317, 4.71599295e-317,
       1.03894831e-316, 1.05036375e-316, 2.16304973e-316, 3.48793187e-316,
       9.46058237e-316, 9.66284346e-316, 1.22488470e-315, 1.72222567e-315,
       9.43836863e-315, 1.23249444e-314, 2.16494016e-314, 1.81499323e-313,
       4.02632263e-313, 5.23360683e-313, 5.48433084e-313, 6.44222

### Simone - Random Forest - bw

In [37]:
%%time
model_RandomForest_simone_bw, RandomForest_simone_bw, df_train_RandomForest_simone_bw, df_teste_RandomForest_simone_bw = \
train_model_bw(RandomForestClassifier(), df_simone_train, 100)
model_RandomForest_simone_bw = model_RandomForest_simone_bw.fit(Bag_of_words_teste(df_train_RandomForest_simone_bw, RandomForest_simone_bw).toarray(), df_train_RandomForest_simone_bw['Sentimento'].str.replace('p','1').replace('x','0').replace('n','-1').astype(int))

df_simone = AnaliseSentimentos_bw(df_simone, RandomForest_simone_bw,model_RandomForest_simone_bw, 'predict_bw_RandomForest')


accuracy:
0.6592592592592592
precision:
0.5198412698412698
recall:
0.6158166363084395
f1:
0.521591804570528
matrix_confusion:
[[ 1  1  0]
 [ 1 50 10]
 [ 5 29 38]]
0.6577417860560378
7
(array([0.  , 0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1 ,
       0.11, 0.12, 0.13, 0.14, 0.15, 0.16, 0.17, 0.18, 0.19, 0.2 , 0.21,
       0.22, 0.23, 0.24, 0.25, 0.26, 0.27, 0.28, 0.29, 0.3 , 0.31, 0.32,
       0.33, 0.34, 0.35, 0.36, 0.37, 0.38, 0.39, 0.4 , 0.41, 0.42, 0.43,
       0.44, 0.45, 0.46, 0.47, 0.48, 0.49, 0.5 , 0.51, 0.52, 0.53, 0.54,
       0.55, 0.56, 0.57, 0.58, 0.59, 0.6 , 0.61, 0.62, 0.63, 0.64, 0.65,
       0.66, 0.67, 0.68, 0.69, 0.7 , 0.71, 0.72, 0.73, 0.74, 0.75, 0.76,
       0.77, 0.78, 0.79, 0.8 , 0.81, 0.82, 0.83, 0.84, 0.85, 0.86, 0.87,
       0.88, 0.89, 0.9 , 0.91, 0.92, 0.93, 0.95, 0.96, 0.97, 0.98, 0.99,
       1.  ]), array([ 5711,  2452, 13279,  3848,  3828,  5891,  3021,  1313, 15536,
        2417,  2406,  4242,  4223,  8663,  5224, 12846, 14475,  4745,
   

### Simone - Random Forest - tfidf

In [38]:
%%time
model_RandomForest_simone_tfidf, RandomForest_simone_tfidf, df_train_RandomForest_simone_tfidf, df_teste_RandomForest_simone_tfidf = \
train_model_tfidf(RandomForestClassifier(), df_simone_train, 100)
model_RandomForest_simone_tfidf = model_RandomForest_simone_tfidf.fit(tfidf_teste(df_train_RandomForest_simone_tfidf, RandomForest_simone_tfidf).toarray(), df_train_RandomForest_simone_tfidf['Sentimento'].str.replace('p','1').replace('x','0').replace('n','-1').astype(int))

df_simone = AnaliseSentimentos_tfidf(df_simone, RandomForest_simone_tfidf,model_RandomForest_simone_tfidf, 'predict_tfidf_RandomForest')


accuracy:
0.7037037037037037
precision:
0.5584249084249084
recall:
0.796448087431694
f1:
0.5724985112246735
matrix_confusion:
[[ 2  0  0]
 [ 6 39 16]
 [ 5 13 54]]
0.6577688026962449
5
(array([0.  , 0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1 ,
       0.11, 0.12, 0.13, 0.14, 0.15, 0.16, 0.17, 0.18, 0.19, 0.2 , 0.21,
       0.22, 0.23, 0.24, 0.25, 0.26, 0.27, 0.28, 0.29, 0.3 , 0.31, 0.32,
       0.33, 0.34, 0.35, 0.36, 0.37, 0.38, 0.39, 0.4 , 0.41, 0.42, 0.43,
       0.44, 0.45, 0.46, 0.47, 0.48, 0.49, 0.5 , 0.51, 0.52, 0.53, 0.54,
       0.55, 0.56, 0.57, 0.58, 0.59, 0.6 , 0.61, 0.62, 0.63, 0.64, 0.65,
       0.66, 0.67, 0.68, 0.69, 0.7 , 0.71, 0.72, 0.73, 0.74, 0.75, 0.76,
       0.77, 0.78, 0.79, 0.8 , 0.81, 0.82, 0.83, 0.84, 0.85, 0.86, 0.87,
       0.88, 0.89, 0.9 , 0.91, 0.92, 0.93, 0.94, 0.95, 0.96, 0.97, 0.98,
       0.99, 1.  ]), array([16577,  3884,  4799,  3971,  4295, 10179,  1303,  2259, 16622,
        5680,  1979,  8947,  1840,  5967,  3140,  3632,  3665,  371

### Simone - SVM - bw

In [39]:
%%time
model_svc_simone_bw, svc_simone_bw, df_train_svc_simone_bw, df_teste_svc_simone_bw = \
train_model_bw(SVC(probability=True), df_simone_train, 100)
model_svc_simone_bw = model_svc_simone_bw.fit(Bag_of_words_teste(df_train_svc_simone_bw, svc_simone_bw).toarray(), df_train_svc_simone_bw['Sentimento'].str.replace('p','1').replace('x','0').replace('n','-1').astype(int))

df_simone = AnaliseSentimentos_bw(df_simone, svc_simone_bw,model_svc_simone_bw, 'predict_bw_SVM')

accuracy:
0.5777777777777777
precision:
0.5481316137566138
recall:
0.7110655737704917
f1:
0.470953732370141
matrix_confusion:
[[ 2  0  0]
 [11 31 19]
 [23  4 45]]
0.5746758190366068
7
(array([0.01553689, 0.01667148, 0.01729065, ..., 0.95128424, 0.95154149,
       0.95430718]), array([ 1,  9,  1, ...,  2, 30,  2], dtype=int64))
Wall time: 22.4 s


### Simone - SVM - tfidf

In [40]:
%%time
model_svc_simone_tfidf, svc_simone_tfidf, df_train_svc_simone_tfidf, df_teste_svc_simone_tfidf = \
train_model_tfidf(SVC(probability=True), df_simone_train, 100)
model_svc_simone_tfidf = model_svc_simone_tfidf.fit(tfidf_teste(df_train_svc_simone_tfidf, svc_simone_tfidf).toarray(), df_train_svc_simone_tfidf['Sentimento'].str.replace('p','1').replace('x','0').replace('n','-1').astype(int))

df_simone = AnaliseSentimentos_tfidf(df_simone, svc_simone_tfidf,model_svc_simone_tfidf, 'predict_tfidf_SVM')

accuracy:
0.6148148148148148
precision:
0.5674390968508615
recall:
0.7325440194292653
f1:
0.4903298435213328
matrix_confusion:
[[ 2  0  0]
 [16 29 16]
 [16  4 52]]
0.6012819436540686
16
(array([0.0022314 , 0.00267737, 0.00345845, ..., 0.99114985, 0.99338244,
       0.99431015]), array([5326,    5, 5326, ...,    4,    5, 5326], dtype=int64))
Wall time: 23.5 s


### Simone - MLP - bw

In [41]:
%%time
model_mlp_simone_bw, mlp_simone_bw, df_train_mlp_simone_bw, df_teste_mlp_simone_bw = \
train_model_bw(MLPClassifier(max_iter=400), df_simone_train, 100)
model_mlp_simone_bw = model_mlp_simone_bw.fit(Bag_of_words_teste(df_train_mlp_simone_bw, mlp_simone_bw).toarray(), df_train_mlp_simone_bw['Sentimento'].str.replace('p','1').replace('x','0').replace('n','-1').astype(int))

df_simone = AnaliseSentimentos_bw(df_simone, mlp_simone_bw,model_mlp_simone_bw, 'predict_bw_MLP')

accuracy:
0.5925925925925926
precision:
0.5165151887333169
recall:
0.5632969034608379
f1:
0.46510338636417686
matrix_confusion:
[[ 1  0  1]
 [ 8 37 16]
 [21  9 42]]
0.5702011105544619
6
(array([3.64815882e-08, 4.21766154e-07, 5.01434695e-07, ...,
       9.99997310e-01, 9.99998113e-01, 9.99998731e-01]), array([ 1, 19,  3, ...,  1,  1,  1], dtype=int64))
Wall time: 1min 39s


### Simone - MLP - tfidf

In [42]:
%%time
model_mlp_simone_tfidf, mlp_simone_tfidf, df_train_mlp_simone_tfidf, df_teste_mlp_simone_tfidf = \
train_model_tfidf(MLPClassifier(max_iter=400), df_simone_train, 100)
model_mlp_simone_tfidf = model_mlp_simone_tfidf.fit(Bag_of_words_teste(df_train_mlp_simone_tfidf, mlp_simone_tfidf).toarray(), df_train_mlp_simone_tfidf['Sentimento'].str.replace('p','1').replace('x','0').replace('n','-1').astype(int))

df_simone = AnaliseSentimentos_tfidf(df_simone, mlp_simone_tfidf,model_mlp_simone_tfidf, 'predict_tfidf_MLP')

accuracy:
0.6148148148148148
precision:
0.5107570025602812
recall:
0.7425622343655132
f1:
0.4946530348434102
matrix_confusion:
[[ 2  0  0]
 [ 8 41 12]
 [12 20 40]]
0.5906967716460049
12
(array([1.00259699e-04, 1.09903167e-04, 1.23524115e-04, ...,
       9.99150944e-01, 9.99242045e-01, 9.99245544e-01]), array([1, 1, 1, ..., 1, 1, 1], dtype=int64))
Wall time: 1min 53s


### Simone - KNN - bw

In [43]:
%%time
model_knn_simone_bw, knn_simone_bw, df_train_knn_simone_bw, df_teste_knn_simone_bw = \
train_model_bw(KNeighborsClassifier(), df_simone_train, 100)
model_knn_simone_bw = model_knn_simone_bw.fit(Bag_of_words_teste(df_train_knn_simone_bw, knn_simone_bw).toarray(), df_train_knn_simone_bw['Sentimento'].str.replace('p','1').replace('x','0').replace('n','-1').astype(int))

df_simone = AnaliseSentimentos_bw(df_simone, knn_simone_bw,model_knn_simone_bw, 'predict_bw_KNN')

accuracy:
0.6814814814814815
precision:
0.4943249701314218
recall:
0.4501366120218579
f1:
0.4538581693220867
matrix_confusion:
[[ 0  0  2]
 [ 4 29 28]
 [ 2  7 63]]
0.6071774614419219
1
(array([0. , 0.2, 0.4, 0.6, 0.8, 1. ]), array([ 32896, 196164,  48366, 106917,   3552,   5591], dtype=int64))
Wall time: 8.39 s


### Simone - KNN - tfidf

In [44]:
%%time
model_knn_simone_tfidf, knn_simone_tfidf, df_train_knn_simone_tfidf, df_teste_knn_simone_tfidf = \
train_model_tfidf(KNeighborsClassifier(), df_simone_train, 100)
model_knn_simone_tfidf = model_knn_simone_tfidf.fit(Bag_of_words_teste(df_train_knn_simone_tfidf, knn_simone_tfidf).toarray(), df_train_knn_simone_tfidf['Sentimento'].str.replace('p','1').replace('x','0').replace('n','-1').astype(int))

df_simone = AnaliseSentimentos_tfidf(df_simone, knn_simone_tfidf,model_knn_simone_tfidf, 'predict_tfidf_KNN')

accuracy:
0.674074074074074
precision:
0.5625180375180375
recall:
0.7812689738919247
f1:
0.5377661125692622
matrix_confusion:
[[ 2  0  0]
 [ 9 43  9]
 [13 13 46]]
0.6389067995133246
13
(array([0. , 0.2, 0.4, 0.6, 0.8, 1. ]), array([ 70874, 101496, 138248,  60856,  14810,   7202], dtype=int64))
Wall time: 22.4 s


# Ciro

### Ciro - Arvore de Decisao - bw

In [45]:
%%time
model_tree_ciro_bw, tree_ciro_bw, df_train_tree_ciro_bw, df_teste_tree_ciro_bw = \
train_model_bw(tree.DecisionTreeClassifier(), df_ciro_train, 100)
model_tree_ciro_bw = model_tree_ciro_bw.fit(Bag_of_words_teste(df_train_tree_ciro_bw, tree_ciro_bw), df_train_tree_ciro_bw['Sentimento'].str.replace('p','1').replace('x','0').replace('n','-1').astype(int))

df_ciro = AnaliseSentimentos_bw(df_ciro, tree_ciro_bw,model_tree_ciro_bw, 'predict_bw_ArvoreDeDecisao')


accuracy:
0.2699822380106572
precision:
0.3351409523226501
recall:
0.4868631665599561
f1:
0.1456507505707185
matrix_confusion:
[[   1    1    0]
 [   5   18    3]
 [2229 2694 1805]]
0.4490685876668354
1503
(array([0., 1.]), array([996132, 498066], dtype=int64))
Wall time: 45.3 s


### Ciro - Arvore de Decisao - tfidf

In [46]:
%%time
model_tree_ciro_tfidf, tree_ciro_tfidf, df_train_tree_ciro_tfidf, df_teste_tree_ciro_tfidf = \
train_model_tfidf(tree.DecisionTreeClassifier(), df_ciro_train, 100)
model_tree_ciro_tfidf = model_tree_ciro_tfidf.fit(tfidf_teste(df_train_tree_ciro_tfidf, tree_ciro_tfidf), df_train_tree_ciro_tfidf['Sentimento'].str.replace('p','1').replace('x','0').replace('n','-1').astype(int))

df_ciro = AnaliseSentimentos_tfidf(df_ciro, tree_ciro_tfidf,model_tree_ciro_tfidf, 'predict_tfidf_ArvoreDeDecisao')


accuracy:
0.5057726465364121
precision:
0.33576421798978234
recall:
0.6685493460166468
f1:
0.2298762030229988
matrix_confusion:
[[   2    0    0]
 [   6   13    7]
 [1756 1570 3402]]
0.43499060339146
1756
(array([0., 1.]), array([996132, 498066], dtype=int64))
Wall time: 45.5 s


### Ciro - Naive Bayes - bw

In [47]:
%%time
model_nb_ciro_bw, nb_ciro_bw, df_train_nb_ciro_bw, df_teste_nb_ciro_bw = \
train_model_bw(GaussianNB(), df_ciro_train, 100)
model_nb_ciro_bw = model_nb_ciro_bw.fit(Bag_of_words_teste(df_train_nb_ciro_bw, nb_ciro_bw).toarray(), df_train_nb_ciro_bw['Sentimento'].str.replace('p','1').replace('x','0').replace('n','-1').astype(int))

df_ciro = AnaliseSentimentos_bw(df_ciro, nb_ciro_bw,model_nb_ciro_bw, 'predict_bw_NaiveBayes')


accuracy:
0.3932800473653049
precision:
0.33594657780324844
recall:
0.6181247903899509
f1:
0.19407499652824775
matrix_confusion:
[[   2    0    0]
 [  10   12    4]
 [2704 1381 2643]]
0.42711898318739855
2269
(array([0.00000000e+000, 9.88131292e-324, 1.48219694e-323, ...,
       1.07710351e-003, 9.98922896e-001, 1.00000000e+000]), array([934397,     38,     78, ...,      3,      3, 498063], dtype=int64))
Wall time: 1min 30s


### Ciro - Naive Bayes - tfidf

In [48]:
%%time
model_nb_ciro_tfidf, nb_ciro_tfidf, df_train_nb_ciro_tfidf, df_teste_nb_ciro_tfidf = \
train_model_tfidf(GaussianNB(), df_ciro_train, 100)
model_nb_ciro_tfidf = model_nb_ciro_tfidf.fit(tfidf_teste(df_train_nb_ciro_tfidf, nb_ciro_tfidf).toarray(), df_train_nb_ciro_tfidf['Sentimento'].str.replace('p','1').replace('x','0').replace('n','-1').astype(int))

df_ciro = AnaliseSentimentos_tfidf(df_ciro, nb_ciro_tfidf,model_nb_ciro_tfidf, 'predict_tfidf_NaiveBayes')


accuracy:
0.5267910005920663
precision:
0.3364742509447365
recall:
0.6500426842281777
f1:
0.2370532540795254
matrix_confusion:
[[   2    0    0]
 [  10   11    5]
 [2079 1103 3546]]
0.43759029746112643
2079
(array([0.0e+000, 9.9e-324, 1.5e-323, ..., 1.0e+000, 1.0e+000, 1.0e+000]), array([954855,     23,     19, ...,      1,      4, 497864], dtype=int64))
Wall time: 1min 31s


### Ciro - Random Forest - bw

In [49]:
%%time
model_RandomForest_ciro_bw, RandomForest_ciro_bw, df_train_RandomForest_ciro_bw, df_teste_RandomForest_ciro_bw = \
train_model_bw(RandomForestClassifier(), df_ciro_train, 100)
model_RandomForest_ciro_bw = model_RandomForest_ciro_bw.fit(Bag_of_words_teste(df_train_RandomForest_ciro_bw, RandomForest_ciro_bw).toarray(), df_train_RandomForest_ciro_bw['Sentimento'].str.replace('p','1').replace('x','0').replace('n','-1').astype(int))

df_ciro = AnaliseSentimentos_bw(df_ciro, RandomForest_ciro_bw,model_RandomForest_ciro_bw, 'predict_bw_RandomForest')


accuracy:
0.3845470692717584
precision:
0.33472005650236686
recall:
0.6279726516052319
f1:
0.18956283893693512
matrix_confusion:
[[   2    0    0]
 [   5   13    8]
 [2093 2052 2583]]
0.412277544850584
1305
(array([0.  , 0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1 ,
       0.11, 0.12, 0.13, 0.14, 0.15, 0.16, 0.17, 0.18, 0.19, 0.2 , 0.21,
       0.22, 0.23, 0.24, 0.25, 0.26, 0.27, 0.28, 0.29, 0.3 , 0.31, 0.32,
       0.33, 0.34, 0.35, 0.36, 0.37, 0.38, 0.39, 0.4 , 0.41, 0.42, 0.43,
       0.44, 0.45, 0.46, 0.47, 0.48, 0.49, 0.5 , 0.51, 0.52, 0.53, 0.54,
       0.55, 0.56, 0.57, 0.58, 0.59, 0.6 , 0.61, 0.62, 0.63, 0.64, 0.65,
       0.66, 0.67, 0.68, 0.69, 0.7 , 0.71, 0.72, 0.73, 0.74, 0.75, 0.76,
       0.77, 0.78, 0.79, 0.8 , 0.81, 0.82, 0.83, 0.84, 0.85, 0.86, 0.87,
       0.88, 0.89, 0.9 , 0.91, 0.92, 0.93, 0.94, 0.95, 0.96, 0.97, 0.98,
       0.99, 1.  ]), array([ 1265,  7670, 13147, 10080,  6899,  8285,  9990, 10480, 13651,
       17574, 29920, 16420, 18160, 22497, 24

### Ciro - Random Forest - tfidf

In [50]:
%%time
model_RandomForest_ciro_tfidf, RandomForest_ciro_tfidf, df_train_RandomForest_ciro_tfidf, df_teste_RandomForest_ciro_tfidf = \
train_model_tfidf(RandomForestClassifier(), df_ciro_train, 100)
model_RandomForest_ciro_tfidf = model_RandomForest_ciro_tfidf.fit(tfidf_teste(df_train_RandomForest_ciro_tfidf, RandomForest_ciro_tfidf).toarray(), df_train_RandomForest_ciro_tfidf['Sentimento'].str.replace('p','1').replace('x','0').replace('n','-1').astype(int))

df_ciro = AnaliseSentimentos_tfidf(df_ciro, RandomForest_ciro_tfidf,model_RandomForest_ciro_tfidf, 'predict_tfidf_RandomForest')


accuracy:
0.5134695085849615
precision:
0.3349006141319262
recall:
0.6072708009390531
f1:
0.23110765030979538
matrix_confusion:
[[   2    0    0]
 [   8    8   10]
 [2069 1200 3459]]
0.42168714349143405
2069
(array([0.  , 0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1 ,
       0.11, 0.12, 0.13, 0.14, 0.15, 0.16, 0.17, 0.18, 0.19, 0.2 , 0.21,
       0.22, 0.23, 0.24, 0.25, 0.26, 0.27, 0.28, 0.29, 0.3 , 0.31, 0.32,
       0.33, 0.34, 0.35, 0.36, 0.37, 0.38, 0.39, 0.4 , 0.41, 0.42, 0.43,
       0.44, 0.45, 0.46, 0.47, 0.48, 0.49, 0.5 , 0.51, 0.52, 0.53, 0.54,
       0.55, 0.56, 0.57, 0.58, 0.59, 0.6 , 0.61, 0.62, 0.63, 0.64, 0.65,
       0.66, 0.67, 0.68, 0.69, 0.7 , 0.71, 0.72, 0.73, 0.74, 0.75, 0.76,
       0.77, 0.78, 0.79, 0.8 , 0.81, 0.82, 0.83, 0.84, 0.85, 0.86, 0.87,
       0.88, 0.89, 0.9 , 0.91, 0.92, 0.93, 0.94, 0.95, 0.96, 0.97, 0.98]), array([  306,  3262,  2011,  3271,  5560,  6281,  7256,  9144,  8406,
        9795, 13570, 17106, 18610, 19554, 20235, 26408, 22638,

### Ciro - SVM - bw

In [51]:
%%time
model_svc_ciro_bw, svc_ciro_bw, df_train_svc_ciro_bw, df_teste_svc_ciro_bw = \
train_model_bw(SVC(probability=True), df_ciro_train, 100)
model_svc_ciro_bw = model_svc_ciro_bw.fit(Bag_of_words_teste(df_train_svc_ciro_bw, svc_ciro_bw).toarray(), df_train_svc_ciro_bw['Sentimento'].str.replace('p','1').replace('x','0').replace('n','-1').astype(int))

df_ciro = AnaliseSentimentos_bw(df_ciro, svc_ciro_bw,model_svc_ciro_bw, 'predict_bw_SVM')

accuracy:
0.2470396684428656
precision:
0.33744428254432707
recall:
0.4025580048172201
f1:
0.14078854438268093
matrix_confusion:
[[   1    0    1]
 [  12   12    2]
 [4221  851 1656]]
0.3645319052449232
3322
(array([0.02240704, 0.02252773, 0.02496058, ..., 0.83151511, 0.83761853,
       0.8625048 ]), array([6, 1, 2, ..., 6, 1, 1], dtype=int64))
Wall time: 5min 14s


### Ciro - SVM - tfidf

In [52]:
%%time
model_svc_ciro_tfidf, svc_ciro_tfidf, df_train_svc_ciro_tfidf, df_teste_svc_ciro_tfidf = \
train_model_tfidf(SVC(probability=True), df_ciro_train, 100)
model_svc_ciro_tfidf = model_svc_ciro_tfidf.fit(tfidf_teste(df_train_svc_ciro_tfidf, svc_ciro_tfidf).toarray(), df_train_svc_ciro_tfidf['Sentimento'].str.replace('p','1').replace('x','0').replace('n','-1').astype(int))

df_ciro = AnaliseSentimentos_tfidf(df_ciro, svc_ciro_tfidf,model_svc_ciro_tfidf, 'predict_tfidf_SVM')

accuracy:
0.5207223208999407
precision:
0.33804133084785515
recall:
0.6735533095521204
f1:
0.23807800549024974
matrix_confusion:
[[   2    0    0]
 [   9   13    4]
 [2336  889 3503]]
0.44259874169754154
2336
(array([0.14646846, 0.15036453, 0.15494565, ..., 0.56595912, 0.57124158,
       0.57583995]), array([10,  2,  1, ...,  6,  2, 10], dtype=int64))
Wall time: 5min 32s


### Ciro - MLP - bw

In [53]:
%%time
model_mlp_ciro_bw, mlp_ciro_bw, df_train_mlp_ciro_bw, df_teste_mlp_ciro_bw = \
train_model_bw(MLPClassifier(max_iter=400), df_ciro_train, 100)
model_mlp_ciro_bw = model_mlp_ciro_bw.fit(Bag_of_words_teste(df_train_mlp_ciro_bw, mlp_ciro_bw).toarray(), df_train_mlp_ciro_bw['Sentimento'].str.replace('p','1').replace('x','0').replace('n','-1').astype(int))

df_ciro = AnaliseSentimentos_bw(df_ciro, mlp_ciro_bw,model_mlp_ciro_bw, 'predict_bw_MLP')

accuracy:
0.43220840734162225
precision:
0.3351844180687204
recall:
0.6566968505137352
f1:
0.20669888714699527
matrix_confusion:
[[   2    0    0]
 [   2   14   10]
 [2095 1729 2904]]
0.42644599138702194
1823
(array([4.64868497e-22, 5.54149967e-22, 5.64662805e-22, ...,
       1.00000000e+00, 1.00000000e+00, 1.00000000e+00]), array([ 5,  2,  2, ...,  2,  1, 41], dtype=int64))
Wall time: 4min 22s


### Ciro - MLP - tfidf

In [54]:
%%time
model_mlp_ciro_tfidf, mlp_ciro_tfidf, df_train_mlp_ciro_tfidf, df_teste_mlp_ciro_tfidf = \
train_model_tfidf(MLPClassifier(max_iter=400), df_ciro_train, 100)
model_mlp_ciro_tfidf = model_mlp_ciro_tfidf.fit(Bag_of_words_teste(df_train_mlp_ciro_tfidf, mlp_ciro_tfidf).toarray(), df_train_mlp_ciro_tfidf['Sentimento'].str.replace('p','1').replace('x','0').replace('n','-1').astype(int))

df_ciro = AnaliseSentimentos_tfidf(df_ciro, mlp_ciro_tfidf,model_mlp_ciro_tfidf, 'predict_tfidf_MLP')

accuracy:
0.5220544701006513
precision:
0.33616122241331065
recall:
0.6739992072929053
f1:
0.23583187644688106
matrix_confusion:
[[   2    0    0]
 [   3   13   10]
 [1968 1248 3512]]
0.44201169406343704
1968
(array([5.04399354e-05, 9.26305584e-05, 1.22747143e-04, ...,
       9.99595006e-01, 9.99651807e-01, 9.99677095e-01]), array([1, 1, 1, ..., 1, 1, 1], dtype=int64))
Wall time: 4min 59s


### Ciro - KNN - bw

In [55]:
%%time
model_knn_ciro_bw, knn_ciro_bw, df_train_knn_ciro_bw, df_teste_knn_ciro_bw = \
train_model_bw(KNeighborsClassifier(), df_ciro_train, 100)
model_knn_ciro_bw = model_knn_ciro_bw.fit(Bag_of_words_teste(df_train_knn_ciro_bw, knn_ciro_bw).toarray(), df_train_knn_ciro_bw['Sentimento'].str.replace('p','1').replace('x','0').replace('n','-1').astype(int))

df_ciro = AnaliseSentimentos_bw(df_ciro, knn_ciro_bw,model_knn_ciro_bw, 'predict_bw_KNN')

accuracy:
0.19789816459443457
precision:
0.33320701643283407
recall:
0.39888030122869605
f1:
0.11268574229539019
matrix_confusion:
[[   1    1    0]
 [   6   13    7]
 [2523 2882 1323]]
0.41590233583917724
1338
(array([0. , 0.2, 0.4, 0.6, 0.8, 1. ]), array([178338, 593137, 340254, 322812,  50036,   9621], dtype=int64))
Wall time: 1min 32s


### Ciro - KNN - tfidf

In [56]:
%%time
model_knn_ciro_tfidf, knn_ciro_tfidf, df_train_knn_ciro_tfidf, df_teste_knn_ciro_tfidf = \
train_model_tfidf(KNeighborsClassifier(), df_ciro_train, 100)
model_knn_ciro_tfidf = model_knn_ciro_tfidf.fit(Bag_of_words_teste(df_train_knn_ciro_tfidf, knn_ciro_tfidf).toarray(), df_train_knn_ciro_tfidf['Sentimento'].str.replace('p','1').replace('x','0').replace('n','-1').astype(int))

df_ciro = AnaliseSentimentos_tfidf(df_ciro, knn_ciro_tfidf,model_knn_ciro_tfidf, 'predict_tfidf_KNN')

accuracy:
0.3716696269982238
precision:
0.3343062367397345
recall:
0.5853494008963688
f1:
0.18442608178920894
matrix_confusion:
[[   2    0    0]
 [   8   10    8]
 [2337 1892 2499]]
0.368937836605884
2337
(array([0. , 0.2, 0.4, 0.6, 0.8, 1. ]), array([247687, 470717, 424963, 253542,  77384,  19905], dtype=int64))
Wall time: 1min 47s


In [57]:
df_bolsonaro.to_excel('bolsonaro_predict4.xlsx')
df_lula.to_excel('lula_predict4.xlsx')
df_simone.to_excel('simone_predict4.xlsx')
df_ciro.to_excel('ciro_predict4.xlsx')

In [58]:
def VotacaoPredict(df):
    for i in df.index:
        pos=0
        neu=0
        neg=0
        for j in ['predict_tfidf_MLP','predict_bw_MLP',
                  'predict_tfidf_SVM','predict_bw_SVM',
                 'predict_tfidf_RandomForest','predict_bw_RandomForest',
                 'predict_tfidf_NaiveBayes','predict_bw_NaiveBayes',
                 'predict_tfidf_ArvoreDeDecisao','predict_bw_ArvoreDeDecisao',
                 'predict_tfidf_KNN', 'predict_bw_KNN']:
            
            pos+=df.loc[i,j+'1']
            neu+=df.loc[i,j+'0']    
            neg+=df.loc[i,j+'-1']   
        
        if((neu>=neg)& (neu>=pos)):
            df.loc[i,'VotacaoPredict_norm'] = 0
        elif((neg>=pos)& (neg>=neu)):
            df.loc[i,'VotacaoPredict_norm'] = -1
        else:
            df.loc[i,'VotacaoPredict_norm'] = 1     
    #print(df['VotacaoPredict_ajust'].value_counts())
    print(df['VotacaoPredict_norm'].value_counts())        
    return df

In [59]:
df_bolsonaro_predict2 = VotacaoPredict(df_bolsonaro)
df_lula_predict2 = VotacaoPredict(df_lula)
df_simone_predict2 = VotacaoPredict(df_simone)
df_ciro_predict2 = VotacaoPredict(df_ciro)

 1.0    500381
 0.0    347499
-1.0     98558
Name: VotacaoPredict_norm, dtype: int64
 0.0    514145
 1.0    343190
-1.0     74395
Name: VotacaoPredict_norm, dtype: int64
 0.0    58196
-1.0    40686
 1.0    32280
Name: VotacaoPredict_norm, dtype: int64
 1.0    213701
-1.0    146145
 0.0    138220
Name: VotacaoPredict_norm, dtype: int64


In [61]:
df_bolsonaro_predict2.to_excel('bolsonaro_resultado_equilibrio5.xlsx')
df_lula_predict2.to_excel('lula_resultado_equilibrio5.xlsx')
df_simone_predict2.to_excel('simone_resultado_equilibrio5.xlsx')
df_ciro_predict2.to_excel('ciro_resultado_equilibrio5.xlsx')

In [None]:
# df_bolsonaro = pd.read_excel('bolsonaro_predict1.xlsx', index_col=0)
# df_lula = pd.read_excel('lula_predict1.xlsx', index_col=0)
# df_simone = pd.read_excel('simone_predict1.xlsx', index_col=0)
# df_ciro = pd.read_excel('ciro_predict1.xlsx', index_col=0)