In [1]:
# coding: utf-8

import pandas as pd

from files.base import Base

# Column names applied positionally to the loaded frame (one per CSV column).
column_names = [
    'key', 'Artigo', 'titulo',
    'Manual', 'Naive Bayes', 'SentiStregth',
    'Positivo', 'Negativo', 'Neutro',
    'Link', 'Origem', 'id',
    'Frases', 'Palavras de Citação', 'caracteres',
    'article_clean', 'article_len',
    'article_tokens', 'article_tokens_word2vec',
    'Bidirecional Naive', 'Bidirecional Senti',
]

# Read the ';'-separated file referenced by Base.FILE_FINAL and rename its columns.
df = pd.read_csv(Base.FILE_FINAL, sep=";")
df.columns = column_names

# Preview the first two rows.
df.head(2)


Unnamed: 0,key,Artigo,titulo,Manual,Naive Bayes,SentiStregth,Positivo,Negativo,Neutro,Link,...,id,Frases,Palavras de Citação,caracteres,article_clean,article_len,article_tokens,article_tokens_word2vec,Bidirecional Naive,Bidirecional Senti
0,0,"Doria chama gestão da pandemia de 'descaso', e...","Doria chama gestão da pandemia de 'descaso', e...",,partial,partial,2,-3,-1,https://diariodonordeste.verdesmares.com.br/po...,...,1,25,4,2449,doria chama gestão pandemia descaso queiroga d...,1907,"['doria', 'chama', 'gestão', 'pandemia', 'desc...","[18328, 2200, 658, 33346, 28604, 54972, 222, 1...",partial,partial
1,1,Câmara aprova suspensão da carteira do motoris...,Câmara aprova suspensão da carteira do motoris...,,partial,partial,3,-3,-1,https://g1.globo.com/politica/noticia/2021/09/...,...,2,24,4,3435,câmara aprova suspensão carteira motorista pub...,2564,"['câmara', 'aprova', 'suspensão', 'carteira', ...","[444, 7227, 2480, 3965, 2671, 5488, 14052, 162...",partial,partial


In [2]:
# Empty frame declaring the per-model metric schema: the model name, five
# scores, then the TP/FP/FN/TN cells for each of the two labels.
dataFrameAnalytics = pd.DataFrame(
    columns=(
        [
            'model',
            'accuracy',
            'recall_impartial',
            'precision_imparcial',
            'recall_partial',
            'precision_parcial',
        ]
        + [
            f'{cell}_{label}'
            for label in ('impartial', 'partial')
            for cell in ('TP', 'FP', 'FN', 'TN')
        ]
    )
)

def metric_model(key, data=None):
    """Score one model's labels against the manually annotated rows.

    A row is a "hit" when the model's label equals the 'Manual' label; rows
    whose 'Manual' value is missing never compare equal and are also dropped
    by ``value_counts``, so they do not contribute to any count.

    The confusion-matrix cells are derived from per-label hit counts and
    per-label manual totals. This treats every miss on one label as a
    prediction of the other label, i.e. it assumes 'impartial' and 'partial'
    are the only labels in play.

    Args:
        key: Name of the column holding the model's predicted label.
        data: Frame containing a 'Manual' column and the ``key`` column.
            Defaults to the notebook-level ``df``.

    Returns:
        dict with the keys 'model', 'accuracy', 'recall_impartial',
        'precision_imparcial', 'recall_partial', 'precision_parcial' and the
        TP/FP/FN/TN counts for each label. A ratio whose denominator is zero
        is reported as NaN instead of raising.
    """
    frame = df if data is None else data

    manual = frame['Manual'].value_counts()
    hit = frame.loc[frame[key] == frame['Manual'], key].value_counts()

    # value_counts() omits labels that never occur, so attribute access
    # (hit.impartial) would raise AttributeError; fall back to 0 instead.
    manual_impartial = int(manual.get('impartial', 0))
    manual_partial = int(manual.get('partial', 0))
    hit_impartial = int(hit.get('impartial', 0))
    hit_partial = int(hit.get('partial', 0))

    def ratio(numerator, denominator):
        # Undefined ratios (empty class / no predictions) become NaN.
        return numerator / denominator if denominator else float('nan')

    TP_impartial = hit_impartial
    FP_impartial = manual_partial - hit_partial
    FN_impartial = manual_impartial - hit_impartial
    TN_impartial = hit_partial

    # The 'partial' view is the mirror image of the 'impartial' one.
    TP_partial = hit_partial
    FP_partial = manual_impartial - hit_impartial
    FN_partial = manual_partial - hit_partial
    TN_partial = hit_impartial

    accuracy = ratio(TP_impartial + TP_partial, manual_impartial + manual_partial)

    recall_impartial = ratio(TP_impartial, TP_impartial + FN_impartial)
    precision_imparcial = ratio(TP_impartial, TP_impartial + FP_impartial)

    recall_partial = ratio(TP_partial, TP_partial + FN_partial)
    precision_parcial = ratio(TP_partial, TP_partial + FP_partial)

    return {
        'model': key,
        'accuracy': accuracy,
        'recall_impartial': recall_impartial,
        'precision_imparcial': precision_imparcial,
        'recall_partial': recall_partial,
        'precision_parcial': precision_parcial,
        'TP_impartial': TP_impartial,
        'FP_impartial': FP_impartial,
        'FN_impartial': FN_impartial,
        'TN_impartial': TN_impartial,
        'TP_partial': TP_partial,
        'FP_partial': FP_partial,
        'FN_partial': FN_partial,
        'TN_partial': TN_partial
    }

# DataFrame.append was removed in pandas 2.0, and growing a frame row by row
# copies it on every call. Collect one metrics record per model and build the
# frame once, keeping the column order of the existing (empty) schema frame.
dataFrameAnalytics = pd.DataFrame(
    [
        metric_model(model_name)
        for model_name in (
            'Bidirecional Senti',
            'Bidirecional Naive',
            'Naive Bayes',
            'SentiStregth',
        )
    ],
    columns=dataFrameAnalytics.columns,
)

# Empty frame for the presentation table: one row per metric, one column per model.
analyticsView = pd.DataFrame(
    columns=['metric']
    + ['Bidirecional Senti', 'Bidirecional Naive', 'Naive Bayes', 'SentiStregth']
)

def metric_view(key, analytics=None):
    """Format one metric as a percentage string for each of the four models.

    Args:
        key: Name of the metric column to read (e.g. 'accuracy').
        analytics: Frame with a 'model' column and the ``key`` column, one row
            per model. Defaults to the notebook-level ``dataFrameAnalytics``;
            pass it explicitly when that name may have been rebound to a
            different table.

    Returns:
        dict with 'metric' set to ``key`` and, for each model name, the value
        multiplied by 100, rounded to two decimals and suffixed with ' %'.

    Raises:
        IndexError: if a model has no row in the frame.
    """
    scores = dataFrameAnalytics if analytics is None else analytics

    row = {'metric': key}
    for model_name in ('Bidirecional Senti', 'Bidirecional Naive', 'Naive Bayes', 'SentiStregth'):
        # First (expected: only) row recorded for this model.
        value = scores.loc[scores['model'] == model_name, key].values[0]
        row[model_name] = str(round(value * 100, 2)) + ' %'
    return row

# DataFrame.append was removed in pandas 2.0; build the presentation table in
# one step from one formatted row per metric, keeping the existing column order.
analyticsView = pd.DataFrame(
    [
        metric_view(metric_name)
        for metric_name in (
            'accuracy',
            'recall_impartial',
            'precision_imparcial',
            'recall_partial',
            'precision_parcial',
        )
    ],
    columns=analyticsView.columns,
)

analyticsView


Unnamed: 0,metric,Bidirecional Senti,Bidirecional Naive,Naive Bayes,SentiStregth
0,accuracy,77.33 %,74.67 %,73.33 %,57.33 %
1,recall_impartial,84.91 %,84.48 %,84.91 %,57.33 %
2,precision_imparcial,85.65 %,83.05 %,81.4 %,82.1 %
3,recall_partial,51.47 %,41.18 %,33.82 %,57.35 %
4,precision_parcial,50.0 %,43.75 %,39.66 %,28.26 %


In [3]:
# Empty frame for the label-count table; the unnamed first column holds the
# name of the label source (manual annotation or a model).
count_columns = ['', 'Total', 'Imparcial', 'Parcial']
dataFrameAnalytics = pd.DataFrame(columns=count_columns)

#training = df[~df['Manual'].isin(['impartial', 'partial'])]

#naive = training['Naive Bayes'].value_counts()
#senti = training['SentiStregth'].value_counts()

# Per-label row counts for the manual annotation and for each model column.
manual, b_naive, b_senti, naive, senti = (
    df[column].value_counts()
    for column in (
        'Manual',
        'Bidirecional Naive',
        'Bidirecional Senti',
        'Naive Bayes',
        'SentiStregth',
    )
)

# DataFrame.append was removed in pandas 2.0, so the table is built in one step
# from one record per label source. Counts are read with .get(..., 0) because
# value_counts() omits labels that never occur, which made the previous
# attribute access (counts.impartial) raise AttributeError in that case.
dataFrameAnalytics = pd.DataFrame(
    [
        {
            '': source_name,
            'Total': counts.get('impartial', 0) + counts.get('partial', 0),
            'Imparcial': counts.get('impartial', 0),
            'Parcial': counts.get('partial', 0),
        }
        for source_name, counts in (
            ('Manual', manual),
            ('Bidirecional Naive', b_naive),
            ('Bidirecional Senti', b_senti),
            ('Naive Bayes', naive),
            ('SentiStregth', senti),
        )
    ],
    columns=dataFrameAnalytics.columns,
)

dataFrameAnalytics

Unnamed: 0,Unnamed: 1,Total,Imparcial,Parcial
0,Manual,300,232,68
1,Bidirecional Naive,18175,10847,7328
2,Bidirecional Senti,18175,10079,8096
3,Naive Bayes,18175,10344,7831
4,SentiStregth,18175,8501,9674


In [4]:
# Boolean row masks shared by every count below.
origem = df['Origem']
senti_label = df['Bidirecional Senti']
from_dn = origem == "Diário do Nordeste"
from_g1 = origem == "G1 Ceará"
from_op = origem == "O Povo"
is_impartial = senti_label == "impartial"
is_partial = senti_label == "partial"

# Number of rows with a non-null 'id', overall and per outlet.
total = df['id'].count()
totalDN = df.loc[from_dn, 'id'].count()
totalG1 = df.loc[from_g1, 'id'].count()
totalOP = df.loc[from_op, 'id'].count()

# Rows labelled "impartial" by 'Bidirecional Senti', overall and per outlet.
impartial = df.loc[is_impartial, 'id'].count()
impartialDN = df.loc[from_dn & is_impartial, 'id'].count()
impartialG1 = df.loc[from_g1 & is_impartial, 'id'].count()
impartialOP = df.loc[from_op & is_impartial, 'id'].count()

# Rows labelled "partial" by 'Bidirecional Senti', overall and per outlet.
partial = df.loc[is_partial, 'id'].count()
partialDN = df.loc[from_dn & is_partial, 'id'].count()
partialG1 = df.loc[from_g1 & is_partial, 'id'].count()
partialOP = df.loc[from_op & is_partial, 'id'].count()

# Each count as a percentage of the matching total.
pImpartial = impartial / total * 100
pImpartialDN = impartialDN / totalDN * 100
pImpartialG1 = impartialG1 / totalG1 * 100
pImpartialOP = impartialOP / totalOP * 100

pPartial = partial / total * 100
pPartialDN = partialDN / totalDN * 100
pPartialG1 = partialG1 / totalG1 * 100
pPartialOP = partialOP / totalOP * 100

# Summary table: raw totals plus "count ( share% )" strings per label.
analytics = pd.DataFrame(
    {
        'total': [total, totalDN, totalG1, totalOP],
        'impartial': [
            f"{impartial} ( {round(pImpartial)}% )",
            f"{impartialDN} ( {round(pImpartialDN)}% )",
            f"{impartialG1} ( {round(pImpartialG1)}% )",
            f"{impartialOP} ( {round(pImpartialOP)}% )",
        ],
        'partial': [
            f"{partial} ( {round(pPartial)}% )",
            f"{partialDN} ( {round(pPartialDN)}% )",
            f"{partialG1} ( {round(pPartialG1)}% )",
            f"{partialOP} ( {round(pPartialOP)}% )",
        ],
    },
    index=['Total', 'Diário do Nordeste', 'G1 Ceará', 'O Povo'],
)

analytics

Unnamed: 0,total,impartial,partial
Total,18175,10079 ( 55% ),8096 ( 45% )
Diário do Nordeste,2570,1055 ( 41% ),1515 ( 59% )
G1 Ceará,7101,3615 ( 51% ),3486 ( 49% )
O Povo,8504,5409 ( 64% ),3095 ( 36% )


In [5]:
# Number of rows with a non-null 'id' for each outlet.
outlet = df['Origem']
totalOP = df.loc[outlet == "O Povo", 'id'].count()
totalG1 = df.loc[outlet == "G1 Ceará", 'id'].count()
totalDN = df.loc[outlet == "Diário do Nordeste", 'id'].count()

# Sum of the 'caracteres' column per outlet.
somaTamanho = df.groupby('Origem')['caracteres'].sum()

outlet_names = ["O Povo", "G1 Ceará", "Diário do Nordeste"]
outlet_totals = [totalOP, totalG1, totalDN]

# Average size per article (truncated to an integer) next to the article count.
analytics = pd.DataFrame(
    {
        'Tamanho Médio das Notícias': [
            int(somaTamanho[name] / count)
            for name, count in zip(outlet_names, outlet_totals)
        ],
        'Quant. de Notícias': outlet_totals,
    },
    index=outlet_names,
)

analytics

Unnamed: 0,Tamanho Médio das Notícias,Quant. de Notícias
O Povo,1809,8504
G1 Ceará,3164,7101
Diário do Nordeste,3701,2570
