In [49]:
create_tokenizedDF= False
import pandas as pd
import numpy as np
pd.options.mode.chained_assignment = None  # default='warn'
from collections import Counter
import nltk

stop_words_nltk = nltk.corpus.stopwords.words('portuguese')
snowball = nltk.stem.snowball.SnowballStemmer('portuguese')
right=[ 'PSD',  'CDS-PP', 'CH','IL','CDS']
left=[ 'PS', 'BE', 'PCP', 'PAN', 'PEV','L']

In [2]:
def normlizeTokens(tokenLst, stopwordLst = None, stemmer = None):
    #We can use a generator here as we just need to iterate over it

    #Lowering the case and removing non-words
    workingIter = (w.lower() for w in tokenLst if w.isalpha())

    #Now we can use the semmer, if provided
    if stemmer is not None:
        workingIter = (stemmer.stem(w) for w in workingIter)
         
    #And remove the stopwords
    if stopwordLst is not None:
        workingIter = (w for w in workingIter if w not in stopwordLst)
    #We will return a list with the stopwords removed
    return list(workingIter)

def create_tokenized_dataset(df_input):
    # 1 create token column: tokens
    df_input['tokens']=df_input['speech'].apply(lambda x: nltk.word_tokenize(x))
    # 2 create token without stopwords and stemmer: tokens_stemer_stop. 16min
    df_input['tokens_stemer_stop'] = df_input['tokens'].apply(lambda x: normlizeTokens(x, stopwordLst = stop_words_nltk, stemmer = snowball))
    
    # 3 extract very frequent or rare words: token_cleaned
    flat_tokens=[item for sublist in df_input['tokens_stemer_stop'] for item in sublist]
    aux_c=Counter(flat_tokens)
        
    extrat_common=['par', 'nao', 'sr', 'deput', 'govern', 'muit', 'pel', 'president','tod','tamb','srs','sras','pod','part','psd','sao','aplaus','ja','porqu','faz','ha','diz','quer','pais','sobr','bem','nest']
    extract_rare=[x[0] for x in aux_c.most_common()[-30:]]

    df_input['tokens_cleaned'] = df_input['tokens_stemer_stop'].apply(lambda x : [i for i in x if not i in extrat_common])
    df_input['tokens_cleaned'] = df_input['tokens_cleaned'].apply(lambda x : [i for i in x if not i in extract_rare])
    df_input.to_pickle('dftok.pkl')  
    return df_input

def generate_N_grams(text,ngram=1):
  words=[word for word in text]  
  temp=zip(*[words[i:] for i in range(0,ngram)])
  ans=[' '.join(ngram) for ngram in temp]
  return ans

def create_grams(df_frame, n):
    grams_d={1: 'uni_grams', 2: 'bi_grams', 3: 'tri_grams'}
    df_frame[grams_d[n]] = df_frame['tokens_cleaned'].apply(lambda x: generate_N_grams(x,n))
    return df_frame 

In [3]:
if create_tokenizedDF==True:
    df_input = pd.read_pickle('parliament_fdf.pkl')  
    df_tok=create_tokenized_dataset(df_input)
else:
    df_tok = pd.read_pickle('dftok.pkl')  
    

In [4]:
#df_tok=create_grams(df_tok, 2)
df_tok=create_grams(df_tok, 3)

In [5]:
df_tok.head(3)

Unnamed: 0,speaker,speech,filename,number,session,term,Date,link,party,speaker_ntime,genre,len_speech,year,tokens,tokens_stemer_stop,tokens_cleaned,tri_grams
0,josemanuelpureza,muito bem,darl13sl01n001.txt,1,1,13,2015-10-23,https://debates.parlamento.pt/catalogo/r3/dar/...,BE,891.0,M,2,2015,"[muito, bem]","[muit, bem]",[],[]
1,teresalealcoelho,muito bem,darl13sl01n001.txt,1,1,13,2015-10-23,https://debates.parlamento.pt/catalogo/r3/dar/...,PSD,87.0,F,2,2015,"[muito, bem]","[muit, bem]",[],[]
2,luismontenegro,muito bom dia a todos sras e srs deputados cum...,darl13sl01n001.txt,1,1,13,2015-10-23,https://debates.parlamento.pt/catalogo/r3/dar/...,PSD,694.0,M,204,2015,"[muito, bom, dia, a, todos, sras, e, srs, depu...","[muit, bom, dia, tod, sras, srs, deput, cumpr,...","[bom, dia, cumpr, prax, parlament, cabem, lid,...","[bom dia cumpr, dia cumpr prax, cumpr prax par..."


In [6]:
#political polarization functions




def create_frequency_table_grams(n_gram=1, indf=None, right_parties=right, left_parties=left):
    grams_d={1: 'uni_grams', 2: 'bi_grams', 3: 'tri_grams'}
   

    r_grams=[item for sublist in indf[indf.party.isin(right)][grams_d[n_gram]] for item in sublist]
    l_grams=[item for sublist in indf[indf.party.isin(left)][grams_d[n_gram]] for item in sublist]

    total_counter = Counter([item for sublist in indf[grams_d[n_gram]] for item in sublist])
    right_counter = Counter(r_grams)
    left_counter = Counter(l_grams)

    df_all = pd.DataFrame.from_dict(total_counter, orient='index').reset_index()
    df_all.columns=['phrase','count']
    df_all['count_right']=[right_counter[x] for x in df_all.phrase]
    df_all['count_left']=[left_counter[x] for x in df_all.phrase]


    df_all['count_left_total']=sum(left_counter.values())
    df_all['count_right_total']=sum(right_counter.values())
    # frequency of every words except the given one (by row)
    #Jensen et al. (2012),p.10 
    #f-pck is the frequency of all phrases used in Con­gress c by party k excluding phrase p
    df_all['f_right_minus']=( df_all['count_right_total']-df_all['count_right'])/df_all['count_right_total']
    df_all['f_left_minus']=( df_all['count_left_total']-df_all['count_left'])/df_all['count_left_total']

    df_all['f_right']=df_all['count_right']/df_all['count_right_total']
    df_all['f_right_norm'] = (df_all['f_right'] - df_all['f_right'].min()) / (df_all['f_right'].max() - df_all['f_right'].min())  
    df_all['f_left']=df_all['count_left']/df_all['count_left_total']
    df_all['f_left_norm'] = (df_all['f_left'] - df_all['f_left'].min()) / (df_all['f_left'].max() - df_all['f_left'].min())  

    df_all['f_right_minus_norm']=(df_all['f_right_minus'] - df_all['f_right_minus'].min()) / (df_all['f_right_minus'].max() - df_all['f_right_minus'].min()) 
    df_all['f_left_minus_norm']=(df_all['f_left_minus'] - df_all['f_left_minus'].min()) / (df_all['f_left_minus'].max() - df_all['f_left_minus'].min()) 

    return df_all


def calculate_pearson(df_all):
    aa=df_all['f_right_norm']*df_all['f_left_minus_norm'] 
    bb=df_all['f_left_norm']*df_all['f_right_minus_norm']
    cc=aa-bb
    dd=cc*cc
    d11=df_all['f_right_norm']+df_all['f_left_norm']
    d22=df_all['f_right_norm']+df_all['f_right_minus_norm']
    d33=df_all['f_left_norm']+df_all['f_left_minus_norm']
    d44=df_all['f_right_minus_norm']+df_all['f_left_minus_norm']
    denom=d11*d22*d33*d44
    pp=dd/denom
    return pp

def create_phrase_partisanship(df):
    aa=df['f_right_norm']+df['f_left_norm']
    df['rho']=df['f_right_norm']/aa
    
    df['gram_partisanship']= 0.5 * df['f_right_norm']*df['rho']+0.5 *df['f_right_norm']*(1-df['rho'])
    
    return df

def create_polarization_correlation(df):
    
    aa=df['f_left_norm']*-1
    bb= df['f_right_norm']*1
    df['beta_polarization']=aa+bb

    return df

In [7]:
df_freq=create_frequency_table_grams(n_gram=3, indf=df_tok, right_parties=right, left_parties=left)

In [8]:
df_freq['pearson_quad']=calculate_pearson(df_freq)
trigrams_table=df_freq[df_freq.pearson_quad>0]

In [9]:
trigrams_table=create_phrase_partisanship(trigrams_table)
trigrams_table=create_polarization_correlation(trigrams_table)
trigrams_table.sort_values(by='gram_partisanship',ascending=False)

Unnamed: 0,phrase,count,count_right,count_left,count_left_total,count_right_total,f_right_minus,f_left_minus,f_right,f_right_norm,f_left,f_left_norm,f_right_minus_norm,f_left_minus_norm,pearson_quad,rho,gram_partisanship,beta_polarization
4238,servic nacional saud,4454,1032,3422,3230452,2008118,0.999486,0.998941,0.000514,0.266529,1.059294e-03,0.606953,0.733471,0.393047,0.117774,0.305134,0.133264,-0.340424
2120,bloc esquerd pcp,603,491,112,3230452,2008118,0.999755,0.999965,0.000245,0.126808,3.467007e-05,0.019865,0.873192,0.980135,0.042073,0.864561,0.063404,0.106943
38698,apresent declaraca vot,1186,475,711,3230452,2008118,0.999763,0.999780,0.000237,0.122676,2.200930e-04,0.126109,0.877324,0.873891,0.000027,0.493101,0.061338,-0.003433
68917,sra secret estad,854,473,381,3230452,2008118,0.999764,0.999882,0.000236,0.122159,1.179402e-04,0.067577,0.877841,0.932423,0.008674,0.643836,0.061080,0.054582
132062,caix geral deposit,947,458,489,3230452,2008118,0.999772,0.999849,0.000228,0.118285,1.513720e-04,0.086733,0.881715,0.913267,0.002705,0.576950,0.059143,0.031552
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2335836,pretend ate implod,1,0,1,3230452,2008118,1.000000,1.000000,0.000000,0.000000,3.095542e-07,0.000177,1.000000,0.999823,0.000089,0.000000,0.000000,-0.000177
2335837,ate implod portant,1,0,1,3230452,2008118,1.000000,1.000000,0.000000,0.000000,3.095542e-07,0.000177,1.000000,0.999823,0.000089,0.000000,0.000000,-0.000177
2335838,implod portant aquil,1,0,1,3230452,2008118,1.000000,1.000000,0.000000,0.000000,3.095542e-07,0.000177,1.000000,0.999823,0.000089,0.000000,0.000000,-0.000177
2335839,portant aquil fiz,1,0,1,3230452,2008118,1.000000,1.000000,0.000000,0.000000,3.095542e-07,0.000177,1.000000,0.999823,0.000089,0.000000,0.000000,-0.000177


In [16]:
trigrams_tableS=trigrams_table[trigrams_table['count']>5]

In [17]:
partisan_dict=dict(zip(trigrams_tableS.phrase,trigrams_tableS.gram_partisanship))

In [46]:
df_tok.loc[0,'tri_grams']=[]

ValueError: Must have equal len keys and value when setting with an iterable

In [89]:
df_tok['partisanship'] = [np.empty(0,dtype=float)]*len(df_tok)
df_tok['partisanship'] =df_tok['partisanship'].astype(object)
out_res=[]
for n,row in enumerate(df_tok.tri_grams):
   
    res=[]
    for x in row:
        
        if x in partisan_dict:
            res.append( partisan_dict[x])
           
        else:
            res.append( 0)
    out_res.append(res)
df_tok.partisanship=out_res

In [91]:
df_tok['avg_partisanship']=df_tok.partisanship.apply(lambda x: sum(x))

In [92]:
df_tok

Unnamed: 0,speaker,speech,filename,number,session,term,Date,link,party,speaker_ntime,genre,len_speech,year,tokens,tokens_stemer_stop,tokens_cleaned,tri_grams,partisanship,avg_partisanship
0,josemanuelpureza,muito bem,darl13sl01n001.txt,1,1,13,2015-10-23,https://debates.parlamento.pt/catalogo/r3/dar/...,BE,891.0,M,2,2015,"[muito, bem]","[muit, bem]",[],[],[],0.000000
1,teresalealcoelho,muito bem,darl13sl01n001.txt,1,1,13,2015-10-23,https://debates.parlamento.pt/catalogo/r3/dar/...,PSD,87.0,F,2,2015,"[muito, bem]","[muit, bem]",[],[],[],0.000000
2,luismontenegro,muito bom dia a todos sras e srs deputados cum...,darl13sl01n001.txt,1,1,13,2015-10-23,https://debates.parlamento.pt/catalogo/r3/dar/...,PSD,694.0,M,204,2015,"[muito, bom, dia, a, todos, sras, e, srs, depu...","[muit, bom, dia, tod, sras, srs, deput, cumpr,...","[bom, dia, cumpr, prax, parlament, cabem, lid,...","[bom dia cumpr, dia cumpr prax, cumpr prax par...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0.008394
3,luismontenegro,o seu exemplo de tolerancia imparcialidade de ...,darl13sl01n001.txt,1,1,13,2015-10-23,https://debates.parlamento.pt/catalogo/r3/dar/...,PSD,694.0,M,219,2015,"[o, seu, exemplo, de, tolerancia, imparcialida...","[exempl, toleranc, imparcial, competenc, polit...","[exempl, toleranc, imparcial, competenc, polit...","[exempl toleranc imparcial, toleranc imparcial...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0.004649
4,luismontenegro,sr presidente o grupo parlamentar do psd indic...,darl13sl01n001.txt,1,1,13,2015-10-23,https://debates.parlamento.pt/catalogo/r3/dar/...,PSD,694.0,M,27,2015,"[sr, presidente, o, grupo, parlamentar, do, ps...","[sr, president, grup, parlament, psd, indic, s...","[grup, parlament, indic, duart, pachec, exerc,...","[grup parlament indic, parlament indic duart, ...","[0, 0, 0, 0, 0, 0, 0, 0.00025826446280991736]",0.000258
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
91299,claudiaandre,quem esteve do lado da geringonca,darl14sl03n031.txt,31,3,14,2021-12-16,https://debates.parlamento.pt/catalogo/r3/dar/...,PSD,68.0,F,6,2021,"[quem, esteve, do, lado, da, geringonca]","[estev, lad, geringonc]","[estev, lad, geringonc]",[estev lad geringonc],[0],0.000000
91300,claudiaandre,nao nao,darl14sl03n031.txt,31,3,14,2021-12-16,https://debates.parlamento.pt/catalogo/r3/dar/...,PSD,68.0,F,2,2021,"[nao, nao]","[nao, nao]",[],[],[],0.000000
91301,claudiaandre,deixe la o psd e fale de educacao,darl14sl03n031.txt,31,3,14,2021-12-16,https://debates.parlamento.pt/catalogo/r3/dar/...,PSD,68.0,F,8,2021,"[deixe, la, o, psd, e, fale, de, educacao]","[deix, la, psd, fal, educaca]","[deix, la, fal, educaca]","[deix la fal, la fal educaca]","[0, 0]",0.000000
91302,miguelarrobas,sra presidente srs membros do governo sras e s...,darl14sl03n031.txt,31,3,14,2021-12-16,https://debates.parlamento.pt/catalogo/r3/dar/...,CDS-PP,43.0,M,626,2021,"[sra, presidente, srs, membros, do, governo, s...","[sra, president, srs, membr, govern, sras, srs...","[sra, membr, ministr, educaca, estam, final, p...","[sra membr ministr, membr ministr educaca, min...","[0.00038739669421487604, 0.0003873966942148760...",0.033316


In [76]:
#df_tok.loc[0,'partisanship'].append(0)
len(res)
len(df_tok.loc[4,'partisanship'])

0

In [37]:
partisan_dict['bom dia cumpr']

KeyError: 'bom dia cumpr'

In [12]:
len(partisan_dict)

4175799

In [14]:
df_tok[0:10]

Unnamed: 0,speaker,speech,filename,number,session,term,Date,link,party,speaker_ntime,genre,len_speech,year,tokens,tokens_stemer_stop,tokens_cleaned,tri_grams
0,josemanuelpureza,muito bem,darl13sl01n001.txt,1,1,13,2015-10-23,https://debates.parlamento.pt/catalogo/r3/dar/...,BE,891.0,M,2,2015,"[muito, bem]","[muit, bem]",[],[]
1,teresalealcoelho,muito bem,darl13sl01n001.txt,1,1,13,2015-10-23,https://debates.parlamento.pt/catalogo/r3/dar/...,PSD,87.0,F,2,2015,"[muito, bem]","[muit, bem]",[],[]
2,luismontenegro,muito bom dia a todos sras e srs deputados cum...,darl13sl01n001.txt,1,1,13,2015-10-23,https://debates.parlamento.pt/catalogo/r3/dar/...,PSD,694.0,M,204,2015,"[muito, bom, dia, a, todos, sras, e, srs, depu...","[muit, bom, dia, tod, sras, srs, deput, cumpr,...","[bom, dia, cumpr, prax, parlament, cabem, lid,...","[bom dia cumpr, dia cumpr prax, cumpr prax par..."
3,luismontenegro,o seu exemplo de tolerancia imparcialidade de ...,darl13sl01n001.txt,1,1,13,2015-10-23,https://debates.parlamento.pt/catalogo/r3/dar/...,PSD,694.0,M,219,2015,"[o, seu, exemplo, de, tolerancia, imparcialida...","[exempl, toleranc, imparcial, competenc, polit...","[exempl, toleranc, imparcial, competenc, polit...","[exempl toleranc imparcial, toleranc imparcial..."
4,luismontenegro,sr presidente o grupo parlamentar do psd indic...,darl13sl01n001.txt,1,1,13,2015-10-23,https://debates.parlamento.pt/catalogo/r3/dar/...,PSD,694.0,M,27,2015,"[sr, presidente, o, grupo, parlamentar, do, ps...","[sr, president, grup, parlament, psd, indic, s...","[grup, parlament, indic, duart, pachec, exerc,...","[grup parlament indic, parlament indic duart, ..."
5,luismontenegro,sr presidente sras e srs deputados hoje foi qu...,darl13sl01n001.txt,1,1,13,2015-10-23,https://debates.parlamento.pt/catalogo/r3/dar/...,PSD,694.0,M,26,2015,"[sr, presidente, sras, e, srs, deputados, hoje...","[sr, president, sras, srs, deput, hoj, quebr, ...","[hoj, quebr, regr, tradica, sempr, democrac, p...","[hoj quebr regr, quebr regr tradica, regr trad..."
6,luismontenegro,lamento que pela primeira vez um presidente da...,darl13sl01n001.txt,1,1,13,2015-10-23,https://debates.parlamento.pt/catalogo/r3/dar/...,PSD,694.0,M,48,2015,"[lamento, que, pela, primeira, vez, um, presid...","[lament, pel, primeir, vez, president, assembl...","[lament, primeir, vez, assembl, republ, tenh, ...","[lament primeir vez, primeir vez assembl, vez ..."
7,luismontenegro,nao obstante quero cumprimentar o novo preside...,darl13sl01n001.txt,1,1,13,2015-10-23,https://debates.parlamento.pt/catalogo/r3/dar/...,PSD,694.0,M,85,2015,"[nao, obstante, quero, cumprimentar, o, novo, ...","[nao, obstant, quer, cumpriment, nov, presiden...","[obstant, cumpriment, nov, assembl, republ, re...","[obstant cumpriment nov, cumpriment nov assemb..."
8,luismontenegro,sras e srs deputados e isto que o povo espera ...,darl13sl01n001.txt,1,1,13,2015-10-23,https://debates.parlamento.pt/catalogo/r3/dar/...,PSD,694.0,M,64,2015,"[sras, e, srs, deputados, e, isto, que, o, pov...","[sras, srs, deput, pov, esper, tod, estam, ini...","[pov, esper, estam, inic, legislatur, grand, e...","[pov esper estam, esper estam inic, estam inic..."
9,luismontenegro,e neste inicio de legislatura quero dizer as s...,darl13sl01n001.txt,1,1,13,2015-10-23,https://debates.parlamento.pt/catalogo/r3/dar/...,PSD,694.0,M,59,2015,"[e, neste, inicio, de, legislatura, quero, diz...","[nest, inici, legislatur, quer, diz, sras, srs...","[inici, legislatur, assembl, republ, cab, resp...","[inici legislatur assembl, legislatur assembl ..."


In [19]:
#partisan_dict['dia cumpr prax']
df_tok[3].tri_grams.replace( partisan_dict,inplace=True)

In [34]:
partisan_dic.keys()

NameError: name 'partisan_dic' is not defined

In [31]:
#df_tok.loc[3,'tri_grams'].replace( partisan_dict,inplace=True)
df_tok[0:10].replace({"tri_grams": partisan_dict})

Unnamed: 0,speaker,speech,filename,number,session,term,Date,link,party,speaker_ntime,genre,len_speech,year,tokens,tokens_stemer_stop,tokens_cleaned,tri_grams
0,josemanuelpureza,muito bem,darl13sl01n001.txt,1,1,13,2015-10-23,https://debates.parlamento.pt/catalogo/r3/dar/...,BE,891.0,M,2,2015,"[muito, bem]","[muit, bem]",[],[]
1,teresalealcoelho,muito bem,darl13sl01n001.txt,1,1,13,2015-10-23,https://debates.parlamento.pt/catalogo/r3/dar/...,PSD,87.0,F,2,2015,"[muito, bem]","[muit, bem]",[],[]
2,luismontenegro,muito bom dia a todos sras e srs deputados cum...,darl13sl01n001.txt,1,1,13,2015-10-23,https://debates.parlamento.pt/catalogo/r3/dar/...,PSD,694.0,M,204,2015,"[muito, bom, dia, a, todos, sras, e, srs, depu...","[muit, bom, dia, tod, sras, srs, deput, cumpr,...","[bom, dia, cumpr, prax, parlament, cabem, lid,...","[bom dia cumpr, dia cumpr prax, cumpr prax par..."
3,luismontenegro,o seu exemplo de tolerancia imparcialidade de ...,darl13sl01n001.txt,1,1,13,2015-10-23,https://debates.parlamento.pt/catalogo/r3/dar/...,PSD,694.0,M,219,2015,"[o, seu, exemplo, de, tolerancia, imparcialida...","[exempl, toleranc, imparcial, competenc, polit...","[exempl, toleranc, imparcial, competenc, polit...","[exempl toleranc imparcial, toleranc imparcial..."
4,luismontenegro,sr presidente o grupo parlamentar do psd indic...,darl13sl01n001.txt,1,1,13,2015-10-23,https://debates.parlamento.pt/catalogo/r3/dar/...,PSD,694.0,M,27,2015,"[sr, presidente, o, grupo, parlamentar, do, ps...","[sr, president, grup, parlament, psd, indic, s...","[grup, parlament, indic, duart, pachec, exerc,...","[grup parlament indic, parlament indic duart, ..."
5,luismontenegro,sr presidente sras e srs deputados hoje foi qu...,darl13sl01n001.txt,1,1,13,2015-10-23,https://debates.parlamento.pt/catalogo/r3/dar/...,PSD,694.0,M,26,2015,"[sr, presidente, sras, e, srs, deputados, hoje...","[sr, president, sras, srs, deput, hoj, quebr, ...","[hoj, quebr, regr, tradica, sempr, democrac, p...","[hoj quebr regr, quebr regr tradica, regr trad..."
6,luismontenegro,lamento que pela primeira vez um presidente da...,darl13sl01n001.txt,1,1,13,2015-10-23,https://debates.parlamento.pt/catalogo/r3/dar/...,PSD,694.0,M,48,2015,"[lamento, que, pela, primeira, vez, um, presid...","[lament, pel, primeir, vez, president, assembl...","[lament, primeir, vez, assembl, republ, tenh, ...","[lament primeir vez, primeir vez assembl, vez ..."
7,luismontenegro,nao obstante quero cumprimentar o novo preside...,darl13sl01n001.txt,1,1,13,2015-10-23,https://debates.parlamento.pt/catalogo/r3/dar/...,PSD,694.0,M,85,2015,"[nao, obstante, quero, cumprimentar, o, novo, ...","[nao, obstant, quer, cumpriment, nov, presiden...","[obstant, cumpriment, nov, assembl, republ, re...","[obstant cumpriment nov, cumpriment nov assemb..."
8,luismontenegro,sras e srs deputados e isto que o povo espera ...,darl13sl01n001.txt,1,1,13,2015-10-23,https://debates.parlamento.pt/catalogo/r3/dar/...,PSD,694.0,M,64,2015,"[sras, e, srs, deputados, e, isto, que, o, pov...","[sras, srs, deput, pov, esper, tod, estam, ini...","[pov, esper, estam, inic, legislatur, grand, e...","[pov esper estam, esper estam inic, estam inic..."
9,luismontenegro,e neste inicio de legislatura quero dizer as s...,darl13sl01n001.txt,1,1,13,2015-10-23,https://debates.parlamento.pt/catalogo/r3/dar/...,PSD,694.0,M,59,2015,"[e, neste, inicio, de, legislatura, quero, diz...","[nest, inici, legislatur, quer, diz, sras, srs...","[inici, legislatur, assembl, republ, cab, resp...","[inici legislatur assembl, legislatur assembl ..."


In [88]:
import numpy as np
set(res)-set(df_tok.loc[3,'tri_grams'])

{0.00012913223140495868,
 0.00025826446280991736,
 0.00038739669421487604,
 0.0005165289256198347,
 0.0007747933884297521,
 0.0012913223140495868,
 0.0046487603305785125}

In [None]:
print(partisan_dict.keys())

In [78]:
for n,item in enumerate(df_tok.tri_grams):
    print(n)
    res=[]
    if len(item)>0:
        for x in item:
            if x in 
                res.append(partisan_dict[x])
        df_tok.loc[n,'partisanship']=res

        
    

0
1
2


ValueError: Must have equal len keys and value when setting with an iterable

In [54]:


df_tok[0:10].apply(lambda x : list(trigrams_table[trigrams_table.phrase==item].gram_partisanship.values[0] for item in x.tri_grams if len(trigrams_table[trigrams_table.phrase==item].gram_partisanship.values)>0) , axis=1)




SyntaxError: invalid syntax (962296208.py, line 1)

In [44]:
df_tok['gram_polarization']

0        <generator object <lambda>.<locals>.<genexpr> ...
1        <generator object <lambda>.<locals>.<genexpr> ...
2        <generator object <lambda>.<locals>.<genexpr> ...
3        <generator object <lambda>.<locals>.<genexpr> ...
4        <generator object <lambda>.<locals>.<genexpr> ...
                               ...                        
91299                                                  NaN
91300                                                  NaN
91301                                                  NaN
91302                                                  NaN
91303                                                  NaN
Name: gram_polarization, Length: 91304, dtype: object

In [None]:
df_tok.to_pickle('df_final_pol.pkl')

In [None]:
def apply_polarization_model(declaracoes2,right,left):
    dfg3=create_frequency_table_grams(n_gram=3, indf=declaracoes2, right_parties=right, left_parties=left)
    dfg3['pearson_quad']=calculate_pearson(dfg3)
    trigrams_table=dfg3[dfg3.pearson_quad>0]
    trigrams_table=create_phrase_partisanship(trigrams_table)
    trigrams_table=create_polarization_correlation(trigrams_table)
    final_df = trigrams_table.sort_values(by=['gram_partisanship'], ascending=False)
    return final_df

def create_200r(df_mainf):
    indexes_no_applause=[n for n,x in enumerate(df_mainf.phrase) if 'aplausos' not in x]
    df_mainf=df_mainf.iloc[indexes_no_applause]

    tri_final = df_mainf[0:200].append(df_mainf[-200:], ignore_index=True)
    return tri_final