# Data cleaning 

In [2]:
import pandas as pd

In [41]:
from use_cases.contributions import create_table_contributions

from use_cases.emotions import create_table_emotions
from use_cases.personal_needs import create_table_personal_needs
from use_cases.dialogues import create_table_dialogues
from use_cases.country_needs import create_table_country_needs
from use_cases.persons import create_table_persons
from use_cases.individuals import create_table_individuals
from use_cases.pairs import create_pair_token


pd.options.mode.chained_assignment = None 

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Loading Data Surveys

In [21]:
chunksize   = 100 # Just for debugging
survey_path = './data/BBDD_Dialogos_2021_01_27.csv' 
survey = pd.read_csv(survey_path, chunksize=chunksize, low_memory=False)
for survey in survey: break
    
ind_online_survey_path = './data/Consulta_Individual_online_v2.xlsx'
ind_online_survey = pd.read_excel(ind_online_survey_path, 'Sheet1')
    
ind_survey_path = './data/Base_final_digitación_Consultas.xlsx'
ind_survey = pd.read_excel(ind_survey_path, 'CONSULTAS')

In [20]:
survey.sample(2)
ind_online_survey.sample(2)

True

## Processing Uses Cases
1. Emotions 
2. Contributions
3. Country needs 
4. Personal/familiar needs

### Persons Table (Dialogue)

In [69]:
persons_table = create_table_persons(survey)

In [70]:
persons_table.sample(3)

Unnamed: 0,diag_id,age,sex,level,comuna_id,age_range
36,enc_u_4598385550153533853,21,f,basica_completa,7101,15-30
90,enc_u_4595944093321943387,28,f,universitaria_completa,14101,15-30
6,enc_u_4602649914809752187,21,f,universitaria_incompleta,14101,15-30


### Dialogues

In [67]:
dialogues_table = create_table_dialogues(survey)

In [68]:
dialogues_table.sample(3)

Unnamed: 0,diag_id,date,init_time,end_time,location,address,comuna_id,n_members,group_name
51,enc_u_4598337190995449677,22-01-2020,17:00:00,00:00:00,direccion regional injuv coquimbo,av francisco de aguirre #414,4101,7,usuarios de oficina movil de bienes nacionales
86,enc_u_4596538444129461689,06-03-2020,16:00:00,17:30:00,terminal lincosur,sala de star terminales,4101,8,usuarios de oficina movil de bienes nacionales
62,enc_u_4597641730116574573,05-03-2020,10:00:00,12:00:00,centro cultural de freire,pedro canales numero 137,9105,5,usuarios de oficina movil de bienes nacionales


### Emotions

<img src="diagrams/q1.png" width=730 height=520 />

In [74]:
%%time
emotions_table = create_table_emotions(survey)

CPU times: user 927 ms, sys: 72.2 ms, total: 999 ms
Wall time: 999 ms


In [75]:
emotions_table.sample(2)

Unnamed: 0,diag_id,name,name_token,macro,text,text_tokens
15,ENC_U_4601870260112596575,pena,[pena],pena,"al ver este enfrentamiento entre politicos, pe...","[ver, enfrentamiento, politicos, personas, etc..."
93,ENC_U_4595914560111217720,rabia,[rabia],rabia,"por los destrozos que se han generado, a la pr...","[destrozos, generado, propiedad]"


### Emotion Pair

In [78]:
emotion_pair = create_pair_token(emotions_table, 'text_tokens', 'emotion_id')

KeyError: 'id'

In [79]:
emotion_pair.sample(3)

NameError: name 'emotion_pair' is not defined

### Individuals

In [23]:
%%time
individuals_table = create_table_individuals(ind_online_survey, ind_survey)

CPU times: user 11.3 s, sys: 72.4 ms, total: 11.4 s
Wall time: 11.4 s


In [26]:
individuals_table.sample(2)

15389

### Country Needs

In [51]:
country_needs = create_table_country_needs(survey)

In [40]:
country_needs.sample(3)

Unnamed: 0,id,diag_id,name,name_tokens,macro,exp,exp_tokens,role,role_tokens,actor,priority
28,633,enc_u_4598585422351013149,,,,no mas afp,"[mas, afp]",,,estado,0.0
118,420,enc_u_4598374070151027288,transporte,[transporte],,,,subsidiar el transporte para los adultos mayores.,"[subsidiar, transporte, adultos, mayores]",estado,
6,152,enc_u_4602649914809752187,educacion,[educacion],,malo el proceso de admision a la educacion bas...,"[malo, proceso, admision, educacion, basica, m...",que se renueven los procesos de seleccion educ...,"[renueven, procesos, seleccion, educativos, es...",estado,0.0


### Country Need Role Pair

In [66]:
country_need_role_pair = create_pair_token(country_needs, 'role_tokens', 'country_need_id')

In [67]:
country_need_role_pair.sample(2)

Unnamed: 0,id,country_need_id,word_1,word_2
1025,1026,294,cotizaciones,previsionales
2212,2213,707,lineamientos,estrictos


### Country Need Exp Pair

In [68]:
country_need_exp_pair = create_pair_token(country_needs, 'exp_tokens', 'country_need_id')

In [69]:
country_need_exp_pair.sample(2)

Unnamed: 0,id,country_need_id,word_1,word_2
86,87,10,atencion,especialistas
2509,2510,621,hijos,nietos


### Family/PersonalNeeds

In [70]:
personal_needs = create_table_personal_needs(survey)

In [71]:
personal_needs.sample(3)

Unnamed: 0,id,diag_id,name,name_tokens,exp,exp_tokens,macro,priority
37,337,ENC_U_4598383170151924860,seguridad,[seguridad],mucha delincuencia,"[mucha, delincuencia]",seguridad,2
41,141,ENC_U_4598374070151027288,medicamentos,[medicamentos],el precio de los medicamentos es muy elevado.,"[precio, medicamentos, elevado]",medicamentos,1
11,311,ENC_U_4601970490118053696,oportunidades,[oportunidades],no tenemos oportunidades para surgir.,"[oportunidades, surgir]",oportunidades,1


### Personal Need Pair

In [72]:
personal_need_pair = create_pair_token(personal_needs, 'exp_tokens', 'personal_need_id')

In [73]:
personal_need_pair.sample(3)

Unnamed: 0,id,personal_need_id,word_1,word_2
525,526,94,burocracia,procesos
2076,2077,444,pago,consumo
1383,1384,250,vida,ello


### Contributions

In [58]:
contributions = create_table_contributions(survey)

In [59]:
contributions.sample(3)

Unnamed: 0,id,diag_id,text,text_tokens,macro
15,415,ENC_U_4601870260112596575,ser buenos ciudadanos.,"[ser, buenos, ciudadanos]",ser buenos ciudadanos.
61,261,ENC_U_4597643308415751967,participar de los procesos electorales que afr...,"[participar, procesos, electorales, afrontara,...",participar de los procesos electorales que afr...
35,135,ENC_U_4598389170159296566,participando en actividades actividades civicas,"[participando, actividades, actividades, civicas]",participando en actividades actividades civicas
