# Regular expressions and reshape

In [1]:
import pandas as pd
import numpy as np
import chardet # to get string character format 
import re

In [5]:
# Read the ENAHO household panel from its Stata (.dta) file into a DataFrame.
panel = pd.read_stata(filepath_or_buffer="../../data/ENAHO/panel_2016_2018.dta")

One or more strings in the dta file could not be decoded using utf-8, and
so the fallback encoding of latin-1 is being used.  This can happen when a file
has been incorrectly encoded by Stata or some other software. You should verify
the string values returned are correct.


In [9]:
# Show the DataFrame loaded from the Stata file (pandas elides the middle rows and columns).
panel

Unnamed: 0,conglome,vivienda,aÑo_16,mes_16,nconglome_16,conglome_16,vivienda_16,hogar_16,ubigeo_16,dominio_16,...,linea_20,pobreza_20,tipocuestionario_20,tipoentrevista_20,factor07_20,nconglome_20,sub_conglome_20,lineav_rpl_20,lineav_20,pobrezav_20
0,005012,039,2016,01,007083,005012,039,11,010101,sierra norte,...,303.520996,no pobre,cuestionario reducido - durante la emergencia ...,telefónico,39.763752,007083,00,657,553.222473,no vulnerable
1,005012,025,2016,01,007083,005012,025,11,010101,sierra norte,...,303.520996,no pobre,cuestionario reducido - durante la emergencia ...,telefónico,39.763752,007083,00,657,546.850037,no vulnerable
2,005012,012,2016,01,007083,005012,012,11,010101,sierra norte,...,303.520996,no pobre,cuestionario reducido - durante la emergencia ...,telefónico,39.763752,007083,00,657,561.609619,vulnerable no pobre
3,005017,075,2016,05,007093,005017,075,11,010101,sierra norte,...,303.520996,no pobre,cuestionario reducido - durante la emergencia ...,telefónico,55.669254,007093,00,657,559.775146,vulnerable no pobre
4,005017,008,2016,05,007093,005017,008,11,010101,sierra norte,...,303.520996,no pobre,cuestionario reducido - durante la emergencia ...,telefónico,55.669254,007093,00,657,563.189392,no vulnerable
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1934,010360,063,2016,11,008793,010360,063,11,220504,selva,...,247.342163,no pobre,cuestionario completo - durante la emergencia ...,presencial,111.686325,008793,00,657,569.045166,vulnerable no pobre
1935,010360,064,2016,11,008793,010360,064,11,220504,selva,...,247.342163,no pobre,cuestionario completo - durante la emergencia ...,presencial,111.686325,008793,00,657,576.689819,vulnerable no pobre
1936,010360,142,2016,11,008793,010360,142,11,220504,selva,...,247.342163,no pobre,cuestionario completo - durante la emergencia ...,presencial,111.686325,008793,00,657,573.543945,vulnerable no pobre
1937,010360,144,2016,11,008793,010360,144,11,220504,selva,...,247.342163,no pobre,cuestionario completo - durante la emergencia ...,presencial,111.686325,008793,00,657,571.337524,no vulnerable


In [275]:
# Normalise every column name to lower case (e.g. 'aÑo_16' becomes 'año_16').
panel.columns = [str.lower(column_name) for column_name in panel.columns]

In [276]:
# Replace the Spanish 'año' prefix with 'year' in each yearly column.
year_renames = {
    'año_16': 'year_16',
    'año_17': 'year_17',
    'año_18': 'year_18',
    'año_19': 'year_19',
    'año_20': 'year_20',
}
panel.rename(columns=year_renames, inplace=True)

In [277]:
# 'hogar_16' becomes 'hogar' so it can serve as the household part of the identifier.

panel.rename({'hogar_16': 'hogar'}, axis='columns', inplace=True)

In [278]:
# Shorten the three identifier columns (presumably so they cannot clash with the
# 'conglome' / 'vivienda' stub names used when reshaping -- confirm).
id_renames = {'conglome': 'cong', 'vivienda': 'viv', 'hogar': 'hog'}
panel.rename(columns=id_renames, inplace=True)

In [279]:
# Re-display the panel to check the renamed columns (cong, viv, year_16, hog, ...).
panel

Unnamed: 0,cong,viv,year_16,mes_16,nconglome_16,conglome_16,vivienda_16,hog,ubigeo_16,dominio_16,...,linea_20,pobreza_20,tipocuestionario_20,tipoentrevista_20,factor07_20,nconglome_20,sub_conglome_20,lineav_rpl_20,lineav_20,pobrezav_20
0,005012,039,2016,01,007083,005012,039,11,010101,sierra norte,...,303.520996,no pobre,cuestionario reducido - durante la emergencia ...,telefónico,39.763752,007083,00,657,553.222473,no vulnerable
1,005012,025,2016,01,007083,005012,025,11,010101,sierra norte,...,303.520996,no pobre,cuestionario reducido - durante la emergencia ...,telefónico,39.763752,007083,00,657,546.850037,no vulnerable
2,005012,012,2016,01,007083,005012,012,11,010101,sierra norte,...,303.520996,no pobre,cuestionario reducido - durante la emergencia ...,telefónico,39.763752,007083,00,657,561.609619,vulnerable no pobre
3,005017,075,2016,05,007093,005017,075,11,010101,sierra norte,...,303.520996,no pobre,cuestionario reducido - durante la emergencia ...,telefónico,55.669254,007093,00,657,559.775146,vulnerable no pobre
4,005017,008,2016,05,007093,005017,008,11,010101,sierra norte,...,303.520996,no pobre,cuestionario reducido - durante la emergencia ...,telefónico,55.669254,007093,00,657,563.189392,no vulnerable
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1934,010360,063,2016,11,008793,010360,063,11,220504,selva,...,247.342163,no pobre,cuestionario completo - durante la emergencia ...,presencial,111.686325,008793,00,657,569.045166,vulnerable no pobre
1935,010360,064,2016,11,008793,010360,064,11,220504,selva,...,247.342163,no pobre,cuestionario completo - durante la emergencia ...,presencial,111.686325,008793,00,657,576.689819,vulnerable no pobre
1936,010360,142,2016,11,008793,010360,142,11,220504,selva,...,247.342163,no pobre,cuestionario completo - durante la emergencia ...,presencial,111.686325,008793,00,657,573.543945,vulnerable no pobre
1937,010360,144,2016,11,008793,010360,144,11,220504,selva,...,247.342163,no pobre,cuestionario completo - durante la emergencia ...,presencial,111.686325,008793,00,657,571.337524,no vulnerable


In [305]:
# Peek at a single wide column; the '_19' suffix presumably marks the 2019 wave -- confirm.
panel['linea_19']

0       302.621338
1       302.621338
2       302.621338
3       302.621338
4       302.621338
           ...    
1934    244.336487
1935    244.336487
1936    244.336487
1937    244.336487
1938    349.202942
Name: linea_19, Length: 1939, dtype: float32

In [280]:
# Convert the column Index to a plain Python list to see the names one per line.
list(panel.columns)

['cong',
 'viv',
 'year_16',
 'mes_16',
 'nconglome_16',
 'conglome_16',
 'vivienda_16',
 'hog',
 'ubigeo_16',
 'dominio_16',
 'percepho_16',
 'gru52hd2_16',
 'ingpeihd_16',
 'mieperho_16',
 'totmieho_16',
 'ia01hd_16',
 'ia02hd_16',
 'ingbruhd_16',
 'ingnethd_16',
 'pagesphd_16',
 'ingindhd_16',
 'ingauthd_16',
 'insedthd_16',
 'insedlhd_16',
 'paesechd_16',
 'ingseihd_16',
 'isecauhd_16',
 'ingexthd_16',
 'ingtrahd_16',
 'ingtexhd_16',
 'ingrenhd_16',
 'ingoexhd_16',
 'g05hd_16',
 'ig06hd_16',
 'g05hd1_16',
 'ig06hd1_16',
 'g05hd2_16',
 'ig06hd2_16',
 'g05hd3_16',
 'ig06hd3_16',
 'g05hd4_16',
 'ig06hd4_16',
 'g05hd5_16',
 'ig06hd5_16',
 'g05hd6_16',
 'ig06hd6_16',
 'g07hd_16',
 'ig08hd_16',
 'ig03hd1_16',
 'ig03hd2_16',
 'ig03hd3_16',
 'ig03hd4_16',
 'sg23_16',
 'sig24_16',
 'sg25_16',
 'sig26_16',
 'ga03hd_16',
 'ga04hd_16',
 'sg42_16',
 'sg42d_16',
 'sg421_16',
 'sg42d1_16',
 'sg422_16',
 'sg42d2_16',
 'sg423_16',
 'sg42d3_16',
 'ingtprhd_16',
 'ingtpuhd_16',
 'ingtpu01_16',
 'ingt

## 1.0 Keep letters and numbers, dropping special characters such as `_`

In [281]:
# Strip every character that is not a hyphen, an ASCII letter or a digit
# from each column name (this removes the '_' separators).
unwanted_chars = re.compile('[^-A-Za-z0-9]')
filter_list = [unwanted_chars.sub('', column_name) for column_name in panel.columns]
        


In [282]:
# Show the cleaned names: the underscores are gone (e.g. 'year_16' -> 'year16').
filter_list

['cong',
 'viv',
 'year16',
 'mes16',
 'nconglome16',
 'conglome16',
 'vivienda16',
 'hog',
 'ubigeo16',
 'dominio16',
 'percepho16',
 'gru52hd216',
 'ingpeihd16',
 'mieperho16',
 'totmieho16',
 'ia01hd16',
 'ia02hd16',
 'ingbruhd16',
 'ingnethd16',
 'pagesphd16',
 'ingindhd16',
 'ingauthd16',
 'insedthd16',
 'insedlhd16',
 'paesechd16',
 'ingseihd16',
 'isecauhd16',
 'ingexthd16',
 'ingtrahd16',
 'ingtexhd16',
 'ingrenhd16',
 'ingoexhd16',
 'g05hd16',
 'ig06hd16',
 'g05hd116',
 'ig06hd116',
 'g05hd216',
 'ig06hd216',
 'g05hd316',
 'ig06hd316',
 'g05hd416',
 'ig06hd416',
 'g05hd516',
 'ig06hd516',
 'g05hd616',
 'ig06hd616',
 'g07hd16',
 'ig08hd16',
 'ig03hd116',
 'ig03hd216',
 'ig03hd316',
 'ig03hd416',
 'sg2316',
 'sig2416',
 'sg2516',
 'sig2616',
 'ga03hd16',
 'ga04hd16',
 'sg4216',
 'sg42d16',
 'sg42116',
 'sg42d116',
 'sg42216',
 'sg42d216',
 'sg42316',
 'sg42d316',
 'ingtprhd16',
 'ingtpuhd16',
 'ingtpu0116',
 'ingtpu0216',
 'ingtpu0316',
 'ingtpu0416',
 'ingtpu0516',
 'gru11hd16'

## Drop special character '_'

In [283]:
# Remove every underscore from each column name.
# The pattern is a raw string: the former '[\_]' literal contained the invalid
# escape sequence "\_", which triggers a DeprecationWarning / SyntaxWarning on
# recent Python versions. '[_]' matches exactly the same character.
# Iterating over panel.columns directly also avoids rebuilding the full list of
# names on every loop iteration.
filter_list = [re.sub(r'[_]', '', column_name) for column_name in panel.columns]

filter_list

['cong',
 'viv',
 'year16',
 'mes16',
 'nconglome16',
 'conglome16',
 'vivienda16',
 'hog',
 'ubigeo16',
 'dominio16',
 'percepho16',
 'gru52hd216',
 'ingpeihd16',
 'mieperho16',
 'totmieho16',
 'ia01hd16',
 'ia02hd16',
 'ingbruhd16',
 'ingnethd16',
 'pagesphd16',
 'ingindhd16',
 'ingauthd16',
 'insedthd16',
 'insedlhd16',
 'paesechd16',
 'ingseihd16',
 'isecauhd16',
 'ingexthd16',
 'ingtrahd16',
 'ingtexhd16',
 'ingrenhd16',
 'ingoexhd16',
 'g05hd16',
 'ig06hd16',
 'g05hd116',
 'ig06hd116',
 'g05hd216',
 'ig06hd216',
 'g05hd316',
 'ig06hd316',
 'g05hd416',
 'ig06hd416',
 'g05hd516',
 'ig06hd516',
 'g05hd616',
 'ig06hd616',
 'g07hd16',
 'ig08hd16',
 'ig03hd116',
 'ig03hd216',
 'ig03hd316',
 'ig03hd416',
 'sg2316',
 'sig2416',
 'sg2516',
 'sig2616',
 'ga03hd16',
 'ga04hd16',
 'sg4216',
 'sg42d16',
 'sg42116',
 'sg42d116',
 'sg42216',
 'sg42d216',
 'sg42316',
 'sg42d316',
 'ingtprhd16',
 'ingtpuhd16',
 'ingtpu0116',
 'ingtpu0216',
 'ingtpu0316',
 'ingtpu0416',
 'ingtpu0516',
 'gru11hd16'

## 2.0 Keep only letters (upper or lower case)

In [284]:
# Keep only ASCII letters in each column name; digits and '_' are removed.
filter_list = [re.sub('[^a-zA-Z]', '', column_name) for column_name in panel.columns]
        

In [285]:
# Show the letters-only names; with digits stripped, many names now collide (e.g. several 'ghd').
filter_list

['cong',
 'viv',
 'year',
 'mes',
 'nconglome',
 'conglome',
 'vivienda',
 'hog',
 'ubigeo',
 'dominio',
 'percepho',
 'gruhd',
 'ingpeihd',
 'mieperho',
 'totmieho',
 'iahd',
 'iahd',
 'ingbruhd',
 'ingnethd',
 'pagesphd',
 'ingindhd',
 'ingauthd',
 'insedthd',
 'insedlhd',
 'paesechd',
 'ingseihd',
 'isecauhd',
 'ingexthd',
 'ingtrahd',
 'ingtexhd',
 'ingrenhd',
 'ingoexhd',
 'ghd',
 'ighd',
 'ghd',
 'ighd',
 'ghd',
 'ighd',
 'ghd',
 'ighd',
 'ghd',
 'ighd',
 'ghd',
 'ighd',
 'ghd',
 'ighd',
 'ghd',
 'ighd',
 'ighd',
 'ighd',
 'ighd',
 'ighd',
 'sg',
 'sig',
 'sg',
 'sig',
 'gahd',
 'gahd',
 'sg',
 'sgd',
 'sg',
 'sgd',
 'sg',
 'sgd',
 'sg',
 'sgd',
 'ingtprhd',
 'ingtpuhd',
 'ingtpu',
 'ingtpu',
 'ingtpu',
 'ingtpu',
 'ingtpu',
 'gruhd',
 'gruhd',
 'gruhd',
 'gruhd',
 'gruhd',
 'gruhd',
 'gruhd',
 'gruhd',
 'gruhd',
 'gruhd',
 'gruhd',
 'gruhd',
 'gruhd',
 'gruhd',
 'gruhd',
 'gruhd',
 'gruhd',
 'gruhd',
 'gruhd',
 'gruhd',
 'gruhd',
 'gruhd',
 'gruhd',
 'gruhd',
 'gruhd',
 'gruhd',

In [286]:
# Keep only the digits of each column name; names without digits become ''.
non_digits = re.compile('[^0-9]')
filter_list = [non_digits.sub('', column_name) for column_name in panel.columns]

filter_list

['',
 '',
 '16',
 '16',
 '16',
 '16',
 '16',
 '',
 '16',
 '16',
 '16',
 '52216',
 '16',
 '16',
 '16',
 '0116',
 '0216',
 '16',
 '16',
 '16',
 '16',
 '16',
 '16',
 '16',
 '16',
 '16',
 '16',
 '16',
 '16',
 '16',
 '16',
 '16',
 '0516',
 '0616',
 '05116',
 '06116',
 '05216',
 '06216',
 '05316',
 '06316',
 '05416',
 '06416',
 '05516',
 '06516',
 '05616',
 '06616',
 '0716',
 '0816',
 '03116',
 '03216',
 '03316',
 '03416',
 '2316',
 '2416',
 '2516',
 '2616',
 '0316',
 '0416',
 '4216',
 '4216',
 '42116',
 '42116',
 '42216',
 '42216',
 '42316',
 '42316',
 '16',
 '16',
 '0116',
 '0216',
 '0316',
 '0416',
 '0516',
 '1116',
 '12116',
 '12216',
 '13116',
 '13216',
 '13316',
 '13416',
 '1416',
 '14116',
 '14216',
 '14316',
 '14416',
 '14516',
 '14616',
 '2116',
 '22116',
 '22216',
 '23116',
 '23216',
 '23316',
 '2416',
 '3116',
 '32116',
 '32216',
 '33116',
 '33216',
 '33316',
 '3416',
 '4116',
 '42116',
 '42216',
 '43116',
 '43216',
 '43316',
 '4416',
 '5116',
 '52116',
 '53116',
 '53216',
 '53316

## 3.0 Drop the last three characters of each name

In [291]:
# Cut the final three characters off each column name, which strips the
# '_16' ... '_20' year suffix. Names without a suffix are cut as well
# ('cong' -> 'c', 'viv' -> '', 'hog' -> '').
last_three_chars = re.compile(".{3}$")
filter_list = [last_three_chars.sub('', column_name) for column_name in panel.columns]

In [299]:
# One stripped name per column of panel, so this equals the number of columns.
len(filter_list)

804

In [292]:
# Inspect the names after removing their last three characters;
# the short id columns collapse ('cong' -> 'c', 'viv' -> '').
filter_list

['c',
 '',
 'year',
 'mes',
 'nconglome',
 'conglome',
 'vivienda',
 '',
 'ubigeo',
 'dominio',
 'percepho',
 'gru52hd2',
 'ingpeihd',
 'mieperho',
 'totmieho',
 'ia01hd',
 'ia02hd',
 'ingbruhd',
 'ingnethd',
 'pagesphd',
 'ingindhd',
 'ingauthd',
 'insedthd',
 'insedlhd',
 'paesechd',
 'ingseihd',
 'isecauhd',
 'ingexthd',
 'ingtrahd',
 'ingtexhd',
 'ingrenhd',
 'ingoexhd',
 'g05hd',
 'ig06hd',
 'g05hd1',
 'ig06hd1',
 'g05hd2',
 'ig06hd2',
 'g05hd3',
 'ig06hd3',
 'g05hd4',
 'ig06hd4',
 'g05hd5',
 'ig06hd5',
 'g05hd6',
 'ig06hd6',
 'g07hd',
 'ig08hd',
 'ig03hd1',
 'ig03hd2',
 'ig03hd3',
 'ig03hd4',
 'sg23',
 'sig24',
 'sg25',
 'sig26',
 'ga03hd',
 'ga04hd',
 'sg42',
 'sg42d',
 'sg421',
 'sg42d1',
 'sg422',
 'sg42d2',
 'sg423',
 'sg42d3',
 'ingtprhd',
 'ingtpuhd',
 'ingtpu01',
 'ingtpu02',
 'ingtpu03',
 'ingtpu04',
 'ingtpu05',
 'gru11hd',
 'gru12hd1',
 'gru12hd2',
 'gru13hd1',
 'gru13hd2',
 'gru13hd3',
 'gru13hd4',
 'gru14hd',
 'gru14hd1',
 'gru14hd2',
 'gru14hd3',
 'gru14hd4',
 'gru14

In [296]:
# Remove repeated names while keeping first-seen order
# (dict keys are unique and preserve insertion order).
new_list = list({stub: None for stub in filter_list})

In [298]:
# Count the distinct names left after de-duplication.
len(new_list)

182

In [297]:
# Inspect the de-duplicated names; the first two entries ('c' and '') are
# leftovers of the identifier columns, not variable stubs.
new_list

['c',
 '',
 'year',
 'mes',
 'nconglome',
 'conglome',
 'vivienda',
 'ubigeo',
 'dominio',
 'percepho',
 'gru52hd2',
 'ingpeihd',
 'mieperho',
 'totmieho',
 'ia01hd',
 'ia02hd',
 'ingbruhd',
 'ingnethd',
 'pagesphd',
 'ingindhd',
 'ingauthd',
 'insedthd',
 'insedlhd',
 'paesechd',
 'ingseihd',
 'isecauhd',
 'ingexthd',
 'ingtrahd',
 'ingtexhd',
 'ingrenhd',
 'ingoexhd',
 'g05hd',
 'ig06hd',
 'g05hd1',
 'ig06hd1',
 'g05hd2',
 'ig06hd2',
 'g05hd3',
 'ig06hd3',
 'g05hd4',
 'ig06hd4',
 'g05hd5',
 'ig06hd5',
 'g05hd6',
 'ig06hd6',
 'g07hd',
 'ig08hd',
 'ig03hd1',
 'ig03hd2',
 'ig03hd3',
 'ig03hd4',
 'sg23',
 'sig24',
 'sg25',
 'sig26',
 'ga03hd',
 'ga04hd',
 'sg42',
 'sg42d',
 'sg421',
 'sg42d1',
 'sg422',
 'sg42d2',
 'sg423',
 'sg42d3',
 'ingtprhd',
 'ingtpuhd',
 'ingtpu01',
 'ingtpu02',
 'ingtpu03',
 'ingtpu04',
 'ingtpu05',
 'gru11hd',
 'gru12hd1',
 'gru12hd2',
 'gru13hd1',
 'gru13hd2',
 'gru13hd3',
 'gru13hd4',
 'gru14hd',
 'gru14hd1',
 'gru14hd2',
 'gru14hd3',
 'gru14hd4',
 'gru14hd5',

In [300]:
# Keep only the variables of interest. The leftovers 'c' and '' come from the
# identifier columns ('cong', 'viv', 'hog'), which carry no year suffix.
# Filtering by value rather than slicing off the first two entries keeps this
# cell idempotent: running it again no longer discards 'year' and 'mes'.
new_list = [stub for stub in new_list if stub not in ('c', '')]


In [301]:
# Confirm that the list of stub names now starts at 'year'.
new_list

['year',
 'mes',
 'nconglome',
 'conglome',
 'vivienda',
 'ubigeo',
 'dominio',
 'percepho',
 'gru52hd2',
 'ingpeihd',
 'mieperho',
 'totmieho',
 'ia01hd',
 'ia02hd',
 'ingbruhd',
 'ingnethd',
 'pagesphd',
 'ingindhd',
 'ingauthd',
 'insedthd',
 'insedlhd',
 'paesechd',
 'ingseihd',
 'isecauhd',
 'ingexthd',
 'ingtrahd',
 'ingtexhd',
 'ingrenhd',
 'ingoexhd',
 'g05hd',
 'ig06hd',
 'g05hd1',
 'ig06hd1',
 'g05hd2',
 'ig06hd2',
 'g05hd3',
 'ig06hd3',
 'g05hd4',
 'ig06hd4',
 'g05hd5',
 'ig06hd5',
 'g05hd6',
 'ig06hd6',
 'g07hd',
 'ig08hd',
 'ig03hd1',
 'ig03hd2',
 'ig03hd3',
 'ig03hd4',
 'sg23',
 'sig24',
 'sg25',
 'sig26',
 'ga03hd',
 'ga04hd',
 'sg42',
 'sg42d',
 'sg421',
 'sg42d1',
 'sg422',
 'sg42d2',
 'sg423',
 'sg42d3',
 'ingtprhd',
 'ingtpuhd',
 'ingtpu01',
 'ingtpu02',
 'ingtpu03',
 'ingtpu04',
 'ingtpu05',
 'gru11hd',
 'gru12hd1',
 'gru12hd2',
 'gru13hd1',
 'gru13hd2',
 'gru13hd3',
 'gru13hd4',
 'gru14hd',
 'gru14hd1',
 'gru14hd2',
 'gru14hd3',
 'gru14hd4',
 'gru14hd5',
 'gru14hd6

In [302]:
# Display the wide panel once more before reshaping; the filter_list cells only read its column names.
panel

Unnamed: 0,cong,viv,year_16,mes_16,nconglome_16,conglome_16,vivienda_16,hog,ubigeo_16,dominio_16,...,linea_20,pobreza_20,tipocuestionario_20,tipoentrevista_20,factor07_20,nconglome_20,sub_conglome_20,lineav_rpl_20,lineav_20,pobrezav_20
0,005012,039,2016,01,007083,005012,039,11,010101,sierra norte,...,303.520996,no pobre,cuestionario reducido - durante la emergencia ...,telefónico,39.763752,007083,00,657,553.222473,no vulnerable
1,005012,025,2016,01,007083,005012,025,11,010101,sierra norte,...,303.520996,no pobre,cuestionario reducido - durante la emergencia ...,telefónico,39.763752,007083,00,657,546.850037,no vulnerable
2,005012,012,2016,01,007083,005012,012,11,010101,sierra norte,...,303.520996,no pobre,cuestionario reducido - durante la emergencia ...,telefónico,39.763752,007083,00,657,561.609619,vulnerable no pobre
3,005017,075,2016,05,007093,005017,075,11,010101,sierra norte,...,303.520996,no pobre,cuestionario reducido - durante la emergencia ...,telefónico,55.669254,007093,00,657,559.775146,vulnerable no pobre
4,005017,008,2016,05,007093,005017,008,11,010101,sierra norte,...,303.520996,no pobre,cuestionario reducido - durante la emergencia ...,telefónico,55.669254,007093,00,657,563.189392,no vulnerable
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1934,010360,063,2016,11,008793,010360,063,11,220504,selva,...,247.342163,no pobre,cuestionario completo - durante la emergencia ...,presencial,111.686325,008793,00,657,569.045166,vulnerable no pobre
1935,010360,064,2016,11,008793,010360,064,11,220504,selva,...,247.342163,no pobre,cuestionario completo - durante la emergencia ...,presencial,111.686325,008793,00,657,576.689819,vulnerable no pobre
1936,010360,142,2016,11,008793,010360,142,11,220504,selva,...,247.342163,no pobre,cuestionario completo - durante la emergencia ...,presencial,111.686325,008793,00,657,573.543945,vulnerable no pobre
1937,010360,144,2016,11,008793,010360,144,11,220504,selva,...,247.342163,no pobre,cuestionario completo - durante la emergencia ...,presencial,111.686325,008793,00,657,571.337524,no vulnerable


In [193]:
# Column Index of the wide panel, for comparison with the reshaped frame.
panel.columns

Index(['cong', 'viv', 'year_16', 'mes_16', 'nconglome_16', 'conglome_16',
       'vivienda_16', 'hog', 'ubigeo_16', 'dominio_16',
       ...
       'linea_20', 'pobreza_20', 'tipocuestionario_20', 'tipoentrevista_20',
       'factor07_20', 'nconglome_20', 'sub_conglome_20', 'lineav_rpl_20',
       'lineav_20', 'pobrezav_20'],
      dtype='object', length=804)

## Reshape wide to long

In [303]:
# Reshape from wide (one column per variable and year, e.g. 'year_16') to long:
# one row per household (cong, viv, hog) and period, where 'period' holds the
# numeric suffix that follows the '_' separator. reset_index() turns the
# identifier and period index levels back into ordinary columns.
reshape_panel = (
    pd.wide_to_long(
        df=panel,
        stubnames=new_list,
        i=['cong', 'viv', 'hog'],
        j='period',
        sep='_',
    )
    .reset_index()
)

In [1]:
# Variable labels: column name -> human-readable description (label text is in Spanish).
var_labels = {
    "muph": "Identificador del hogar",
    "unidos": "Dummy si el distrito es beneficiado por el programa",
}

In [3]:
# Inspect the variable-label mapping defined in the previous cell.
var_labels 

# Value labels: for each coded variable, map the stored code to its meaning
# (label text is in Spanish).
val_labels = {
    "unidos": {
        "1": "El programa si aplica en el distrito",
        "0": "El programa no aplica en el distrito",
    },
}

In [5]:
# Look up the value labels of a single variable ('unidos').
val_labels['unidos']

{'1': 'El programa si aplica en el distrito',
 '0': 'El programa no aplica en el distrito'}

In [195]:
# Columns after reshaping: the id columns, 'period', then one column per stub name.
reshape_panel.columns

Index(['cong', 'viv', 'hog', 'period', 'year', 'mes', 'nconglome', 'conglome',
       'vivienda', 'ubigeo',
       ...
       'ingtpu09', 'ingtpu091', 'inghog1d1', 'inghog2d1', 'tipocuestionario',
       'tipoentrevista', 'sub_conglome', 'lineav_rpl', 'lineav', 'pobrezav'],
      dtype='object', length=184)

In [304]:
# Display the long-format result; variables with no column for a given period show up as missing values.
reshape_panel

Unnamed: 0,cong,viv,hog,period,year,mes,nconglome,conglome,vivienda,ubigeo,...,ingtpu09,ingtpu091,inghog1d1,inghog2d1,tipocuestionario,tipoentrevista,sub_conglome,lineav_rpl,lineav,pobrezav
0,005012,039,11,16,2016,01,007083,005012,039,010101,...,,,,,,,,,,
1,005012,039,11,17,2017,01,007083,005012,039,010101,...,,,,,,,,,,
2,005012,039,11,18,2018,06,007083,005012,039,010101,...,,,,,,,,,,
3,005012,039,11,19,2019,06,007083,005012,039,010101,...,,,,,,,,,,
4,005012,039,11,20,2020,06,007083,005012,039,010101,...,0.0,0.0,36375.703125,31562.705078,cuestionario reducido - durante la emergencia ...,telefónico,00,657.0,553.222473,no vulnerable
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9690,010370,103,11,16,2016,01,044447,010370,103,230101,...,,,,,,,,,,
9691,010370,103,11,17,2017,01,044447,010370,103,230101,...,,,,,,,,,,
9692,010370,103,11,18,2018,02,044447,010370,103,230101,...,,,,,,,,,,
9693,010370,103,11,19,2019,02,044447,010370,103,230101,...,,,,,,,,,,


# References

https://docs.python.org/es/3/library/re.html

https://pandas.pydata.org/docs/reference/api/pandas.wide_to_long.html


https://pandas.pydata.org/docs/user_guide/reshaping.html