# Tanzania CEFA 2017 EE dataset
This notebook is used to create the dataset for CEFA 201 EE. The dataset consists of multiple files.
- 2017_Q_nexus_HH
- 2017_Q_nexus_HH_Family_info
- 2017_Q_nexus_HH_Appliances
- 2017_Q_nexus_US
- 2017_Q_nexus_US_Appliances
- Tanzania
- Village_GPS_info
- GADM_level_3
- GADM_level_1
- GADM_level_2



In [4]:
import os
import sys
import pandas as pd
import numpy as np

sys.path.append("../../")  # Adds higher directory to python modules path.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'
from core.ODEDataset import ODEDataset
from utils import common_modifiers, appliances_modifiers, geospatial, socio_modifiers

import pathlib
from utils import constants


In [5]:
DB_ROOT = "../../playground/data/CEFA_Tanzania/2017_EE"
root = pathlib.Path(DB_ROOT)

ID_COL = "ID"

## Clusters
Clusters are group of columns used together to create a new feature. 


## Categories
Map the original values to the standard values.


In [6]:
Education_level_original2final = {'P': 'Primary education',
                                  'S': 'Lower secondary education',
                                  'U': 'Bachelor\'s or equivalent level'}

Socio_status_original2final = {
    'Self-F Self-N': np.nan,
    'Self-F': 'Own-account worker farm',
    'Self-N': 'Own-account worker non-farm',
    'Sal-N Self-F': 'Worker not classifiable by status',
    'Cas': 'Worker not classifiable by status',
}
Tariff_payment_frequency_original2final = {
    'W': 'Weekly',
    'M': 'Monthly',
    'M3': 'Every 3 months',
}


## General
Merge the general information from the datasets:
- 2017_Q_nexus_HH
- 2017_Q_nexus_US

In [7]:
Q_nexus_HH = ODEDataset("2017_Q_nexus_HH")
Q_nexus_US = ODEDataset("Q_nexus_US")

Q_nexus_HH.from_excel(root.joinpath("2017_Q_nexus_HH.xlsx"))
Q_nexus_US.from_excel(root.joinpath("2017_Q_nexus_US.xlsx"))

Q_nexus_HH = Q_nexus_HH.apply(common_modifiers.rename({
    "meta-instanceID": ID_COL,
}))
Q_nexus_US = Q_nexus_US.apply(common_modifiers.rename({
    "meta-instanceID": ID_COL,
}))

cols_in_hh_not_in_us = ['Q_2017_HH', 'Role_old', 'HH_members', 'HH_females', 'SET-OF-Family_info', 'HH_source',
                        'HH_source_other',
                        'HH_business', 'HH_business_old', 'HH_business_other_old', 'Farming_day', 'Farming_year',
                        'HH_income',
                        'HH_expenditure', 'HH_expenditure_old', 'Electricity_use-Housework_time',
                        'Electricity_use-Housework_evening',
                        'Electricity_use-Study_time', 'Electricity_use-Study_time_ee', 'Electricity_use-Study_diff_why',
                        'Unconnection_reason', 'Other_device', 'Recovery', 'Study_lighting', 'Opinion-untitled29',
                        'Opinion-untitled31',
                        'Opinion-untitled32', 'Opinion-untitled33', 'Opinion-untitled34', 'Opinion-untitled43',
                        'Opinion-untitled44',
                        'Opinion-untitled45', 'Opinion-untitled46', 'Loans', 'Loan_obstacle', 'Loan_obstacle_other']

cols_in_us_not_in_hh = ['Q_2017_US', 'Class', 'Class_old', 'Starting', 'Starting_old', 'Employees', 'Operation',
                        'Operating_hours',
                        'Customers', 'Sales_min', 'Sales_max', 'Expenses', 'Expenses_other', 'Problems', 'Owner',
                        'Owner_other',
                        'Electricity_use-Limits', 'Unconnectino_reason', 'Device_other']

Q_nexus_HH = Q_nexus_HH.apply(common_modifiers.add_const_driver_many({c: np.nan for c in cols_in_us_not_in_hh}))
Q_nexus_US = Q_nexus_US.apply(common_modifiers.add_const_driver_many({c: np.nan for c in cols_in_hh_not_in_us}))

Q_General = Q_nexus_HH.concat(Q_nexus_US)

Q_General.preview()

Unnamed: 0,SubmissionDate,ID,Q_2017_HH,Date,Location-Latitude,Location-Longitude,Location-Altitude,Location-Accuracy,Village,Interviewee,...,Sales_min,Sales_max,Expenses,Expenses_other,Problems,Owner,Owner_other,Electricity_use-Limits,Unconnectino_reason,Device_other
0,11-ott-2017 10.00.41,uuid:06f6f950-541b-4d0e-af35-2dc6ea7e396d,F02,2017-09-27,,,,,Ukalawa,Come 01,...,,,,,,,,,,
1,11-ott-2017 10.02.05,uuid:48aacebf-91a4-4857-b47f-1ca6983ed6e4,F06,2017-09-27,,,,,Ukalawa,", Fredi Lukwale",...,,,,,,,,,,
2,11-ott-2017 10.03.08,uuid:9e706c24-564d-48c1-a88c-849c267f38d1,F08,2017-09-27,,,,,Ukalawa,Baraka Mpollo,...,,,,,,,,,,
3,11-ott-2017 10.03.21,uuid:0a585017-59d8-4674-8490-c1e198a2a216,F0,2017-09-27,,,,,Ukalawa,Nord msoso,...,,,,,,,,,,
4,11-ott-2017 10.03.23,uuid:80c0ea7a-ccf2-4ffa-a913-6b6017b65777,F11,2017-09-27,,,,,Ukalawa,Argentina nyagwe,...,,,,,,,,,,


In [8]:
gadm_level_1_df = pd.read_excel(root.joinpath("GADM_level_1.xlsx"))
gadm_level_2_df = pd.read_excel(root.joinpath("GADM_level_2.xlsx"))
gadm_level_3_df = pd.read_excel(root.joinpath("GADM_level_3.xlsx"))

village_gps_info_df = pd.read_excel(root.joinpath("Village_GPS_info.xlsx"))

In [9]:
village_gps_info_df.head(40)

Unnamed: 0,ID,Name,Latitude,Longitude,GADM_level_1,GADM_level_2,GADM_level_3,Climate1,City_dist,Grid_dist,masl,Population
0,1,Chissinguane,-20.204905,34.155214,Sofala,Buzi,Estaquinha,3,32.334,61.408,146,25622.0
1,2,Guaragua,-20.8628,32.5842,Manica,Mossurize,Chiurairue,6,41.447,27.612,366,7933.0
2,3,Ndoro,-18.116528,34.943222,Sofala,Caia,Caia-sede,3,55.286,63.773,37,19161.0
3,4,Nyombo,-9.199581,34.922727,Njombe,Njombe,Ikuna,12,39.542,18.013,1819,
4,5,Ikondo,-9.07955,35.240346,Njombe,Njombe,Ikondo,11,52.133,33.952,1235,4011.0
5,6,Ninga,-9.106601,35.002982,Njombe,Njombe,Ninga,12,33.834,25.986,1784,2865.0
6,7,Mahongole,-8.563448,34.267768,Njombe,Njombe,Mahongole,12,70.757,25.564,1034,2250.0
7,8,Ukalawa,-9.158213,35.293634,Njombe,Njombe,Ikondo,12,61.748,43.771,1554,1766.0
8,9,Kitole,-9.154372,35.32822,Njombe,Njombe,Ikondo,12,64.797,43.878,1427,1321.0
9,10,Kidegembye,-9.280235,35.001627,Njombe,Njombe,Kidegembye,12,51.021,27.148,1734,


### GeoSpatial Information

In [10]:
Q_General = Q_General.apply(common_modifiers.add_const_driver('GADM_level_0', 'Tanzania'))
Q_General = Q_General.new_feature("Township/Village",
                                  lambda row: row["Village"])

Q_General = Q_General.new_feature("GADM_level_1",
                                  geospatial.gis_info_by_village_level("GADM_level_1",
                                                                       village_gps_info_df))

Q_General = Q_General.new_feature("GADM_level_2",
                                  geospatial.gis_info_by_village_level("GADM_level_2",
                                                                       village_gps_info_df))

Q_General = Q_General.new_feature("GADM_level_3",
                                  geospatial.gis_info_by_village_level("GADM_level_3",
                                                                       village_gps_info_df))

Q_General = Q_General.new_feature("Climate_zone_lev_1",
                                  geospatial.gis_info_by_gadm_level('Climate_majority',
                                                                    gadm_level_1_df,
                                                                    'GADM_level_1'))
Q_General = Q_General.new_feature("Climate_zone_lev_2",
                                  geospatial.gis_info_by_gadm_level('Climate_majority',
                                                                    gadm_level_2_df,
                                                                    'GADM_level_2'))


### Socio-Economic Information

In [11]:
Family_info = ODEDataset("2017_Q_nexus_HH_Family_info")
Family_info.from_excel(root.joinpath("2017_Q_nexus_HH_Family_info.xlsx"))
Family_info = Family_info.apply(common_modifiers.rename({
    "PARENT_KEY": ID_COL,
}))
Family_info = Family_info.group_by(ID_COL).drop_columns(['KEY'])
Q_General = Q_General.merge(Family_info, ID_COL)

In [12]:
Q_General.preview()

Unnamed: 0,SubmissionDate,ID,Q_2017_HH,Date,Location-Latitude,Location-Longitude,Location-Altitude,Location-Accuracy,Village,Interviewee,...,GADM_level_1,GADM_level_2,GADM_level_3,Climate_zone_lev_1,Climate_zone_lev_2,HH_age,HH_male,HH_school,HH_education,SET-OF-Family_info_y
0,11-ott-2017 10.00.41,uuid:06f6f950-541b-4d0e-af35-2dc6ea7e396d,F02,2017-09-27,,,,,Ukalawa,Come 01,...,Njombe,Njombe,Ikondo,12,12,"[0, 7, 19]","[1, 1, 2]","[0, 1, 0]","[N, P, Tech]",[uuid:06f6f950-541b-4d0e-af35-2dc6ea7e396d/Fam...
1,11-ott-2017 10.02.05,uuid:48aacebf-91a4-4857-b47f-1ca6983ed6e4,F06,2017-09-27,,,,,Ukalawa,", Fredi Lukwale",...,Njombe,Njombe,Ikondo,12,12,"[0, 7, 19]","[3, 2, 2]","[2, 2, 0]","[P, P, nan]",[uuid:48aacebf-91a4-4857-b47f-1ca6983ed6e4/Fam...
2,11-ott-2017 10.03.08,uuid:9e706c24-564d-48c1-a88c-849c267f38d1,F08,2017-09-27,,,,,Ukalawa,Baraka Mpollo,...,Njombe,Njombe,Ikondo,12,12,[19],[2],[0],[nan],[uuid:9e706c24-564d-48c1-a88c-849c267f38d1/Fam...
3,11-ott-2017 10.03.21,uuid:0a585017-59d8-4674-8490-c1e198a2a216,F0,2017-09-27,,,,,Ukalawa,Nord msoso,...,Njombe,Njombe,Ikondo,12,12,"[0, 7, 19]","[1, 3, 2]","[0, 3, 0]","[N, P, nan]",[uuid:0a585017-59d8-4674-8490-c1e198a2a216/Fam...
4,11-ott-2017 10.03.23,uuid:80c0ea7a-ccf2-4ffa-a913-6b6017b65777,F11,2017-09-27,,,,,Ukalawa,Argentina nyagwe,...,Njombe,Njombe,Ikondo,12,12,"[0, 7, 14, 19]","[1, 1, 1, 2]","[0, 1, 1, 0]","[N, P, P, nan]",[uuid:80c0ea7a-ccf2-4ffa-a913-6b6017b65777/Fam...


In [13]:
tanzania_age_distr = pd.read_excel(root.joinpath("Tanzania.xlsx"))


def calculate_age_distribution(start, finish, threshold, age_distribution: [float]):
    distribution = [0, 0]
    for i in np.arange(start, finish + 1):
        if i < threshold:
            distribution[0] = age_distribution[i]
        else:
            distribution[1] += age_distribution[i]

    return [x / sum(distribution) for x in distribution]


tanzania_age_distribution = tanzania_age_distr['Distr']


In [14]:
Q_General.preview()

Unnamed: 0,SubmissionDate,ID,Q_2017_HH,Date,Location-Latitude,Location-Longitude,Location-Altitude,Location-Accuracy,Village,Interviewee,...,GADM_level_1,GADM_level_2,GADM_level_3,Climate_zone_lev_1,Climate_zone_lev_2,HH_age,HH_male,HH_school,HH_education,SET-OF-Family_info_y
0,11-ott-2017 10.00.41,uuid:06f6f950-541b-4d0e-af35-2dc6ea7e396d,F02,2017-09-27,,,,,Ukalawa,Come 01,...,Njombe,Njombe,Ikondo,12,12,"[0, 7, 19]","[1, 1, 2]","[0, 1, 0]","[N, P, Tech]",[uuid:06f6f950-541b-4d0e-af35-2dc6ea7e396d/Fam...
1,11-ott-2017 10.02.05,uuid:48aacebf-91a4-4857-b47f-1ca6983ed6e4,F06,2017-09-27,,,,,Ukalawa,", Fredi Lukwale",...,Njombe,Njombe,Ikondo,12,12,"[0, 7, 19]","[3, 2, 2]","[2, 2, 0]","[P, P, nan]",[uuid:48aacebf-91a4-4857-b47f-1ca6983ed6e4/Fam...
2,11-ott-2017 10.03.08,uuid:9e706c24-564d-48c1-a88c-849c267f38d1,F08,2017-09-27,,,,,Ukalawa,Baraka Mpollo,...,Njombe,Njombe,Ikondo,12,12,[19],[2],[0],[nan],[uuid:9e706c24-564d-48c1-a88c-849c267f38d1/Fam...
3,11-ott-2017 10.03.21,uuid:0a585017-59d8-4674-8490-c1e198a2a216,F0,2017-09-27,,,,,Ukalawa,Nord msoso,...,Njombe,Njombe,Ikondo,12,12,"[0, 7, 19]","[1, 3, 2]","[0, 3, 0]","[N, P, nan]",[uuid:0a585017-59d8-4674-8490-c1e198a2a216/Fam...
4,11-ott-2017 10.03.23,uuid:80c0ea7a-ccf2-4ffa-a913-6b6017b65777,F11,2017-09-27,,,,,Ukalawa,Argentina nyagwe,...,Njombe,Njombe,Ikondo,12,12,"[0, 7, 14, 19]","[1, 1, 1, 2]","[0, 1, 1, 0]","[N, P, P, nan]",[uuid:80c0ea7a-ccf2-4ffa-a913-6b6017b65777/Fam...


In [15]:
Q_General = Q_General.new_feature("Number_age_14/17",
                                  common_modifiers.select_multi_section_double_column_equal_to_else_zero('HH_age', 14,
                                                                                                         'HH_male'))

Q_General = Q_General.new_feature("Number_age_18/19",
                                  common_modifiers.select_multi_section_double_column_equal_to_else_zero('HH_age', 18,
                                                                                                         'HH_male'))

Q_General = Q_General.new_feature("Number_age_19+",
                                  common_modifiers.select_multi_section_double_column_equal_to_else_zero('HH_age', 19,
                                                                                                         'HH_male'))
Q_General.preview()

age_distribution_youngsters_adults = calculate_age_distribution(14, 17, 15, tanzania_age_distribution)

age_distribution_adults_elderly = calculate_age_distribution(19, 100, 65, tanzania_age_distribution)

Q_General = Q_General.new_feature("Extracted_age_14/17",
                                  socio_modifiers.extract_age_group_by_age_distribution("Number_age_14/17",
                                                                                        age_distribution_youngsters_adults))

Q_General = Q_General.new_feature("Extracted_age_19+",
                                  socio_modifiers.extract_age_group_by_age_distribution("Number_age_19+",
                                                                                        age_distribution_adults_elderly))

Q_General = Q_General.new_feature("Number_adults",
                                  lambda row: row['Extracted_age_14/17'][1] + row['Number_age_18/19'] +
                                              row['Extracted_age_19+'][0] * 0.535)

Q_General.preview()

Unnamed: 0,SubmissionDate,ID,Q_2017_HH,Date,Location-Latitude,Location-Longitude,Location-Altitude,Location-Accuracy,Village,Interviewee,...,HH_male,HH_school,HH_education,SET-OF-Family_info_y,Number_age_14/17,Number_age_18/19,Number_age_19+,Extracted_age_14/17,Extracted_age_19+,Number_adults
0,11-ott-2017 10.00.41,uuid:06f6f950-541b-4d0e-af35-2dc6ea7e396d,F02,2017-09-27,,,,,Ukalawa,Come 01,...,"[1, 1, 2]","[0, 1, 0]","[N, P, Tech]",[uuid:06f6f950-541b-4d0e-af35-2dc6ea7e396d/Fam...,0,0,2,"[0.0, 0.0]","[0.2104863010053446, 1.7895136989946554]",0.11261
1,11-ott-2017 10.02.05,uuid:48aacebf-91a4-4857-b47f-1ca6983ed6e4,F06,2017-09-27,,,,,Ukalawa,", Fredi Lukwale",...,"[3, 2, 2]","[2, 2, 0]","[P, P, nan]",[uuid:48aacebf-91a4-4857-b47f-1ca6983ed6e4/Fam...,0,0,2,"[0.0, 0.0]","[0.2104863010053446, 1.7895136989946554]",0.11261
2,11-ott-2017 10.03.08,uuid:9e706c24-564d-48c1-a88c-849c267f38d1,F08,2017-09-27,,,,,Ukalawa,Baraka Mpollo,...,[2],[0],[nan],[uuid:9e706c24-564d-48c1-a88c-849c267f38d1/Fam...,0,0,2,"[0.0, 0.0]","[0.2104863010053446, 1.7895136989946554]",0.11261
3,11-ott-2017 10.03.21,uuid:0a585017-59d8-4674-8490-c1e198a2a216,F0,2017-09-27,,,,,Ukalawa,Nord msoso,...,"[1, 3, 2]","[0, 3, 0]","[N, P, nan]",[uuid:0a585017-59d8-4674-8490-c1e198a2a216/Fam...,0,0,2,"[0.0, 0.0]","[0.2104863010053446, 1.7895136989946554]",0.11261
4,11-ott-2017 10.03.23,uuid:80c0ea7a-ccf2-4ffa-a913-6b6017b65777,F11,2017-09-27,,,,,Ukalawa,Argentina nyagwe,...,"[1, 1, 1, 2]","[0, 1, 1, 0]","[N, P, P, nan]",[uuid:80c0ea7a-ccf2-4ffa-a913-6b6017b65777/Fam...,1,0,2,"[0.288797906094194, 0.7112020939058061]","[0.2104863010053446, 1.7895136989946554]",0.823812


In [16]:
Q_General = Q_General.new_feature('Age_', lambda x: [x['Age']])
Q_General = Q_General.new_feature('Role_', lambda x: [x['Role']])

Q_General = Q_General.new_feature('Age_HHH', socio_modifiers.extract_age_of_head('Age_', 'Role_', 'Head'))


In [17]:
Q_General = Q_General.new_feature('Education_level_HHH',
                                  common_modifiers.categorize('Education', Education_level_original2final))
Q_General = Q_General.new_feature('Socio_status_HHH',
                                  common_modifiers.categorize('HH_source', Socio_status_original2final))
Q_General.preview()

Unnamed: 0,SubmissionDate,ID,Q_2017_HH,Date,Location-Latitude,Location-Longitude,Location-Altitude,Location-Accuracy,Village,Interviewee,...,Number_age_18/19,Number_age_19+,Extracted_age_14/17,Extracted_age_19+,Number_adults,Age_,Role_,Age_HHH,Education_level_HHH,Socio_status_HHH
0,11-ott-2017 10.00.41,uuid:06f6f950-541b-4d0e-af35-2dc6ea7e396d,F02,2017-09-27,,,,,Ukalawa,Come 01,...,0,2,"[0.0, 0.0]","[0.2104863010053446, 1.7895136989946554]",0.11261,[30],[Wife],,Primary education,Worker not classifiable by status
1,11-ott-2017 10.02.05,uuid:48aacebf-91a4-4857-b47f-1ca6983ed6e4,F06,2017-09-27,,,,,Ukalawa,", Fredi Lukwale",...,0,2,"[0.0, 0.0]","[0.2104863010053446, 1.7895136989946554]",0.11261,[37],[Wife],,Primary education,Own-account worker farm
2,11-ott-2017 10.03.08,uuid:9e706c24-564d-48c1-a88c-849c267f38d1,F08,2017-09-27,,,,,Ukalawa,Baraka Mpollo,...,0,2,"[0.0, 0.0]","[0.2104863010053446, 1.7895136989946554]",0.11261,[24],[Head],24.0,Primary education,Own-account worker farm
3,11-ott-2017 10.03.21,uuid:0a585017-59d8-4674-8490-c1e198a2a216,F0,2017-09-27,,,,,Ukalawa,Nord msoso,...,0,2,"[0.0, 0.0]","[0.2104863010053446, 1.7895136989946554]",0.11261,[33],[Head],33.0,Lower secondary education,Own-account worker farm
4,11-ott-2017 10.03.23,uuid:80c0ea7a-ccf2-4ffa-a913-6b6017b65777,F11,2017-09-27,,,,,Ukalawa,Argentina nyagwe,...,0,2,"[0.288797906094194, 0.7112020939058061]","[0.2104863010053446, 1.7895136989946554]",0.823812,[30],[Wife],,Primary education,Own-account worker farm


In [18]:
Q_General = Q_General.apply(common_modifiers.rename({
    'HH_income': 'Income_of_household',
    'HH_expenditure': 'Monthly_expenditure',
}))
# HH_with_home_business


Q_General = Q_General.new_feature('HH_with_home_business',
                                  common_modifiers.single_column_assignment('HH_business_old', '-', 'not_empty', 'Yes',
                                                                            'No'))
Q_General = Q_General.apply(common_modifiers.add_const_driver('Survey_date', '2017'))
Q_General = Q_General.new_feature('Measurement_age', lambda x: float(x['Survey_date']) - float(x['Electricity_use-Acces_date']))
Q_General.preview()



Unnamed: 0,SubmissionDate,ID,Q_2017_HH,Date,Location-Latitude,Location-Longitude,Location-Altitude,Location-Accuracy,Village,Interviewee,...,Extracted_age_19+,Number_adults,Age_,Role_,Age_HHH,Education_level_HHH,Socio_status_HHH,HH_with_home_business,Survey_date,Measurement_age
0,11-ott-2017 10.00.41,uuid:06f6f950-541b-4d0e-af35-2dc6ea7e396d,F02,2017-09-27,,,,,Ukalawa,Come 01,...,"[0.2104863010053446, 1.7895136989946554]",0.11261,[30],[Wife],,Primary education,Worker not classifiable by status,Yes,2017,0.0
1,11-ott-2017 10.02.05,uuid:48aacebf-91a4-4857-b47f-1ca6983ed6e4,F06,2017-09-27,,,,,Ukalawa,", Fredi Lukwale",...,"[0.2104863010053446, 1.7895136989946554]",0.11261,[37],[Wife],,Primary education,Own-account worker farm,Yes,2017,1.0
2,11-ott-2017 10.03.08,uuid:9e706c24-564d-48c1-a88c-849c267f38d1,F08,2017-09-27,,,,,Ukalawa,Baraka Mpollo,...,"[0.2104863010053446, 1.7895136989946554]",0.11261,[24],[Head],24.0,Primary education,Own-account worker farm,,2017,
3,11-ott-2017 10.03.21,uuid:0a585017-59d8-4674-8490-c1e198a2a216,F0,2017-09-27,,,,,Ukalawa,Nord msoso,...,"[0.2104863010053446, 1.7895136989946554]",0.11261,[33],[Head],33.0,Lower secondary education,Own-account worker farm,,2017,1.0
4,11-ott-2017 10.03.23,uuid:80c0ea7a-ccf2-4ffa-a913-6b6017b65777,F11,2017-09-27,,,,,Ukalawa,Argentina nyagwe,...,"[0.2104863010053446, 1.7895136989946554]",0.823812,[30],[Wife],,Primary education,Own-account worker farm,,2017,


In [19]:
Q_General = Q_General.select(
    [ID_COL, 'Monthly_expenditure',
     'Socio_status_HHH', 'Number_adults',
     'Measurement_age',
     'Age_HHH',
     'Education_level_HHH', 'Income_of_household',
     'Climate_zone_lev_1', 'Climate_zone_lev_2'])


In [20]:
Q_General.preview()

Unnamed: 0,ID,Monthly_expenditure,Socio_status_HHH,Number_adults,Measurement_age,Age_HHH,Education_level_HHH,Income_of_household,Climate_zone_lev_1,Climate_zone_lev_2
0,uuid:06f6f950-541b-4d0e-af35-2dc6ea7e396d,18000.0,Worker not classifiable by status,0.11261,0.0,,Primary education,25000.0,12,12
1,uuid:48aacebf-91a4-4857-b47f-1ca6983ed6e4,50000.0,Own-account worker farm,0.11261,1.0,,Primary education,100000012.0,12,12
2,uuid:9e706c24-564d-48c1-a88c-849c267f38d1,25000.0,Own-account worker farm,0.11261,,24.0,Primary education,1000000.0,12,12
3,uuid:0a585017-59d8-4674-8490-c1e198a2a216,30000.0,Own-account worker farm,0.11261,1.0,33.0,Lower secondary education,2000000.0,12,12
4,uuid:80c0ea7a-ccf2-4ffa-a913-6b6017b65777,15000.0,Own-account worker farm,0.823812,,,Primary education,40000012.0,12,12


## Appliances
General Appliances sections
- Presence_phone_charger
- Presence_iron
- Presence_TV
- Presence_refrigerator/freezer
- Presence_radio/stereo
- Presence_DVD_player
- Presence_fan


In [21]:

US_Appliances = ODEDataset("2017_Q_nexus_US_Appliances")
HH_Appliances = ODEDataset("2017_Q_nexus_HH_Appliances")

US_Appliances.from_excel(root.joinpath("2017_Q_nexus_US_Appliances.xlsx"))
HH_Appliances.from_excel(root.joinpath("2017_Q_nexus_HH_Appliances.xlsx"))


US_Appliances = US_Appliances.apply(common_modifiers.rename({
    "PARENT_KEY": ID_COL,
})).group_by(ID_COL)

HH_Appliances = HH_Appliances.apply(common_modifiers.rename({
    "PARENT_KEY": ID_COL,
})).group_by(ID_COL)



In [22]:
Q_Appliances = HH_Appliances.concat(US_Appliances)
Q_Appliances.preview()

Unnamed: 0,ID,Item_old,Item,Item_power,Item_number,Time_average_old,Time_average,Time_minimum_old,Time_minimum,Functioning_windows,...,ST_FW2_h,ET_FW2_h,ST_FW1,ET_FW1,ST_FW2,ET_FW2,Date_num_1_old,Date_num_1,KEY,SET-OF-Appliance
0,uuid:017c8dcd-ae6c-4684-81b2-f93d75a60564,"[Int light, Est light, Radio, Phone charger]","[Ind_lights, Out_lights, Radio, Charger]","[nan, nan, nan, nan]","[5, 1, 1, 1]","[3.0, 11.0, 1.0, 1.0]","[180, 660, 60, 60]","[5.0, 5.0, 5.0, 5.0]","[300, 300, 300, 300]","[7pm 10pm, 7pm 6am, La sera, Lo usano 4 giorni...",...,"[nan, 19.0, nan, nan]","[nan, 24.0, nan, nan]","[1140.0, 0.0, nan, nan]","[1320.0, 360.0, nan, nan]","[nan, 1140.0, nan, nan]","[nan, 1440.0, nan, nan]","[nan, nan, nan, nan]","[nan, nan, nan, nan]",[uuid:017c8dcd-ae6c-4684-81b2-f93d75a60564/Ele...,[uuid:017c8dcd-ae6c-4684-81b2-f93d75a60564/Ele...
1,uuid:06f6f950-541b-4d0e-af35-2dc6ea7e396d,"[Indoor lights, Outdoor light, Phone charger]","[Ind_lights, Out_lights, Charger]","[nan, nan, nan]","[4, 1, 1]","[4.0, 4.0, 4.0]","[240, 240, 240]","[4.0, 4.0, 4.0]","[240, 240, 240]","[19-22, 19-22, 0-24]",...,"[nan, nan, nan]","[nan, nan, nan]","[1140.0, 1140.0, 0.0]","[1320.0, 1320.0, 1440.0]","[nan, nan, nan]","[nan, nan, nan]","[2017-03-01 00:00:00, Sane, 2016]","[2017.0, nan, 2016.0]",[uuid:06f6f950-541b-4d0e-af35-2dc6ea7e396d/Ele...,[uuid:06f6f950-541b-4d0e-af35-2dc6ea7e396d/Ele...
2,uuid:097ab952-ae6c-4726-9981-968c257fe02f,"[Indoor lights, Outdoor lights, Phone charger,...","[Ind_lights, Out_lights, Charger, TV, Radio, I...","[nan, nan, nan, nan, nan, nan]","[5, 2, 2, 1, 1, 1]","[3.0, 24.0, 3.0, 3.0, 3.0, 0.3]","[180, 1440, 180, 180, 180, 18]","[3.0, 24.0, 3.0, 3.0, 3.0, 0.3]","[180, 1440, 180, 180, 180, 18]","[19-22, 0-24, 0-24, 19-22, 19-22, 6-10 on sund...",...,"[nan, nan, nan, nan, nan, nan]","[nan, nan, nan, nan, nan, nan]","[1140.0, 0.0, 0.0, 1140.0, 1140.0, 360.0]","[1320.0, 1440.0, 1440.0, 1320.0, 1320.0, 600.0]","[nan, nan, nan, nan, nan, nan]","[nan, nan, nan, nan, nan, nan]","[2004, 2004, 2015, 2010, 2010, 2004]","[2004.0, 2004.0, 2015.0, 2010.0, 2010.0, 2004.0]",[uuid:097ab952-ae6c-4726-9981-968c257fe02f/Ele...,[uuid:097ab952-ae6c-4726-9981-968c257fe02f/Ele...
3,uuid:0a585017-59d8-4674-8490-c1e198a2a216,[Phone charger],[Charger],[nan],[2],[6.0],[360],[6.0],[360],[0-24],...,[nan],[nan],[0.0],[1440.0],[nan],[nan],[2016],[2016.0],[uuid:0a585017-59d8-4674-8490-c1e198a2a216/Ele...,[uuid:0a585017-59d8-4674-8490-c1e198a2a216/Ele...
4,uuid:0c9c4e03-12d5-435f-a2f3-a2edd8b52153,"[Indoor lights, Radio, Phone charger]","[Ind_lights, Radio, Charger]","[nan, nan, nan]","[2, 1, 1]","[3.0, 13.0, 2.0]","[180, 780, 120]","[3.0, 13.0, 2.0]","[180, 780, 120]","[19-22, 2022-07-01 00:00:00, 0-24]",...,"[nan, nan, nan]","[nan, nan, nan]","[1140.0, 420.0, 0.0]","[1320.0, 1320.0, 1440.0]","[nan, nan, nan]","[nan, nan, nan]","[2014, 2017-08-01 00:00:00, 2017-08-01 00:00:00]","[2014.0, 2017.0, 2017.0]",[uuid:0c9c4e03-12d5-435f-a2f3-a2edd8b52153/Ele...,[uuid:0c9c4e03-12d5-435f-a2f3-a2edd8b52153/Ele...


In [23]:
# presence_appliances(data,source,questionnaire,'2017_Q_nexus_HH_Appliances',hh,'Item','888','Charger','presence')
Q_Appliances = Q_Appliances.new_feature("Presence_phone_charger",
                                    appliances_modifiers.presence_appliances("Item", "Charger"))

Q_Appliances = Q_Appliances.new_feature("Presence_iron", appliances_modifiers.presence_appliances("Item", "Iron"))

Q_Appliances = Q_Appliances.new_feature("Presence_TV", appliances_modifiers.presence_appliances("Item", "TV"))

Q_Appliances = Q_Appliances.new_feature("Presence_refrigerator/freezer",
                                    appliances_modifiers.presence_appliances("Item", "Refrigerator"))

Q_Appliances = Q_Appliances.new_feature("Presence_stereo", appliances_modifiers.presence_appliances("Item", "Stereo"))
Q_Appliances = Q_Appliances.new_feature("Presence_radio", appliances_modifiers.presence_appliances("Item", "Radio"))
Q_Appliances = Q_Appliances.new_feature("Presence_radio/stereo",
                                    common_modifiers.multi_unify_presence(
                                        ['Presence_radio', 'Presence_stereo']))

Q_Appliances = Q_Appliances.new_feature("Presence_fan", appliances_modifiers.presence_appliances("Item", "Fan"))
Q_Appliances = Q_Appliances.new_feature("Presence_DVD_player", lambda x: 0)


Q_Appliances = Q_Appliances.select([
    ID_COL, "Presence_phone_charger", "Presence_iron", "Presence_TV", "Presence_refrigerator/freezer",
    "Presence_radio/stereo", "Presence_DVD_player", "Presence_fan"
])

Q_Appliances.preview(1000)



Unnamed: 0,ID,Presence_phone_charger,Presence_iron,Presence_TV,Presence_refrigerator/freezer,Presence_radio/stereo,Presence_DVD_player,Presence_fan
0,uuid:017c8dcd-ae6c-4684-81b2-f93d75a60564,1,0,0,0,1,0,0
1,uuid:06f6f950-541b-4d0e-af35-2dc6ea7e396d,1,0,0,0,0,0,0
2,uuid:097ab952-ae6c-4726-9981-968c257fe02f,1,1,1,0,1,0,0
3,uuid:0a585017-59d8-4674-8490-c1e198a2a216,1,0,0,0,0,0,0
4,uuid:0c9c4e03-12d5-435f-a2f3-a2edd8b52153,1,0,0,0,1,0,0
5,uuid:10c0b8f4-5c5a-4d60-bf12-775d71c60432,1,1,1,0,1,0,0
6,uuid:126e8d12-f9a9-4273-af8d-bab982c3b572,1,0,0,0,0,0,0
7,uuid:1383875e-9142-4988-a321-3c3fbcc556a3,1,0,0,0,0,0,0
8,uuid:19a3d046-4f6c-4a05-8a12-006991982e25,1,0,0,0,1,0,0
9,uuid:22da2475-3d09-44b5-99b3-06f02a3adc2e,1,1,1,0,1,0,0


In [26]:
CEFA_2017_EE = Q_General.merge(Q_Appliances, ID_COL)

CEFA_2017_EE = CEFA_2017_EE.apply(common_modifiers.rename({
    ID_COL: "ID"
}))
CEFA_2017_EE = CEFA_2017_EE.apply(common_modifiers.add_const_driver_many({
    "Hours_available_electricity": np.nan,
    "Years_of_HHH_in_community": np.nan,
    'Dwelling_quality_index': np.nan,
    'Ownership_motorized_vehicle': np.nan,
    'Ownership_small_livestock': np.nan,
    'Clean_fuel': np.nan,
    'Ownership_large_livestock': np.nan,
    'Number_of_rooms': np.nan,
    'HH_with_home_business': np.nan,
    'Tariff_payment_frequency': np.nan
}))
CEFA_2017_EE = CEFA_2017_EE.select(["ID"] + constants.DRIVERS_LIST + constants.PRESENCE_LIST)
CEFA_2017_EE.preview()

Unnamed: 0,ID,Years_of_HHH_in_community,Dwelling_quality_index,Hours_available_electricity,Measurement_age,Monthly_expenditure,Number_of_rooms,Climate_zone_lev_1,Climate_zone_lev_2,Age_HHH,...,Ownership_large_livestock,Clean_fuel,Tariff_payment_frequency,Presence_refrigerator/freezer,Presence_iron,Presence_fan,Presence_DVD_player,Presence_radio/stereo,Presence_phone_charger,Presence_TV
0,uuid:06f6f950-541b-4d0e-af35-2dc6ea7e396d,,,,0.0,18000.0,,12,12,,...,,,,0,0,0,0,0,1,0
1,uuid:48aacebf-91a4-4857-b47f-1ca6983ed6e4,,,,1.0,50000.0,,12,12,,...,,,,0,0,0,0,0,0,0
2,uuid:0a585017-59d8-4674-8490-c1e198a2a216,,,,1.0,30000.0,,12,12,33.0,...,,,,0,0,0,0,0,1,0
3,uuid:f51bfcdc-6e95-409a-bd4f-9588017d89e7,,,,3.0,70000.0,,12,12,,...,,,,0,0,0,0,0,1,0
4,uuid:540f857c-9ae9-4172-8ee6-934cd16f1b16,,,,2.0,25000.0,,12,12,38.0,...,,,,0,0,0,0,1,1,0


In [27]:
CEFA_2017_EE.to_csv(root.joinpath("CEFA_2017_EE.csv"))

<core.ODEDataset.ODEDataset at 0x7981caa5c090>