In [1]:
import os

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'
import pandas as pd
from ODEDataset import ODEDataset
from modifiers import *
from utils.categories import *
from utils.helpers import *
from utils.constants import *

import math

## MTF_HH_Core_Survey

In [2]:
# Load the dataset

MTF_HH_Core_Survey = ODEDataset('kenya/MTF_HH_Core_Survey').from_csv(
    "../playground/data/ESMAP/kenya/MTF_HH_Core_Survey.csv", encoding='ISO-8859-1')

MTF_HH_Core_Survey = MTF_HH_Core_Survey.apply(add_const_driver_many({
    "Exchange_rate_PPP": 41.6347846984863,
    "Survey_date": 2017,
    "Actualization_factor_to_2020": 1.10773366563563,
    "GADM_level_0": 'Kenya',
    "Respondent_category": "Household",
}))

MTF_HH_Core_Survey.to_dataframe()


Unnamed: 0,cluuq,HH_code,prov,cty,dist,div,loc,subloc,class,locality_ur,...,hh_grid,mini_grid,generator,battery,solar,Exchange_rate_PPP,Survey_date,Actualization_factor_to_2020,GADM_level_0,Respondent_category
0,6427_15,5411102012_01,Rift Valley,Kajiado,Kajiado North,Isinya,Kitengela,Olooloitikoshi,Core,Urban,...,1,0,0,0,1,41.634785,2017,1.107734,Kenya,Household
1,6427_15,5411102012_02,Rift Valley,Kajiado,Kajiado North,Isinya,Kitengela,Olooloitikoshi,Core,Urban,...,1,0,0,0,0,41.634785,2017,1.107734,Kenya,Household
2,6427_15,5411102012_03,Rift Valley,Kajiado,Kajiado North,Isinya,Kitengela,Olooloitikoshi,Core,Urban,...,1,0,0,0,1,41.634785,2017,1.107734,Kenya,Household
3,6427_15,5411102012_04,Rift Valley,Kajiado,Kajiado North,Isinya,Kitengela,Olooloitikoshi,Core,Urban,...,1,0,0,0,0,41.634785,2017,1.107734,Kenya,Household
4,6427_15,5411102012_05,Rift Valley,Kajiado,Kajiado North,Isinya,Kitengela,Olooloitikoshi,Core,Urban,...,1,0,0,0,0,41.634785,2017,1.107734,Kenya,Household
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4585,100_13,8010801021_02,Nairobi,Nairobi,Westlands,Westlands,Highridge,Karura,Core,Urban,...,1,0,1,0,0,41.634785,2017,1.107734,Kenya,Household
4586,3_31,8010102011_05,Nairobi,Nairobi,Nairobi West,Dagoretti,Kenyatta (Golf Course),Kenyatta Golf Course,Core,Urban,...,1,0,0,0,0,41.634785,2017,1.107734,Kenya,Household
4587,100_13,8010801021_03,Nairobi,Nairobi,Westlands,Westlands,Highridge,Karura,Core,Urban,...,1,0,0,0,0,41.634785,2017,1.107734,Kenya,Household
4588,100_13,8010801021_04,Nairobi,Nairobi,Westlands,Westlands,Highridge,Karura,Core,Urban,...,1,0,0,0,1,41.634785,2017,1.107734,Kenya,Household


In [3]:
def get_connection_type_per_row(row):
    connection_type = None
    if row["c_c_2"] == "Yes":
        connection_type = 'National grid'
    elif row["c_c_38"] == "Yes":
        connection_type = 'Local mini-grid'
    elif row["c_c_q122_3"] == 1:
        connection_type = 'Solar Home System'

    return connection_type


def get_hours_available_electricity_per_row(row):
    hours_available_electricity = np.nan
    if row['Connection_type'] == 'National grid':
        hours_available_electricity = float(row["c_c_25aii"])
    elif row['Connection_type'] == 'Local mini-grid':
        hours_available_electricity = float(row["c_c_25aii"])

    elif row['Connection_type'] == 'Solar Home System':
        main_shs_dev = row["c_127_device"]
        valid_answer = 'Solar home system (Solar PV system)'
        if main_shs_dev == valid_answer:
            hours_available_electricity = float(row["c_c_147b_Typicalmonth"])
    return hours_available_electricity


In [4]:
MTF_HH_Solar_Roster = ODEDataset('kenya/MTF_HH_Solar_Roster').from_csv(
    "../playground/data/ESMAP/kenya/MTF_HH_Solar_Roster.csv", encoding='ISO-8859-1')

MTF_HH_Core_Survey = MTF_HH_Core_Survey.merge(MTF_HH_Solar_Roster, on='cluuq', how='left')

MTF_HH_Core_Survey = MTF_HH_Core_Survey.new_feature("Connection_type", get_connection_type_per_row).new_feature(
    "Hours_available_electricity", get_hours_available_electricity_per_row)


In [5]:
def dwelling_ownership(row, ref_question_1, ref_question_2, ref_yes, ref_no, ref_rented, ref_free):
    result = np.nan
    if row[ref_question_1] == ref_yes:
        result = 'Owned'
    elif (row[ref_question_1] == ref_no) and (row[ref_question_2] == ref_rented):
        result = 'Rented'
    elif (row[ref_question_1] == ref_no) and (row[ref_question_2] == ref_free):
        result = 'Used for free'
    return result


def dwelling_toilet(row: pd.Series):
    def get_value(col):
        value = row[col]
        if is_nan(value):
            return Dwelling_Toilet_Quality_label2id['None']

        return Dwelling_Toilet_Quality_label2id[value]

    toilet_1 = get_value('b_b_13A')
    toilet_2 = get_value('b_b_13B')
    toilet_3 = get_value('b_b_13C')

    return toilet_1 | toilet_2 | toilet_3


def categorize(feature: str, categories: dict):
    def inner(row):
        value = row[feature]
        if value is None or value is np.nan:
            return np.nan
        return categories[value]

    return inner


def get_dwelling_quality(row: pd.Series):
    row['Dwelling_wall'] = Dwelling_Wall_Quality_label2id[row['b_b_10']]
    row['Dwelling_roof'] = Dwelling_Roof_Quality_label2id[row['b_b_11']]
    row['Dwelling_floor'] = Dwelling_Floor_Quality_label2id[row['b_b_12']]
    row['Dwelling_water'] = Dwelling_Water_Quality_label2id[row['b_b_14']]



In [6]:

MTF_HH_Core_Survey = MTF_HH_Core_Survey.new_feature("House_ownership_rental_free",
                                                    lambda x: dwelling_ownership(x, 'b_b_7', 'b_b_8', 'Yes', 'No',
                                                                                 'Rented', 'Free'))



In [7]:
Dwelling_Quality = MTF_HH_Core_Survey.new_feature("Dwelling_wall",
                                                  categorize("b_b_10", Dwelling_Wall_Quality_label2id))

In [8]:
Dwelling_Quality = Dwelling_Quality.new_feature("Dwelling_roof",
                                                categorize("b_b_11", Dwelling_Roof_Quality_label2id))

In [9]:
Dwelling_Quality = Dwelling_Quality.new_feature("Dwelling_floor",
                                                categorize("b_b12", Dwelling_Floor_Quality_label2id))

In [10]:
Dwelling_Quality = Dwelling_Quality.new_feature("Dwelling_water",
                                                categorize("b_b_14", Dwelling_Water_Quality_label2id))


In [11]:
Dwelling_Quality.to_dataframe().head()

Unnamed: 0,cluuq,HH_code,prov_x,cty_x,dist_x,div_x,loc_x,subloc_x,class,locality_ur_x,...,c_144_other,PARENT_KEY_y,KEY,Connection_type,Hours_available_electricity,House_ownership_rental_free,Dwelling_wall,Dwelling_roof,Dwelling_floor,Dwelling_water
0,6427_15,5411102012_01,Rift Valley,Kajiado,Kajiado North,Isinya,Kitengela,Olooloitikoshi,Core,Urban,...,,uuid:52065a81-e6d8-4b14-8de8-3747b224c666,uuid:52065a81-e6d8-4b14-8de8-3747b224c666/c_ma...,National grid,,Owned,1.0,0.0,1.0,1
1,6427_15,5411102012_01,Rift Valley,Kajiado,Kajiado North,Isinya,Kitengela,Olooloitikoshi,Core,Urban,...,,uuid:8bca080f-fbce-43f6-a399-f1440db0abc1,uuid:8bca080f-fbce-43f6-a399-f1440db0abc1/c_ma...,National grid,,Owned,1.0,0.0,1.0,1
2,6427_15,5411102012_01,Rift Valley,Kajiado,Kajiado North,Isinya,Kitengela,Olooloitikoshi,Core,Urban,...,,uuid:8bca080f-fbce-43f6-a399-f1440db0abc1,uuid:8bca080f-fbce-43f6-a399-f1440db0abc1/c_ma...,National grid,,Owned,1.0,0.0,1.0,1
3,6427_15,5411102012_02,Rift Valley,Kajiado,Kajiado North,Isinya,Kitengela,Olooloitikoshi,Core,Urban,...,,uuid:52065a81-e6d8-4b14-8de8-3747b224c666,uuid:52065a81-e6d8-4b14-8de8-3747b224c666/c_ma...,National grid,,Owned,1.0,0.0,1.0,1
4,6427_15,5411102012_02,Rift Valley,Kajiado,Kajiado North,Isinya,Kitengela,Olooloitikoshi,Core,Urban,...,,uuid:8bca080f-fbce-43f6-a399-f1440db0abc1,uuid:8bca080f-fbce-43f6-a399-f1440db0abc1/c_ma...,National grid,,Owned,1.0,0.0,1.0,1


In [12]:
Dwelling_Quality = Dwelling_Quality.new_feature("Dwelling_toilet", dwelling_toilet)


In [13]:

Dwelling_Quality.to_dataframe().head()

Unnamed: 0,cluuq,HH_code,prov_x,cty_x,dist_x,div_x,loc_x,subloc_x,class,locality_ur_x,...,PARENT_KEY_y,KEY,Connection_type,Hours_available_electricity,House_ownership_rental_free,Dwelling_wall,Dwelling_roof,Dwelling_floor,Dwelling_water,Dwelling_toilet
0,6427_15,5411102012_01,Rift Valley,Kajiado,Kajiado North,Isinya,Kitengela,Olooloitikoshi,Core,Urban,...,uuid:52065a81-e6d8-4b14-8de8-3747b224c666,uuid:52065a81-e6d8-4b14-8de8-3747b224c666/c_ma...,National grid,,Owned,1.0,0.0,1.0,1,1
1,6427_15,5411102012_01,Rift Valley,Kajiado,Kajiado North,Isinya,Kitengela,Olooloitikoshi,Core,Urban,...,uuid:8bca080f-fbce-43f6-a399-f1440db0abc1,uuid:8bca080f-fbce-43f6-a399-f1440db0abc1/c_ma...,National grid,,Owned,1.0,0.0,1.0,1,1
2,6427_15,5411102012_01,Rift Valley,Kajiado,Kajiado North,Isinya,Kitengela,Olooloitikoshi,Core,Urban,...,uuid:8bca080f-fbce-43f6-a399-f1440db0abc1,uuid:8bca080f-fbce-43f6-a399-f1440db0abc1/c_ma...,National grid,,Owned,1.0,0.0,1.0,1,1
3,6427_15,5411102012_02,Rift Valley,Kajiado,Kajiado North,Isinya,Kitengela,Olooloitikoshi,Core,Urban,...,uuid:52065a81-e6d8-4b14-8de8-3747b224c666,uuid:52065a81-e6d8-4b14-8de8-3747b224c666/c_ma...,National grid,,Owned,1.0,0.0,1.0,1,1
4,6427_15,5411102012_02,Rift Valley,Kajiado,Kajiado North,Isinya,Kitengela,Olooloitikoshi,Core,Urban,...,uuid:8bca080f-fbce-43f6-a399-f1440db0abc1,uuid:8bca080f-fbce-43f6-a399-f1440db0abc1/c_ma...,National grid,,Owned,1.0,0.0,1.0,1,1


In [14]:
import math


# Access_to_credit
# b_b_20A	 	Loan_cluster
# b_b_20B	 	Loan_cluster
# b_b_20C	 	Loan_cluster
# b_b_20D	 	Loan_cluster
# b_b_20E	 	Loan_cluster
def access_credit(row: pd.Series):
    a = row['b_b_20A']
    b = row['b_b_20B']
    c = row['b_b_20C']
    d = row['b_b_20D']
    temp = [a, b, c, d]

    # one is a string and not  'Cannot get a loan'
    def valid_loan(x):
        if x == 'Cannot get a loan':
            return False

        if isinstance(x, str):
            return True

    if any([valid_loan(x) for x in temp]):
        return "Yes"
    return "No"


# Price_candles

def get_price_candles(row: pd.Series):
    ref_answer = 'Yes'
    ref_question_1 = 'f_f_candle_10'
    ref_question_2 = 'f_f_candle_11'
    ref_question_3 = 'f_f_3_Candle_1'
    price_alternative = np.nan
    if row[ref_question_3] == ref_answer:
        if (row[ref_question_1] != 'Yes') and (row[ref_question_2] != 'Yes'):
            quantity = float(row[ref_question_1])
            expenditure = float(row[ref_question_2])
            if (quantity != 888 and quantity != 0) and expenditure != 888:
                price_alternative = expenditure / quantity

    return price_alternative


def get_price_alternative(row: pd.Series, ref_question_1, ref_question_2):
    price_alternative = np.nan
    if (row[ref_question_1] != 'Yes') and (row[ref_question_2] != 'Yes'):
        quantity = float(row[ref_question_1])
        expenditure = float(row[ref_question_2])
        if (quantity != 888 and quantity != 0) and expenditure != 888:
            price_alternative = expenditure / quantity
    return price_alternative


def calculate_expenditure(row):
    expenditure = 0
    weekly_expenditure_cluster = ['l_l_1', 'l_l_2', 'l_l_3', 'l_l_4', 'l_l_5', 'l_l_6', 'l_l_7', 'l_l_8', 'l_l_9', ]
    monthly_expenditure_cluster = ['l_l_12', 'l_l_13', 'l_l_14', 'l_l_15', 'l_l_16', 'l_l_17', 'l_l_18', 'l_l_19']
    yearly_expenditure_cluster = ['l_l_20', 'l_l_21', 'l_l_22', 'l_l_23', 'l_l_24', 'l_l_25', 'l_l_26', 'l_l_27', ]

    for i in weekly_expenditure_cluster:
        if (row[i] == 'Do not know') or (row[i] == '111') or math.isnan(float(row[i])): \
                return np.nan

        expenditure += float(row[i]) * 52

    for i in monthly_expenditure_cluster:
        if row[i] == 'Do not know':
            expenditure = np.nan
            return expenditure
        if i == 'l_l_17' and math.isnan(float(row[i])) == True:
            row[i] = 0
        if row[i] == '111' or math.isnan(float(row[i])) == True:
            expenditure = np.nan
            return expenditure
        expenditure += float(row[i]) * 12

    for i in yearly_expenditure_cluster:
        if row[i] == 'Do not know' or row[i] == '111' or math.isnan(float(row[i])) == True:
            expenditure = np.nan
            return expenditure
        expenditure += float(row[i])

    return expenditure / 12





In [15]:
Access_to_credit = Dwelling_Quality.new_feature("Access_to_credit", access_credit)

Price = Access_to_credit.new_feature("Price_candles", get_price_candles)

Price = Price.new_feature("Price_kerosene", lambda x: get_price_alternative(x, 'h_h_13e', 'h_h_14e'))
Price = Price.new_feature("Price_LPG", lambda x: get_price_alternative(x, 'h_h_13a', 'h_h_14a'))
Price = Price.new_feature("Price_charcoal", lambda x: get_price_alternative(x, 'h_h_13d', 'h_h_14d'))
Price = Price.new_feature("Price_firewood", lambda x: get_price_alternative(x, 'h_h_13b', 'h_h_14b'))



In [16]:
Price.to_dataframe()[['Price_candles', 'Price_kerosene', 'Price_LPG', 'Price_charcoal', 'Price_firewood']]

Unnamed: 0,Price_candles,Price_kerosene,Price_LPG,Price_charcoal,Price_firewood
0,,,176.923077,,
1,,,176.923077,,
2,,,176.923077,,
3,,,283.333333,,
4,,,283.333333,,
...,...,...,...,...,...
28076,,,176.923077,,
28077,,,,,
28078,,,,,
28079,,,,,


In [17]:
Expenditure = Price.new_feature("Monthly_expenditure", calculate_expenditure)

In [18]:
Expenditure.to_dataframe()[['Monthly_expenditure']]

Unnamed: 0,Monthly_expenditure
0,
1,
2,
3,49345.0
4,49345.0
...,...
28076,
28077,
28078,
28079,


In [19]:
# Dwelling_quality_index
def dwelling_quality_index(row: pd.Series):
    dwelling_quality_cluster = ["Dwelling_wall",
                                "Dwelling_roof",
                                "Dwelling_floor",
                                "Dwelling_toilet",
                                "Dwelling_water"
                                ]
    temp = []
    for i in dwelling_quality_cluster:
        if any(t == i for t in row.keys()):
            temp += [row[i]]
    if len(temp) == 5:
        return sum(temp) / len(temp)
    else:
        return np.nan


# Measurement_age
def measurement_age(section_solar: pd.DataFrame):
    def inner(row: pd.Series):
        result = np.nan
        ref_question_1 = 'c_c_7'
        ref_question_2 = 'c_c_38'
        ref_question_solar_1 = 'c_135_yrs'
        ref_question_solar_2 = 'c_127_device'

        try:
            if row['Connection_type'] == 'National grid':
                if row[ref_question_1] != 'HH has always had grid connection' and row[ref_question_1] != "Don't know":
                    result = float(row[ref_question_1])
            if row['Connection_type'] == 'Local mini-grid':
                result = float(row[ref_question_2])
            if row['Connection_type'] == 'Solar Home System':
                temp = []
                for i in range(len(section_solar[ref_question_solar_1])):
                    if section_solar[ref_question_solar_2][i] == 3 or \
                            section_solar[ref_question_solar_2][
                                i] == 'Solar home system (Solar PV system)':
                        temp.append(row[ref_question_solar_1])
                if len(temp) > 0:
                    result = max(temp)
            return result
        except:
            return result

    return inner


# Connection_fee
# connection_fee(data,source,questionnaire,'Derived_variables','MTF_HH_Core_Survey',hh,'c_c_7','c_c_8','c_c_8','MTF_HH_Solar_Roster','c_127_device','c_137_acquisition','c_139_uf_cost')
def connection_fee(section_solar: pd.DataFrame):
    def inner(row: pd.Series):
        connection_fee = np.nan
        result = np.nan
        cond_question_grid = 'c_c_7'
        ref_question_1 = 'c_c_8'
        ref_question_2 = 'c_c_8'
        # solar
        ref_question_solar_1 = 'c_127_device'
        ref_question_solar_2 = 'c_137_acquisition'
        ref_question_solar_3 = 'c_139_uf_cost'

        try:
            if row['Connection_type'] == 'National grid':
                if row[cond_question_grid] != 'Household was already connected' and row[
                    ref_question_1] != 'Household was already connected':
                    result = float(row[ref_question_1])
            elif row['Connection_type'] == 'Local mini-grid':
                result = float(row[ref_question_2])
            elif row['Connection_type'] == 'Solar Home System':
                temp = []
                for i in range(len(section_solar[ref_question_solar_3])):
                    if section_solar[ref_question_solar_2][i] == 3 or (
                            section_solar[ref_question_solar_1][i] == 'Solar home system (Solar PV system)' and
                            section_solar[ref_question_solar_2][i] == 'Bought, fully paid'):
                        temp.append(section_solar[ref_question_solar_3][i])
                if len(temp) > 0:
                    result = sum(temp)
            return result
        except:
            return result

    return inner


# Monthly_electricity_expenditure
def monthly_electricity_expenditure(row: pd.Series):
    ref_question_1 = 'c_c_20'
    ref_question_2 = 'c_c_20'
    result = np.nan
    if row['Connection_type'] == 'National grid':
        result = float(row[ref_question_1])
    elif row['Connection_type'] == 'Local mini-grid':
        result = float(row[ref_question_2])
    return result


# Land_owned
def land_owned(row: pd.Series):
    result = np.nan
    question_dimension = 'n_n_1b'
    question_unit = 'n_n_1b_unit'
    ref_answer_unit_acre = 'Acre'
    ref_answer_unit_sqm = 'sq metres'
    ref_answer_unit_hectare = '-'
    ref_answer_unit_plot = '-'
    ref_answer_unit_ridg = '-'
    if row[question_unit] == ref_answer_unit_acre:
        result = float(row[question_dimension]) * 4046.86
    elif row[question_unit] == ref_answer_unit_sqm:
        result = float(row[question_dimension])
    elif row[question_unit] == ref_answer_unit_hectare:
        result = float(row[question_dimension]) * 10000
    elif row[question_unit] == ref_answer_unit_plot:
        result = float(row[question_dimension]) * 900
    elif row[question_unit] == ref_answer_unit_ridg:
        result = float(row[question_dimension]) * 38.1
    return result


# System_management


# re_categorization(,'System_management',clusters)
def system_management_re_categorization(row: pd.Series):
    ref_question = "Connection_type"
    result = np.nan
    if row[ref_question] in System_management_label2id.keys():
        result = System_management_label2id[row[ref_question]]
    return result


# Monthly_electricity_consumption
# electricity_consumption('c_c_21',888,111,'National grid')
def electricity_consumption(ref_question, wrong_answer_1, wrong_answer_2, function_mode):
    def inner(row: pd.Series):
        result = np.nan
        if row['Connection_type'] == function_mode:
            result = filtering(row, ref_question, wrong_answer_1, wrong_answer_2)
        return result

    return inner


# Tariff_type	tariff_type(data,source,questionnaire,'MTF_HH_Core_Survey',hh,'c_c_12','No one','c_c_17',map_cat,clusters)
# Pre-paid_tariff	pre_paid_tariff(data,source,questionnaire,'MTF_HH_Core_Survey',hh,'c_c_13','c_c_14','Yes','No')

# tariff_type(data,source,questionnaire,'MTF_HH_Core_Survey',hh,'c_c_12','No one','c_c_17',map_cat,clusters)
def tariff_type(row: pd.Series):
    ref_question = 'c_c_12'
    ref_answer = 'No one'
    ref_question_1 = 'c_c_17'
    if row[ref_question] == ref_answer:
        return 'No bill'

    value = row[ref_question_1]
    return re_categorize(value, Tariff_type_original2final)


# pre_paid_tariff(data,source,questionnaire,'MTF_HH_Core_Survey',hh,'c_c_13','c_c_14','Yes','No')
def pre_paid_tariff(row: pd.Series):
    result = np.nan
    ref_answer_1 = 'Yes'
    ref_answer_2 = 'No'
    ref_question_1 = 'c_c_13'
    ref_question_2 = 'c_c_14'

    if row['Tariff_type'] == 'Consumption based':
        if row[ref_question_1] == ref_answer_1:
            if row[ref_question_2] == ref_answer_1:
                result = 'Yes'
            elif row[ref_question_2] == ref_answer_2:
                result = 'No'

    return result


def presence_appliances_string(ref_question_1, ref_appliance):
    def inner(row: pd.Series):
        result = np.nan
        if is_nan(row[ref_question_1]):
            temp = float(row[ref_question_1])
            result = 0
            if temp == ref_appliance:
                result = 1
        else:
            result = 0
        return result

    return inner


In [20]:
Dwelling_Quality = Expenditure.new_feature("Dwelling_quality_index", dwelling_quality_index)

In [21]:
solar = pd.read_csv("../playground/data/ESMAP/kenya/MTF_HH_Solar_Roster.csv", encoding='ISO-8859-1')
Measurement_age = Dwelling_Quality.new_feature("Measurement_age", measurement_age(solar))


In [22]:
Connection_fee = Measurement_age.new_feature("Connection_fee", connection_fee(solar))

Measurement_age.to_dataframe()[['Connection_type', 'Measurement_age', 'Connection_fee']]




Unnamed: 0,Connection_type,Measurement_age,Connection_fee
0,National grid,2.0,3000.0
1,National grid,2.0,3000.0
2,National grid,2.0,3000.0
3,National grid,2.0,75000.0
4,National grid,2.0,75000.0
...,...,...,...
28076,National grid,,
28077,National grid,,
28078,National grid,,
28079,National grid,,


In [23]:
Monthly_electricity_expenditure = Connection_fee.new_feature("Monthly_electricity_expenditure",
                                                             monthly_electricity_expenditure)

Monthly_electricity_expenditure.to_dataframe()[['Monthly_electricity_expenditure']]

Unnamed: 0,Monthly_electricity_expenditure
0,1500.0
1,1500.0
2,1500.0
3,3000.0
4,3000.0
...,...
28076,15000.0
28077,3000.0
28078,3000.0
28079,


In [24]:
Land_owned = Monthly_electricity_expenditure.new_feature("Land_owned", land_owned)

System_management = Land_owned.new_feature("System_management", system_management_re_categorization)

In [25]:
System_management.to_dataframe()[['Connection_type', 'System_management']]

Unnamed: 0,Connection_type,System_management
0,National grid,1.0
1,National grid,1.0
2,National grid,1.0
3,National grid,1.0
4,National grid,1.0
...,...,...
28076,National grid,1.0
28077,National grid,1.0
28078,National grid,1.0
28079,National grid,1.0


In [26]:
Monthly_electricity_consumption = System_management.new_feature("Monthly_electricity_consumption",
                                                                electricity_consumption('c_c_21', 888, 111,
                                                                                        'National grid')
                                                                )

Tariff_Type = Monthly_electricity_consumption.new_feature("Tariff_type", tariff_type)
Pre_paid_tariff = Tariff_Type.new_feature("Pre_paid_tariff", pre_paid_tariff)
Tariff_payment_method = Pre_paid_tariff.new_feature("Tariff_payment_method", lambda x: re_categorize(x["c_c_18"],
                                                                                                     Tariff_payment_method_original2final))

In [27]:
Tariff_payment_method.to_dataframe()[
    ['Tariff_type', "c_c_17", "c_c_12", "c_c_13", "c_c_14", "Pre_paid_tariff", "Tariff_payment_method"]]

Unnamed: 0,Tariff_type,c_c_17,c_c_12,c_c_13,c_c_14,Pre_paid_tariff,Tariff_payment_method
0,,,Energy company,Yes,Yes,,Mobile credit
1,,,Energy company,Yes,Yes,,Mobile credit
2,,,Energy company,Yes,Yes,,Mobile credit
3,,,Energy company,Yes,No,,Mobile credit
4,,,Energy company,Yes,No,,Mobile credit
...,...,...,...,...,...,...,...
28076,,,Energy company,Yes,No,,Mobile credit
28077,,,Landlord,Yes,No,,Cash
28078,,,Landlord,Yes,No,,Cash
28079,,,Landlord,Yes,No,,Cash


In [28]:
Presence = Tariff_payment_method

for key, value in Presence_appliance_group_3_label2id.items():
    Presence = Presence.new_feature(key, presence_appliances_string('m_m_3_group', value))



In [29]:
Presence = Presence.apply(combine_drivers("Presence_light_bulb", unify_presence("Presence_fluorescent_tube","Presence_incandescent_light_bulb")))

Presence = Presence.apply(combine_drivers("Presence_water_heater", unify_presence("Presence_solar_based_water_heater","Presence_traditional_water_heater")))

Presence = Presence.apply(combine_drivers("Presence_phone_charger", unify_presence("Presence_smartphone_charger","Presence_regular_phone_charger")))

Presence = Presence.apply(combine_drivers("Presence_TV", multi_unify_presence(
    ["Presence_black&white_TV", "Presence_color_TV", "Presence_flat_color_TV"])))


In [30]:
df = Presence.to_dataframe()
c = [i for i in df.columns if 'Presence' in i]
df[c]

Unnamed: 0,Presence_incandescent_light_bulb,Presence_fluorescent_tube,Presence_compact_fluorescent_light_bulb,Presence_LED_light_bulb,Presence_radio/stereo,Presence_DVD_player,Presence_fan,Presence_refrigerator/freezer,Presence_microwave_oven,Presence_iron,...,Presence_smartphone_charger,Presence_regular_phone_charger,Presence_black&white_TV,Presence_color_TV,Presence_flat_color_TV,Presence_water_pump,Presence_light_bulb,Presence_water_heater,Presence_phone_charger,Presence_TV
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
28076,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
28077,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
28078,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
28079,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [31]:

MTF_HH_Core_Survey = Presence.select([
    'cluuq',
    'Exchange_rate_PPP',
    'Survey_date',
    'Actualization_factor_to_2020',
    'Respondent_category',
    'Connection_type',
    'Hours_available_electricity',
    'House_ownership_rental_free',
    'Dwelling_wall',
    'Dwelling_roof',
    'Dwelling_floor',
    'Dwelling_toilet',
    'Dwelling_water',
    'Access_to_credit',
    'Price_candles',
    'Price_kerosene',
    'Price_LPG',
    'Price_charcoal',
    'Price_firewood',
    'Monthly_expenditure',
    'Dwelling_quality_index',
    'Measurement_age',
    'Connection_fee',
    'Monthly_electricity_expenditure',
    'Land_owned',
    'System_management',
    'Monthly_electricity_consumption',
    'Tariff_type',
    'Pre_paid_tariff',
    'Tariff_payment_method',
    'Presence_light_bulb',
    'Presence_radio/stereo',
    'Presence_DVD_player',
    'Presence_fan',
    'Presence_refrigerator/freezer',
    'Presence_microwave_oven',
    'Presence_iron',
    'Presence_washing_machine',
    'Presence_sewing_machine',
    'Presence_space_heater',
    'Presence_water_heater',
    'Presence_computer',
    'Presence_kettle',
    'Presence_phone_charger',
    'Presence_TV',
    'Presence_water_pump'
])

MTF_HH_Core_Survey.to_csv("../playground/data/ESMAP/kenya/MTF_HH_Core_Survey_final.csv")

<ODEDataset.ODEDataset at 0x76a82a7c0890>

In [32]:
MTF_HH_Core_Survey.to_dataframe()

Unnamed: 0,cluuq,Exchange_rate_PPP,Survey_date,Actualization_factor_to_2020,Respondent_category,Connection_type,Hours_available_electricity,House_ownership_rental_free,Dwelling_wall,Dwelling_roof,...,Presence_iron,Presence_washing_machine,Presence_sewing_machine,Presence_space_heater,Presence_water_heater,Presence_computer,Presence_kettle,Presence_phone_charger,Presence_TV,Presence_water_pump
0,6427_15,41.634785,2017,1.107734,Household,National grid,,Owned,1.0,0.0,...,0,0,0,0,0,0,0,0,0,0
1,6427_15,41.634785,2017,1.107734,Household,National grid,,Owned,1.0,0.0,...,0,0,0,0,0,0,0,0,0,0
2,6427_15,41.634785,2017,1.107734,Household,National grid,,Owned,1.0,0.0,...,0,0,0,0,0,0,0,0,0,0
3,6427_15,41.634785,2017,1.107734,Household,National grid,,Owned,1.0,0.0,...,0,0,0,0,0,0,0,0,0,0
4,6427_15,41.634785,2017,1.107734,Household,National grid,,Owned,1.0,0.0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
28076,100_13,41.634785,2017,1.107734,Household,National grid,,Rented,1.0,0.0,...,0,0,0,0,0,0,0,0,0,0
28077,100_13,41.634785,2017,1.107734,Household,National grid,24.0,Rented,1.0,0.0,...,0,0,0,0,0,0,0,0,0,0
28078,100_13,41.634785,2017,1.107734,Household,National grid,24.0,Rented,1.0,0.0,...,0,0,0,0,0,0,0,0,0,0
28079,100_13,41.634785,2017,1.107734,Household,National grid,24.0,Rented,1.0,0.0,...,0,0,0,0,0,0,0,0,0,0


## MTF_HH_Roster

Income_of_household
People_per_household
Education_level_HHH
Male/female_HHH
Marital_status_HHH
Age_HHH
Number_youngsters
Number_adults
Number_elderly
Main_occupation
Number_workers
Socio_status_HHH
HH_with_home_business_raw
HH_with_home_business
Business_pos
Size_of_business_raw
Business_pos_max_revenues
Size_of_business_revenues
Size_of_business_employees_raw
Size_of_business_employees
Seasonal_business


In [33]:
def HH_income(row: pd.Series):
    ref_question_1 = 'a_18_biz_inc'
    ref_question_2 = 'a_18_biz_inc'

    result = 0
    if any(t == 888 or t == 111 or t == 555 for t in row[ref_question_1]) or any(
            t == 888 or t == 111 or t == 555 for t in row[ref_question_2]) or all(
        math.isnan(t) for t in row[ref_question_1]):
        result = np.nan
        return result

    for i in range(len(row[ref_question_1])):
        if ((math.isnan(row[ref_question_1][i]) == False) and (
                row[ref_question_1][i] != 888 or row[ref_question_1][i] != 111)):
            result += row[ref_question_1][i]

    return result

def HH_people(row: pd.Series):
    ref_question = 'a_3_mf'
    return len(row[ref_question])

def age_HHH(row: pd.Series):
    result = np.nan
    ref_question_1 = 'a_5_age'
    HHH_relation_question = 'a_4_rel_hhh'
    HHH_relation_answer = 'Head'

    if row[HHH_relation_question].count(HHH_relation_answer) == 1:
        pos_head = row[HHH_relation_question].index(HHH_relation_answer)
        if row[ref_question_1][pos_head] != 888:
            result = row[ref_question_1][pos_head]

    return result

def age_groups(ref_question, function_mode):
    def inner(row: pd.Series):
        result = 0
        if any(t == 888 for t in row[ref_question]) or any(math.isnan(t) for t in row[ref_question]):
            result = np.nan
            return result
        if function_mode == 'youngsters':
            result = sum(map(lambda x: x < 15, row[ref_question]))
        elif function_mode == 'adults':
            result = sum(map(lambda x: 65 > x > 14, row[ref_question]))
        elif function_mode == 'elderly':
            result = sum(map(lambda x: x > 64, row[ref_question]))

        return result

    return inner

def HH_working_people(row: pd.Series):
    ref_question = 'Main_occupation'
    if any(is_nan(t) for t in row[ref_question]):
        result = np.nan
        return result

    result = sum(map(lambda x: (((x != 'Unemployed') and (isinstance(x, str))) or ((x != 2) and (is_nan(x) == False))),
                     row[ref_question]))
    return result

def socio_status_HHH(row: pd.Series):
    result = np.nan
    HHH_relation_answer = 'Head'
    HHH_relation_question = 'a_4_rel_hhh'
    ref_question = 'Main_occupation'
    if row[HHH_relation_question].count(HHH_relation_answer) == 1:
        pos_head = row[HHH_relation_question].index(HHH_relation_answer)
        result = row[ref_question][pos_head]

    return result

def HH_w_home_business(row: pd.Series):
    result = ['No', np.nan]
    ref_question_1 = 'a_19_hh_ent'
    ref_question_2 = 'Main_occupation'
    ref_question_3 = 'a_5_age'
    ref_answer = 'Yes'
    if any(t == ref_answer for t in row[ref_question_1]):
        result[0] = 'Yes'
        result[1] = [idx for idx, element in enumerate(row[ref_question_1]) if element == ref_answer]
    else:
        for i in range(len(row[ref_question_1])):
            if is_nan(row[ref_question_1][i]) == True and (
                    row[ref_question_2][i] != 'Unemployed' and is_nan(row[ref_question_2][i]) == False):
                result[0] = np.nan
                return result
            elif is_nan(row[ref_question_1][i]) == True and is_nan(row[ref_question_2][i]) == True and \
                    row[ref_question_3][i] >= 15:
                result[0] = np.nan
                return result
    return result


def size_of_business(ref_question_1, ref_question_2):
    def inner(row: pd.Series):
        result = [np.nan, np.nan]
        if not is_nan(row[ref_question_1]):
            if len(row[ref_question_1]) == 1:
                pos_business = row[ref_question_1]
                result[0] = row[pos_business]
                result[1] = pos_business
            elif len(row[ref_question_1]) > 1:
                pos_business = row[ref_question_1]
                temp = []
                for i in pos_business:
                    temp += [row[ref_question_2][i]]
                result[0] = max(temp)
                result[1] = [idx for idx, element in enumerate(row) if element == max(temp)]
                if len(result[1]) != 1:
                    result = [np.nan, np.nan]

        return result

    return inner


def seasonal_business(row: pd.Series):
    result = np.nan
    ref_question_1 = 'Business_pos_max_revenues'
    ref_question_2 = 'a_23_number'
    if not is_nan(row[ref_question_1]):
        if row[ref_question_2][row[ref_question_1][0]] == 12:
            result = 'No'
        elif not is_nan(row[ref_question_2][row[ref_question_1][0]]):
            result = 'Yes'

    return result



In [34]:
MTF_HH_Roster = ODEDataset('kenya/MTF_HH_Roster').from_csv("../playground/data/ESMAP/kenya/MTF_HH_Roster.csv",
                                                           encoding='ISO-8859-1')

MTF_HH_Roster = MTF_HH_Roster.group_by("cluuq")

MTF_HH_Roster = MTF_HH_Roster.new_feature("Income_of_household", HH_income)
MTF_HH_Roster = MTF_HH_Roster.new_feature("People_per_household", lambda row: len(row['a_3_mf']))

MTF_HH_Roster = MTF_HH_Roster.new_feature("Education_level_HHH", lambda x: re_categorize(
    x["a_9a_sch_lev"][0], Education_level_original2final))

MTF_HH_Roster = MTF_HH_Roster.new_feature("Male/female_HHH",
                                          lambda x: re_categorize(x["a_3_mf"][0], Male_Female_original2final))

MTF_HH_Roster = MTF_HH_Roster.new_feature("Marital_status_HHH",
                                          lambda x: re_categorize(x["a_11_mar_stat"][0], Marital_status_original2final))

MTF_HH_Roster = MTF_HH_Roster.new_feature("Main_occupation", lambda x: re_categorize(x["a_14_main_occ"][0],
                                                                                     Main_occupation_original2final))

MTF_HH_Roster.to_dataframe().head()



Unnamed: 0,cluuq,HH_code,prov,cty,dist,div,loc,subloc,locality_urp,locality_ur,...,a_50_ent_solar_use,a_51_ent_sol_mm,PARENT_KEY,key,Income_of_household,People_per_household,Education_level_HHH,Male/female_HHH,Marital_status_HHH,Main_occupation
0,0,"[1090105011_22, 1090105011_22, 1090105011_22, ...","[Coast, Coast, Coast, Coast, Coast, Coast, Coa...","[Kilifi, Kilifi, Kilifi, Kilifi, Kilifi, Kilif...","[Kilifi, Kilifi, Kilifi, Kilifi, Kilifi, Kilif...","[Bahari, Bahari, Bahari, Bahari, Bahari, Bahar...","[Township, Township, Township, Township, Towns...","[Hospital, Hospital, Hospital, Hospital, Hospi...","[Rural, Rural, Rural, Rural, Rural, Rural, Rur...","[Rural, Rural, Rural, Rural, Rural, Rural, Rur...",...,"[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[uuid:944ec937-e3e8-47f6-8f54-c6e0ac1f114f, uu...",[uuid:944ec937-e3e8-47f6-8f54-c6e0ac1f114f/a-a...,100000.0,18,,Female,Ended,Unemployed
1,1001_3,"[1090202021_09, 1090202021_09, 1090202021_09, ...","[Coast, Coast, Coast, Coast, Coast, Coast, Coa...","[Kilifi, Kilifi, Kilifi, Kilifi, Kilifi, Kilif...","[Kilifi, Kilifi, Kilifi, Kilifi, Kilifi, Kilif...","[Kikambala, Kikambala, Kikambala, Kikambala, K...","[Junju, Junju, Junju, Junju, Junju, Junju, Jun...","[Kuruwitu, Kuruwitu, Kuruwitu, Kuruwitu, Kuruw...","[Rural, Rural, Rural, Rural, Rural, Rural, Rur...","[Rural, Rural, Rural, Rural, Rural, Rural, Rur...",...,"[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[uuid:14ca78d4-a486-4a79-8613-facfb5eb36c5, uu...",[uuid:14ca78d4-a486-4a79-8613-facfb5eb36c5/a-a...,194800.0,61,Primary education,Male,Ongoing,Worker not classifiable by status
2,100_13,"[8010801021_05, 8010801021_05, 8010801021_05, ...","[Nairobi, Nairobi, Nairobi, Nairobi, Nairobi, ...","[Nairobi, Nairobi, Nairobi, Nairobi, Nairobi, ...","[Westlands, Westlands, Westlands, Westlands, W...","[Westlands, Westlands, Westlands, Westlands, W...","[Highridge, Highridge, Highridge, Highridge, H...","[Karura, Karura, Karura, Karura, Karura, Karur...","[Urban, Urban, Urban, Urban, Urban, Urban, Urb...","[Urban, Urban, Urban, Urban, Urban, Urban, Urb...",...,"[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[uuid:1fd54666-89f4-4211-b3a3-c882c52ba4ad, uu...",[uuid:1fd54666-89f4-4211-b3a3-c882c52ba4ad/a-a...,,17,Doctoral or equivalent level,Female,Ongoing,Unemployed
3,1026_3,"[1090402012_06, 1090402012_06, 1090402012_06, ...","[Coast, Coast, Coast, Coast, Coast, Coast, Coa...","[Kilifi, Kilifi, Kilifi, Kilifi, Kilifi, Kilif...","[Kilifi, Kilifi, Kilifi, Kilifi, Kilifi, Kilif...","[Ganze, Ganze, Ganze, Ganze, Ganze, Ganze, Gan...","[Palakumi, Palakumi, Palakumi, Palakumi, Palak...","[Palakumi, Palakumi, Palakumi, Palakumi, Palak...","[Rural, Rural, Rural, Rural, Rural, Rural, Rur...","[Rural, Rural, Rural, Rural, Rural, Rural, Rur...",...,"[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[uuid:3cb79a20-cb76-4902-b98b-c0007a452f5d, uu...",[uuid:3cb79a20-cb76-4902-b98b-c0007a452f5d/a-a...,152000.0,128,Primary education,Male,Ongoing,Unemployed
4,1027_2,"[1090402021_01, 1090402021_01, 1090402021_01, ...","[Coast, Coast, Coast, Coast, Coast, Coast, Coa...","[Kilifi, Kilifi, Kilifi, Kilifi, Kilifi, Kilif...","[Kilifi, Kilifi, Kilifi, Kilifi, Kilifi, Kilif...","[Ganze, Ganze, Ganze, Ganze, Ganze, Ganze, Gan...","[Palakumi, Palakumi, Palakumi, Palakumi, Palak...","[Mariakani/Vitsapuni, Mariakani/Vitsapuni, Mar...","[Rural, Rural, Rural, Rural, Rural, Rural, Rur...","[Rural, Rural, Rural, Rural, Rural, Rural, Rur...",...,"[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[uuid:08ac6fb3-ef2b-474f-a2d6-5f0a925b2de3, uu...",[uuid:08ac6fb3-ef2b-474f-a2d6-5f0a925b2de3/a-a...,96800.0,90,Lower secondary education,Male,Ended,Worker not classifiable by status


In [35]:
MTF_HH_Roster = MTF_HH_Roster.new_feature("Age_HHH", age_HHH)
MTF_HH_Roster = MTF_HH_Roster.new_feature("Number_youngsters", age_groups('a_5_age', 'youngsters'))
MTF_HH_Roster = MTF_HH_Roster.new_feature("Number_adults", age_groups('a_5_age', 'adults'))
MTF_HH_Roster = MTF_HH_Roster.new_feature("Number_elderly", age_groups('a_5_age', 'elderly'))

MTF_HH_Roster = MTF_HH_Roster.new_feature("Number_workers", HH_working_people)
MTF_HH_Roster.to_dataframe().head()


Unnamed: 0,cluuq,HH_code,prov,cty,dist,div,loc,subloc,locality_urp,locality_ur,...,People_per_household,Education_level_HHH,Male/female_HHH,Marital_status_HHH,Main_occupation,Age_HHH,Number_youngsters,Number_adults,Number_elderly,Number_workers
0,0,"[1090105011_22, 1090105011_22, 1090105011_22, ...","[Coast, Coast, Coast, Coast, Coast, Coast, Coa...","[Kilifi, Kilifi, Kilifi, Kilifi, Kilifi, Kilif...","[Kilifi, Kilifi, Kilifi, Kilifi, Kilifi, Kilif...","[Bahari, Bahari, Bahari, Bahari, Bahari, Bahar...","[Township, Township, Township, Township, Towns...","[Hospital, Hospital, Hospital, Hospital, Hospi...","[Rural, Rural, Rural, Rural, Rural, Rural, Rur...","[Rural, Rural, Rural, Rural, Rural, Rural, Rur...",...,18,,Female,Ended,Unemployed,50.0,2.0,15.0,1.0,10
1,1001_3,"[1090202021_09, 1090202021_09, 1090202021_09, ...","[Coast, Coast, Coast, Coast, Coast, Coast, Coa...","[Kilifi, Kilifi, Kilifi, Kilifi, Kilifi, Kilif...","[Kilifi, Kilifi, Kilifi, Kilifi, Kilifi, Kilif...","[Kikambala, Kikambala, Kikambala, Kikambala, K...","[Junju, Junju, Junju, Junju, Junju, Junju, Jun...","[Kuruwitu, Kuruwitu, Kuruwitu, Kuruwitu, Kuruw...","[Rural, Rural, Rural, Rural, Rural, Rural, Rur...","[Rural, Rural, Rural, Rural, Rural, Rural, Rur...",...,61,Primary education,Male,Ongoing,Worker not classifiable by status,,24.0,37.0,0.0,33
2,100_13,"[8010801021_05, 8010801021_05, 8010801021_05, ...","[Nairobi, Nairobi, Nairobi, Nairobi, Nairobi, ...","[Nairobi, Nairobi, Nairobi, Nairobi, Nairobi, ...","[Westlands, Westlands, Westlands, Westlands, W...","[Westlands, Westlands, Westlands, Westlands, W...","[Highridge, Highridge, Highridge, Highridge, H...","[Karura, Karura, Karura, Karura, Karura, Karur...","[Urban, Urban, Urban, Urban, Urban, Urban, Urb...","[Urban, Urban, Urban, Urban, Urban, Urban, Urb...",...,17,Doctoral or equivalent level,Female,Ongoing,Unemployed,,6.0,11.0,0.0,10
3,1026_3,"[1090402012_06, 1090402012_06, 1090402012_06, ...","[Coast, Coast, Coast, Coast, Coast, Coast, Coa...","[Kilifi, Kilifi, Kilifi, Kilifi, Kilifi, Kilif...","[Kilifi, Kilifi, Kilifi, Kilifi, Kilifi, Kilif...","[Ganze, Ganze, Ganze, Ganze, Ganze, Ganze, Gan...","[Palakumi, Palakumi, Palakumi, Palakumi, Palak...","[Palakumi, Palakumi, Palakumi, Palakumi, Palak...","[Rural, Rural, Rural, Rural, Rural, Rural, Rur...","[Rural, Rural, Rural, Rural, Rural, Rural, Rur...",...,128,Primary education,Male,Ongoing,Unemployed,,59.0,61.0,8.0,10
4,1027_2,"[1090402021_01, 1090402021_01, 1090402021_01, ...","[Coast, Coast, Coast, Coast, Coast, Coast, Coa...","[Kilifi, Kilifi, Kilifi, Kilifi, Kilifi, Kilif...","[Kilifi, Kilifi, Kilifi, Kilifi, Kilifi, Kilif...","[Ganze, Ganze, Ganze, Ganze, Ganze, Ganze, Gan...","[Palakumi, Palakumi, Palakumi, Palakumi, Palak...","[Mariakani/Vitsapuni, Mariakani/Vitsapuni, Mar...","[Rural, Rural, Rural, Rural, Rural, Rural, Rur...","[Rural, Rural, Rural, Rural, Rural, Rural, Rur...",...,90,Lower secondary education,Male,Ended,Worker not classifiable by status,,28.0,61.0,1.0,33


In [36]:
MTF_HH_Roster = MTF_HH_Roster.new_feature("Socio_status_HHH", socio_status_HHH)
MTF_HH_Roster = MTF_HH_Roster.new_feature("HH_with_home_business_raw", HH_w_home_business)

MTF_HH_Roster = MTF_HH_Roster.new_feature("HH_with_home_business", lambda x: x["HH_with_home_business_raw"][0])
MTF_HH_Roster = MTF_HH_Roster.new_feature("Business_pos", lambda x: x["HH_with_home_business_raw"][1])


In [37]:
MTF_HH_Roster = MTF_HH_Roster.new_feature("Business_pos", lambda x: x["HH_with_home_business_raw"][1])
MTF_HH_Roster = MTF_HH_Roster.new_feature("HH_with_home_business", lambda x: x["HH_with_home_business_raw"][0])


In [38]:
MTF_HH_Roster.to_dataframe()["Business_pos"]

0                  NaN
1      [0, 35, 49, 57]
2                  NaN
3                  NaN
4                 [27]
            ...       
329               [41]
330                NaN
331         [4, 8, 17]
332                NaN
333                [4]
Name: Business_pos, Length: 334, dtype: object

In [39]:

MTF_HH_Roster = MTF_HH_Roster.new_feature("Size_of_business_raw",
                                          size_of_business("Business_pos", "a_25_ent_rev_reg_mm"))
MTF_HH_Roster = MTF_HH_Roster.new_feature("Business_pos_max_revenues", lambda x: x["Size_of_business_raw"][1])
MTF_HH_Roster = MTF_HH_Roster.new_feature("Size_of_business_revenues", lambda x: x["Size_of_business_raw"][0])

MTF_HH_Roster = MTF_HH_Roster.new_feature("Size_of_business_employees_raw",
                                          size_of_business("Business_pos_max_revenues", "a_20_hh_ent_emp"))
MTF_HH_Roster = MTF_HH_Roster.new_feature("Size_of_business_employees",
                                          lambda x: x["Size_of_business_employees_raw"][0])

MTF_HH_Roster = MTF_HH_Roster.new_feature("Seasonal_business", seasonal_business)

MTF_HH_Roster.to_dataframe().head()

  result[0] = row[pos_business]
  result[0] = row[pos_business]


Unnamed: 0,cluuq,HH_code,prov,cty,dist,div,loc,subloc,locality_urp,locality_ur,...,Socio_status_HHH,HH_with_home_business_raw,HH_with_home_business,Business_pos,Size_of_business_raw,Business_pos_max_revenues,Size_of_business_revenues,Size_of_business_employees_raw,Size_of_business_employees,Seasonal_business
0,0,"[1090105011_22, 1090105011_22, 1090105011_22, ...","[Coast, Coast, Coast, Coast, Coast, Coast, Coa...","[Kilifi, Kilifi, Kilifi, Kilifi, Kilifi, Kilif...","[Kilifi, Kilifi, Kilifi, Kilifi, Kilifi, Kilif...","[Bahari, Bahari, Bahari, Bahari, Bahari, Bahar...","[Township, Township, Township, Township, Towns...","[Hospital, Hospital, Hospital, Hospital, Hospi...","[Rural, Rural, Rural, Rural, Rural, Rural, Rur...","[Rural, Rural, Rural, Rural, Rural, Rural, Rur...",...,e,"[nan, nan]",,,"[nan, nan]",,,"[nan, nan]",,
1,1001_3,"[1090202021_09, 1090202021_09, 1090202021_09, ...","[Coast, Coast, Coast, Coast, Coast, Coast, Coa...","[Kilifi, Kilifi, Kilifi, Kilifi, Kilifi, Kilif...","[Kilifi, Kilifi, Kilifi, Kilifi, Kilifi, Kilif...","[Kikambala, Kikambala, Kikambala, Kikambala, K...","[Junju, Junju, Junju, Junju, Junju, Junju, Jun...","[Kuruwitu, Kuruwitu, Kuruwitu, Kuruwitu, Kuruw...","[Rural, Rural, Rural, Rural, Rural, Rural, Rur...","[Rural, Rural, Rural, Rural, Rural, Rural, Rur...",...,,"[Yes, [0, 35, 49, 57]]",Yes,"[0, 35, 49, 57]","[nan, nan]",,,"[nan, nan]",,
2,100_13,"[8010801021_05, 8010801021_05, 8010801021_05, ...","[Nairobi, Nairobi, Nairobi, Nairobi, Nairobi, ...","[Nairobi, Nairobi, Nairobi, Nairobi, Nairobi, ...","[Westlands, Westlands, Westlands, Westlands, W...","[Westlands, Westlands, Westlands, Westlands, W...","[Highridge, Highridge, Highridge, Highridge, H...","[Karura, Karura, Karura, Karura, Karura, Karur...","[Urban, Urban, Urban, Urban, Urban, Urban, Urb...","[Urban, Urban, Urban, Urban, Urban, Urban, Urb...",...,,"[nan, nan]",,,"[nan, nan]",,,"[nan, nan]",,
3,1026_3,"[1090402012_06, 1090402012_06, 1090402012_06, ...","[Coast, Coast, Coast, Coast, Coast, Coast, Coa...","[Kilifi, Kilifi, Kilifi, Kilifi, Kilifi, Kilif...","[Kilifi, Kilifi, Kilifi, Kilifi, Kilifi, Kilif...","[Ganze, Ganze, Ganze, Ganze, Ganze, Ganze, Gan...","[Palakumi, Palakumi, Palakumi, Palakumi, Palak...","[Palakumi, Palakumi, Palakumi, Palakumi, Palak...","[Rural, Rural, Rural, Rural, Rural, Rural, Rur...","[Rural, Rural, Rural, Rural, Rural, Rural, Rur...",...,,"[nan, nan]",,,"[nan, nan]",,,"[nan, nan]",,
4,1027_2,"[1090402021_01, 1090402021_01, 1090402021_01, ...","[Coast, Coast, Coast, Coast, Coast, Coast, Coa...","[Kilifi, Kilifi, Kilifi, Kilifi, Kilifi, Kilif...","[Kilifi, Kilifi, Kilifi, Kilifi, Kilifi, Kilif...","[Ganze, Ganze, Ganze, Ganze, Ganze, Ganze, Gan...","[Palakumi, Palakumi, Palakumi, Palakumi, Palak...","[Mariakani/Vitsapuni, Mariakani/Vitsapuni, Mar...","[Rural, Rural, Rural, Rural, Rural, Rural, Rur...","[Rural, Rural, Rural, Rural, Rural, Rural, Rur...",...,,"[Yes, [27]]",Yes,[27],"[[[30.0, 30.0, nan, nan, nan, nan, 5.0, nan, 2...",[27],"a_17_biz_dd [30.0, 30.0, nan, nan, nan, nan...","[[[30.0, 30.0, nan, nan, nan, nan, 5.0, nan, 2...","a_17_biz_dd [30.0, 30.0, nan, nan, nan, nan...",No


In [40]:

MTF_HH_Roster = MTF_HH_Roster.select([
    'cluuq',
    'Income_of_household',
    'People_per_household',
    'Education_level_HHH',
    'Male/female_HHH',
    'Marital_status_HHH',
    'Age_HHH',
    'Number_youngsters',
    'Number_adults',
    'Number_elderly',
    'Main_occupation',
    'Number_workers',
    'Socio_status_HHH',
    'HH_with_home_business_raw',
    'HH_with_home_business',
    'Business_pos',
    'Size_of_business_raw',
    'Business_pos_max_revenues',
    'Size_of_business_revenues',
    'Size_of_business_employees_raw',
    'Size_of_business_employees',
    'Seasonal_business'])




## MTF_HH_Cooking_Data_Final

In [41]:

MTF_HH_Cooking_Data_Final = ODEDataset('kenya/MTF_HH_Cooking_Data_Final').from_csv(
    "../playground/data/ESMAP/kenya/MTF_HH_Cooking_Data_Final.csv", encoding='ISO-8859-1').group_by("cluuq")

MTF_HH_Cooking_Data_Final.to_dataframe().head()


Unnamed: 0,cluuq,HH_code,prov,cty,dist,div,loc,subloc,class,locality_urp,...,i_28_heating_mm4,i_28_heating_mm5,i_28_heating_mm6,i_28_heating_mm7,i_28_heating_mm8,i_28_heating_mm9,i_28_heating_mm10,i_28_heating_mm11,i_28_heating_mm12,i_28_heating_mm13
0,0,"[1090105011_22, 1090105011_21, 1090105011_23]","[Coast, Coast, Coast]","[Kilifi, Kilifi, Kilifi]","[Kilifi, Kilifi, Kilifi]","[Bahari, Bahari, Bahari]","[Township, Township, Township]","[Hospital, Hospital, Hospital]","[Core, Core, Core]","[Rural, Rural, Rural]",...,"[nan, nan, nan]","[nan, nan, nan]","[nan, nan, nan]","[nan, nan, nan]","[nan, nan, nan]","[nan, nan, nan]","[nan, nan, nan]","[nan, nan, nan]","[nan, nan, nan]","[nan, nan, nan]"
1,1001_3,"[1090202021_09, 1090202021_11, 1090202021_11, ...","[Coast, Coast, Coast, Coast, Coast, Coast, Coa...","[Kilifi, Kilifi, Kilifi, Kilifi, Kilifi, Kilif...","[Kilifi, Kilifi, Kilifi, Kilifi, Kilifi, Kilif...","[Kikambala, Kikambala, Kikambala, Kikambala, K...","[Junju, Junju, Junju, Junju, Junju, Junju, Jun...","[Kuruwitu, Kuruwitu, Kuruwitu, Kuruwitu, Kuruw...","[Core, Core, Core, Core, Core, Core, Core, Cor...","[Rural, Rural, Rural, Rural, Rural, Rural, Rur...",...,"[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ..."
2,1026_3,"[1090402012_06, 1090402012_02, 1090402012_12, ...","[Coast, Coast, Coast, Coast, Coast, Coast, Coa...","[Kilifi, Kilifi, Kilifi, Kilifi, Kilifi, Kilif...","[Kilifi, Kilifi, Kilifi, Kilifi, Kilifi, Kilif...","[Ganze, Ganze, Ganze, Ganze, Ganze, Ganze, Gan...","[Palakumi, Palakumi, Palakumi, Palakumi, Palak...","[Palakumi, Palakumi, Palakumi, Palakumi, Palak...","[Core, Core, Core, Core, Core, Core, Core, Cor...","[Rural, Rural, Rural, Rural, Rural, Rural, Rur...",...,"[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ..."
3,1027_2,"[1090402021_01, 1090402021_08, 1090402021_12, ...","[Coast, Coast, Coast, Coast, Coast, Coast, Coa...","[Kilifi, Kilifi, Kilifi, Kilifi, Kilifi, Kilif...","[Kilifi, Kilifi, Kilifi, Kilifi, Kilifi, Kilif...","[Ganze, Ganze, Ganze, Ganze, Ganze, Ganze, Gan...","[Palakumi, Palakumi, Palakumi, Palakumi, Palak...","[Mariakani/Vitsapuni, Mariakani/Vitsapuni, Mar...","[Core, Core, Core, Core, Core, Core, Core, Cor...","[Rural, Rural, Rural, Rural, Rural, Rural, Rur...",...,"[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ..."
4,1029_3,"[1090403021_07, 1090403021_01, 1090403021_08, ...","[Coast, Coast, Coast, Coast, Coast, Coast, Coa...","[Kilifi, Kilifi, Kilifi, Kilifi, Kilifi, Kilif...","[Kilifi, Kilifi, Kilifi, Kilifi, Kilifi, Kilif...","[Ganze, Ganze, Ganze, Ganze, Ganze, Ganze, Gan...","[Dungicha, Dungicha, Dungicha, Dungicha, Dungi...","[Dungicha, Dungicha, Dungicha, Dungicha, Dungi...","[Core, Core, Core, Core, Core, Core, Core, Cor...","[Rural, Rural, Rural, Rural, Rural, Rural, Rur...",...,"[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ..."


In [42]:
def fuel_usage(row: pd.Series):
    result = []
    cluster = [
        "i_23_hrs_morning",
        "i_24_hrs_afternoon",
        'i_25_hrs_evening',
        'i_26_hrs_h2o'
    ]
    for j in range(len(row[cluster[0]])):
        result += [[]]

        for i in cluster:
            result[j] += [row[i][j]]
        result[j] = sum(result[j])

    return result

def is_clean_fuel(questionnaire, ref_question):
    def inner(row: pd.Series):
        max_usage = max(row['Fuel_usage'])
        pos_max = row['Fuel_usage'].index(max_usage)

        if questionnaire != 'zambia':
            if row[ref_question][pos_max] in CLEAN_FUELS:
                check_clean = 'Yes'
            else:
                check_clean = 'No'
        else:
            if row[ref_question][pos_max] in CLEAN_FUELS_Zambia:
                check_clean = 'Yes'
            else:
                check_clean = 'No'
        return check_clean

    return inner

In [43]:
MTF_HH_Cooking_Data_Final = MTF_HH_Cooking_Data_Final.new_feature("Fuel_usage", fuel_usage)
MTF_HH_Cooking_Data_Final = MTF_HH_Cooking_Data_Final.new_feature("Clean_fuel",
                                                                  is_clean_fuel('kenya', 'i_18_a_1st_fuel'))

In [44]:
MTF_HH_Cooking_Data_Final = MTF_HH_Cooking_Data_Final.select(["cluuq", "Clean_fuel"])

## MTF_HH_Sec.M1_Asset_Data_Final

In [45]:

M1_Asset_Data_Final = ODEDataset('kenya/MTF_HH_Sec.M1_Asset_Data_Final').from_csv(
    "../playground/data/ESMAP/kenya/MTF_HH_Sec.M1_Asset_Data_Final.csv", encoding='ISO-8859-1')

M1_Asset_Data_Final = M1_Asset_Data_Final.group_by("cluuq")
M1_Asset_Data_Final.to_dataframe().head()



Unnamed: 0,cluuq,HH_code,prov,cty,dist,div,loc,subloc,class,locality_urp,...,marg,grid_loc,m_a_types,m_a_label,m_1_other,m_1_a,m_1_b_source,m_1_b_source_other,PARENT_KEY,KEY
0,0,[1090105011_23],[Coast],[Kilifi],[Kilifi],[Bahari],[Township],[Hospital],[Core],[Rural],...,[better-off marginal],[Rural without grid access],[Motorcycle],[Motorcycle],[nan],[1.0],[11.0],[nan],[uuid:f86da9ca-8ecb-44a5-beca-a69b2bdcd352],[uuid:f86da9ca-8ecb-44a5-beca-a69b2bdcd352/m-m...
1,1001_3,"[1090202021_09, 1090202021_10, 1090202021_01, ...","[Coast, Coast, Coast, Coast, Coast]","[Kilifi, Kilifi, Kilifi, Kilifi, Kilifi]","[Kilifi, Kilifi, Kilifi, Kilifi, Kilifi]","[Kikambala, Kikambala, Kikambala, Kikambala, K...","[Junju, Junju, Junju, Junju, Junju]","[Kuruwitu, Kuruwitu, Kuruwitu, Kuruwitu, Kuruw...","[Core, Core, Core, Core, Core]","[Rural, Rural, Rural, Rural, Rural]",...,"[better-off marginal, better-off marginal, bet...","[Rural without grid access, Rural without grid...","[Other equipment, Other equipment, Bicycle, Bi...","[Other equipment, Other equipment, Bicycle, Bi...","[10, 9, nan, nan, 9]","[1.0, 1.0, 1.0, 0.0, 3.0]","[13.0, 13.0, 13.0, 13.0, 13.0]","[nan, nan, nan, nan, nan]","[uuid:14ca78d4-a486-4a79-8613-facfb5eb36c5, uu...",[uuid:14ca78d4-a486-4a79-8613-facfb5eb36c5/m-m...
2,1026_3,"[1090402012_07, 1090402012_05, 1090402012_08]","[Coast, Coast, Coast]","[Kilifi, Kilifi, Kilifi]","[Kilifi, Kilifi, Kilifi]","[Ganze, Ganze, Ganze]","[Palakumi, Palakumi, Palakumi]","[Palakumi, Palakumi, Palakumi]","[Core, Core, Core]","[Rural, Rural, Rural]",...,"[better-off marginal, better-off marginal, bet...","[Rural without grid access, Rural without grid...","[Bicycle, Bicycle, Motorcycle]","[Bicycle, Bicycle, Motorcycle]","[nan, nan, nan]","[2.0, 1.0, 2.0]","[13.0, 13.0, 11.0]","[nan, nan, nan]","[uuid:767bd398-e5a0-423e-96a3-c662454484a5, uu...",[uuid:767bd398-e5a0-423e-96a3-c662454484a5/m-m...
3,1027_2,"[1090402021_08, 1090402021_12]","[Coast, Coast]","[Kilifi, Kilifi]","[Kilifi, Kilifi]","[Ganze, Ganze]","[Palakumi, Palakumi]","[Mariakani/Vitsapuni, Mariakani/Vitsapuni]","[Core, Core]","[Rural, Rural]",...,"[better-off marginal, better-off marginal]","[Rural without grid access, Rural without grid...","[Bicycle, Motorcycle]","[Bicycle, Motorcycle]","[nan, nan]","[1.0, 1.0]","[13.0, 11.0]","[nan, nan]","[uuid:0f24cf78-de6a-4e60-b1dc-0264b524d4fa, uu...",[uuid:0f24cf78-de6a-4e60-b1dc-0264b524d4fa/m-m...
4,1029_3,"[1090403021_03, 1090403021_12]","[Coast, Coast]","[Kilifi, Kilifi]","[Kilifi, Kilifi]","[Ganze, Ganze]","[Dungicha, Dungicha]","[Dungicha, Dungicha]","[Core, Core]","[Rural, Rural]",...,"[better-off marginal, better-off marginal]","[Rural without grid access, Rural without grid...","[Bicycle, Bicycle]","[Bicycle, Bicycle]","[nan, nan]","[1.0, 1.0]","[13.0, 13.0]","[nan, nan]","[uuid:4c7d446b-88fb-4540-b98c-0293b15af58c, uu...",[uuid:4c7d446b-88fb-4540-b98c-0293b15af58c/m-m...


In [46]:
def asset(ref_question, function_mode):
    def inner(row: pd.Series):
        result = 'No'
        if function_mode == 'kenya':
            if any(t == 1 for t in row[ref_question]):
                result = 'Yes'
        if function_mode == 'nigeria':
            for i in range(len(row['asset_list'])):
                if row['own_asset'][i] == 1 and row[ref_question][i] == 1:
                    result = 'Yes'
        if function_mode == 'zambia':
            for i in range(len(row['mitem'])):
                if row['M1B'][i] == 1 and row[ref_question][i] == 1:
                    result = 'Yes'
        if function_mode == 'tanzania_vehicle':
            if row['hh_m01'][24] > 0 or row['hh_m01'][25] > 0:
                result == 'Yes'
        if function_mode == 'tanzania_livestock_small':
            if row['lf02_01'][6] == 1 or row['lf02_01'][7] == 1 or row['lf02_01'][8] == 1 or row['lf02_01'][12] == 1 or \
                    row['lf02_01'][13] == 1:
                result = 'Yes'
        if function_mode == 'tanzania_livestock_large':
            if row['lf02_01'][0] == 1 or row['lf02_01'][1] == 1 or row['lf02_01'][2] == 1 or row['lf02_01'][3] == 1 or \
                    row['lf02_01'][4] == 1 or row['lf02_01'][5] == 1:
                result = 'Yes'
        return result

    return inner

In [47]:
def re_categorize_list(mapping: dict, column: str):
    def inner(row: pd.Series):
        result = []
        for i in row[column]:
            result += [re_categorize(i, mapping)]
        return result

    return inner



In [48]:
M1_Asset_Data_Final = M1_Asset_Data_Final.new_feature("Ownership_motorized_vehicle_all",
                                                      re_categorize_list(Ownership_motorized_vehicle_original2final,
                                                                         "m_a_label"))
M1_Asset_Data_Final = M1_Asset_Data_Final.new_feature("Ownership_motorized_vehicle",
                                                      asset('Ownership_motorized_vehicle_all', 'kenya'))


In [49]:
M1_Asset_Data_Final = M1_Asset_Data_Final.select(["cluuq", "Ownership_motorized_vehicle"])
M1_Asset_Data_Final.to_dataframe()

Unnamed: 0,cluuq,Ownership_motorized_vehicle
0,0,Yes
1,1001_3,No
2,1026_3,Yes
3,1027_2,Yes
4,1029_3,No
...,...,...
215,992_1,Yes
216,993_1,No
217,995_5,Yes
218,995_6,Yes


## MTF_HH_Sec.M2_Asset_Data_Final

In [50]:
M2_Asset_Data_Final = ODEDataset('kenya/MTF_HH_Sec.M2_Asset_Data_Final').from_csv(
    "../playground/data/ESMAP/kenya/MTF_HH_Sec.M2_Asset_Data_Final.csv", encoding='ISO-8859-1')

M2_Asset_Data_Final = M2_Asset_Data_Final.group_by("cluuq")

M2_Asset_Data_Final = M2_Asset_Data_Final.new_feature("Ownership_small_livestock_all",
                                                      re_categorize_list(Ownership_small_livestock_original2final,
                                                                         "m_a_label"))

M2_Asset_Data_Final = M2_Asset_Data_Final.new_feature("Ownership_large_livestock_all",
                                                      re_categorize_list(Ownership_large_livestock_original2final,
                                                                         "m_a_label"))

M2_Asset_Data_Final = M2_Asset_Data_Final.new_feature("Ownership_small_livestock",
                                                      asset('Ownership_small_livestock_all', 'kenya'))

M2_Asset_Data_Final = M2_Asset_Data_Final.new_feature("Ownership_large_livestock",
                                                      asset('Ownership_large_livestock_all', 'kenya'))





In [51]:
M2_Asset_Data_Final = M2_Asset_Data_Final.select(["cluuq", "Ownership_small_livestock", "Ownership_large_livestock"])

In [52]:
Asset_Data_Final = M1_Asset_Data_Final.merge(M2_Asset_Data_Final, on="cluuq")

Asset_Data_Final.to_csv("../playground/data/ESMAP/kenya/Asset_Data_Final.csv")

<ODEDataset.ODEDataset at 0x76a83a727890>

In [53]:
Kenya = MTF_HH_Core_Survey.merge(MTF_HH_Roster, on="cluuq")
Kenya = Kenya.merge(MTF_HH_Cooking_Data_Final, on="cluuq")
Kenya = Kenya.merge(Asset_Data_Final, on="cluuq")

Kenya.to_csv("../playground/data/ESMAP/kenya/Kenya.csv")

<ODEDataset.ODEDataset at 0x76a82a7c0890>

In [54]:
Kenya.to_dataframe()

Unnamed: 0,cluuq,Exchange_rate_PPP,Survey_date,Actualization_factor_to_2020,Respondent_category,Connection_type,Hours_available_electricity,House_ownership_rental_free,Dwelling_wall,Dwelling_roof,...,Size_of_business_raw,Business_pos_max_revenues,Size_of_business_revenues,Size_of_business_employees_raw,Size_of_business_employees,Seasonal_business,Clean_fuel,Ownership_motorized_vehicle,Ownership_small_livestock,Ownership_large_livestock
0,6427_15,41.634785,2017,1.107734,Household,National grid,,Owned,1.0,0.0,...,"[[[nan, nan, nan, nan, nan, nan, nan, nan, nan...",[42],"a_29_energy_sources4 [nan, nan, nan, nan, n...","[[[nan, nan, nan, nan, nan, nan, nan, nan, nan...","a_29_energy_sources4 [nan, nan, nan, nan, n...",No,Yes,Yes,No,Yes
1,6427_15,41.634785,2017,1.107734,Household,National grid,,Owned,1.0,0.0,...,"[[[nan, nan, nan, nan, nan, nan, nan, nan, nan...",[42],"a_29_energy_sources4 [nan, nan, nan, nan, n...","[[[nan, nan, nan, nan, nan, nan, nan, nan, nan...","a_29_energy_sources4 [nan, nan, nan, nan, n...",No,Yes,Yes,No,Yes
2,6427_15,41.634785,2017,1.107734,Household,National grid,,Owned,1.0,0.0,...,"[[[nan, nan, nan, nan, nan, nan, nan, nan, nan...",[42],"a_29_energy_sources4 [nan, nan, nan, nan, n...","[[[nan, nan, nan, nan, nan, nan, nan, nan, nan...","a_29_energy_sources4 [nan, nan, nan, nan, n...",No,Yes,Yes,No,Yes
3,6427_15,41.634785,2017,1.107734,Household,National grid,,Owned,1.0,0.0,...,"[[[nan, nan, nan, nan, nan, nan, nan, nan, nan...",[42],"a_29_energy_sources4 [nan, nan, nan, nan, n...","[[[nan, nan, nan, nan, nan, nan, nan, nan, nan...","a_29_energy_sources4 [nan, nan, nan, nan, n...",No,Yes,Yes,No,Yes
4,6427_15,41.634785,2017,1.107734,Household,National grid,,Owned,1.0,0.0,...,"[[[nan, nan, nan, nan, nan, nan, nan, nan, nan...",[42],"a_29_energy_sources4 [nan, nan, nan, nan, n...","[[[nan, nan, nan, nan, nan, nan, nan, nan, nan...","a_29_energy_sources4 [nan, nan, nan, nan, n...",No,Yes,Yes,No,Yes
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17961,37_35,41.634785,2017,1.107734,Household,National grid,9.0,Rented,1.0,0.0,...,"[nan, nan]",,,"[nan, nan]",,,No,Yes,No,Yes
17962,37_35,41.634785,2017,1.107734,Household,National grid,9.0,Rented,1.0,0.0,...,"[nan, nan]",,,"[nan, nan]",,,No,Yes,No,Yes
17963,37_35,41.634785,2017,1.107734,Household,National grid,9.0,Rented,1.0,0.0,...,"[nan, nan]",,,"[nan, nan]",,,No,Yes,No,Yes
17964,37_35,41.634785,2017,1.107734,Household,National grid,9.0,Rented,1.0,0.0,...,"[nan, nan]",,,"[nan, nan]",,,No,Yes,No,Yes
