In [1]:
import pandas as pd
import plotly.express as px
import geopandas as gpd

## Pandas and GeoJson Prep

In [2]:
def GetDataframe(csv_Path='../Datasets/Family Income and Expenditure.csv'):
    """Load the Family Income and Expenditure CSV and tidy it for plotting.

    Two clean-ups are applied after reading:
      * surrounding whitespace is stripped from 'Region' (e.g. ' ARMM' -> 'ARMM');
      * six food expenditure columns are renamed to short labels
        ('Meat Expenditure' -> 'Meat', ...).

    Args:
        csv_Path: Location of the CSV file. Defaults to the path the notebook
            has always used, so existing zero-argument calls are unaffected.

    Returns:
        A pandas DataFrame with the cleaned 'Region' column and renamed
        food columns; every other column is left as read.
    """
    df_Raw = pd.read_csv(csv_Path)

    # Remove whitespace such as in ' ARMM'. The vectorised .str accessor
    # passes missing values through unchanged, whereas calling x.strip() on
    # each element raises AttributeError on the first NaN.
    df_Raw['Region'] = df_Raw['Region'].str.strip()

    # Shorten the food column names. Columns absent from the file are simply
    # ignored by rename().
    # NOTE(review): the double space in 'Total Fish and  marine products
    # Expenditure' is part of the key and presumably mirrors the CSV header;
    # confirm before normalising it.
    return df_Raw.rename(columns={
        'Bread and Cereals Expenditure': 'Bread and Cereals',
        'Meat Expenditure': 'Meat',
        'Total Rice Expenditure': 'Rice',
        'Total Fish and  marine products Expenditure': 'Seafood',
        'Fruit Expenditure': 'Fruits',
        'Vegetables Expenditure': 'Vegetables',
    })

In [3]:
def GetFoodLabels():
    """Return the six food-category labels as a new list on every call.

    The labels are the short column names that GetDataframe renames the
    food expenditure columns to.
    """
    food_Labels = (
        'Bread and Cereals',
        'Meat',
        'Rice',
        'Seafood',
        'Fruits',
        'Vegetables',
    )
    # Hand back a fresh list so a caller mutating it cannot affect later calls.
    return list(food_Labels)

In [4]:
def GetGeoJson():
    """Load the region map and relabel its regions to the survey's names.

    Reads '../Datasets/country.0.001.json' with geopandas and rewrites the
    'adm1_en' column so each long administrative name becomes the short
    label used for the same region in the CSV data. Values not listed in
    the mapping are left untouched.

    Returns:
        The object returned by gpd.read_file, with 'adm1_en' relabelled.
    """
    # Long map name -> short CSV name, one pair per region.
    # NOTE(review): 'IX - Zasmboanga Peninsula' looks misspelled but is kept
    # verbatim; presumably it mirrors the spelling in the CSV, which these
    # labels must match exactly. Confirm against the data before changing it.
    region_Renames = {
        'Region I (Ilocos Region)': 'I - Ilocos Region',
        'Region II (Cagayan Valley)': 'II - Cagayan Valley',
        'Region III (Central Luzon)': 'III - Central Luzon',
        'Region IV-A (CALABARZON)': 'IVA - CALABARZON',
        'Region V (Bicol Region)': 'V - Bicol Region',
        'Region VI (Western Visayas)': 'VI - Western Visayas',
        'Region VII (Central Visayas)': 'VII - Central Visayas',
        'Region VIII (Eastern Visayas)': 'VIII - Eastern Visayas',
        'Region IX (Zamboanga Peninsula)': 'IX - Zasmboanga Peninsula',
        'Region X (Northern Mindanao)': 'X - Northern Mindanao',
        'Region XI (Davao Region)': 'XI - Davao Region',
        'Region XII (SOCCSKSARGEN)': 'XII - SOCCSKSARGEN',
        'National Capital Region (NCR)': 'NCR',
        'Cordillera Administrative Region (CAR)': 'CAR',
        'Region XIII (Caraga)': 'Caraga',
        'MIMAROPA Region': 'IVB - MIMAROPA',
        'Bangsamoro Autonomous Region In Muslim Mindanao (BARMM)': 'ARMM',
    }

    gj_PhMap = gpd.read_file('../Datasets/country.0.001.json')
    gj_PhMap['adm1_en'] = gj_PhMap['adm1_en'].replace(region_Renames)
    return gj_PhMap

# All Graphs

## Choropleth of Income

In [5]:
def GetFig_Choropleth(ref_Dataframe, ref_GeoJson, agg_Mode):
    """Build a choropleth map of household income aggregated per region.

    Args:
        ref_Dataframe: DataFrame with 'Region' and 'Total Household Income'
            columns.
        ref_GeoJson: Region geometry, handed to Plotly unchanged as `geojson`.
            Features are matched to regions through `properties.adm1_en`.
        agg_Mode: 'Max', 'Min' or 'Median'. Any other value (for example
            'Mean') falls back to the mean.

    Returns:
        The figure produced by px.choropleth_mapbox.
    """
    income_ByRegion = ref_Dataframe.groupby('Region')['Total Household Income']

    if agg_Mode == 'Max':
        agg_Income = income_ByRegion.max()
    elif agg_Mode == 'Min':
        agg_Income = income_ByRegion.min()
    elif agg_Mode == 'Median':
        agg_Income = income_ByRegion.median()
    else:
        # Catch-all: unrecognised modes are not rejected, they use the mean.
        agg_Income = income_ByRegion.mean()

    # Region names come from the aggregated index and the colour scale from
    # its values; because the values are passed as a bare array Plotly calls
    # them 'color', which `labels` renames to 'Php'.
    return px.choropleth_mapbox(
        data_frame=agg_Income,
        geojson=ref_GeoJson,
        featureidkey='properties.adm1_en',
        locations=agg_Income.index,
        color=agg_Income.values,
        labels={'color': 'Php'},
        title='Income per Region',
        mapbox_style='carto-positron',
        # NOTE(review): centre/zoom presumably frame the whole country map; confirm.
        center={'lat': 12.738500, 'lon': 121.766632},
        zoom=4,
        opacity=0.3,
        height=600,
    )

## Pie Chart of Top 10 Jobs

In [6]:
def GetFig_MostEmployedJobs(ref_Dataframe, region_Name):
    """Build a pie chart of the ten most common household-head occupations.

    Args:
        ref_Dataframe: DataFrame with 'Region' and 'Household Head Occupation'
            columns.
        region_Name: Region whose rows are counted; also appended to the
            chart title.

    Returns:
        The figure produced by px.pie.
    """
    in_Region = ref_Dataframe['Region'] == region_Name
    job_Counts = ref_Dataframe.loc[in_Region, 'Household Head Occupation'].value_counts()

    # value_counts() is already ordered from most to least frequent, so the
    # first ten entries are the top ten.
    top_Jobs = job_Counts.head(10)

    chart_Title = 'Top 10 Most Employed Jobs in ' + region_Name
    return px.pie(
        top_Jobs,
        values=top_Jobs.values,
        names=top_Jobs.index,
        title=chart_Title,
        hover_name=top_Jobs.index,
    )

## Stacked Bar Chart of Food Breakdown per Region

In [7]:
def GetFig_FoodBreakdown(ref_Dataframe, selected_Foods):
    """Build a stacked bar chart of mean food spending per region.

    Args:
        ref_Dataframe: DataFrame with a 'Region' column plus one column per
            entry in `selected_Foods`.
        selected_Foods: Column names to average and stack, one bar segment each.

    Returns:
        The figure produced by px.bar, with bars stacked and regions ordered
        by descending total.
    """
    mean_ByRegion = ref_Dataframe.groupby('Region')[selected_Foods].mean()

    axis_Labels = {'x': 'Regions', 'value': 'Php', 'variable': 'Food Expenses'}
    fig = px.bar(
        data_frame=mean_ByRegion,
        x=mean_ByRegion.index,
        y=selected_Foods,
        title='Food Expenses per Region',
        labels=axis_Labels,
    )

    # Stack the food segments and put the highest-spending regions first.
    fig.update_layout(
        barmode='stack',
        xaxis={'categoryorder': 'total descending'},
    )
    return fig

## Stacked Bar Chart of Income vs Expenses

In [8]:
def GetFig_IncomeVsExpenses(ref_Dataframe, selected_Regions):
    """Build a bar chart of median income against median expenses per region.

    For every region the median of the income column and of each expense
    column is taken. Expense medians are sign-flipped so that, with
    barmode='relative', they stack below the axis while income stacks above.

    Args:
        ref_Dataframe: DataFrame with a 'Region' column plus the income and
            expense columns listed in the body.
        selected_Regions: Region names to include. A list-like of names, or a
            single name given as a plain string.

    Returns:
        The figure produced by px.bar, with regions ordered by descending total.

    Raises:
        KeyError: if any required column is missing from `ref_Dataframe`.
    """
    income_Col = 'Total Household Income'
    # Every plotted column other than income is treated as an outflow.
    expense_Cols = [
        'Total Food Expenditure',
        'Restaurant and hotels Expenditure', 'Alcoholic Beverages Expenditure',
        'Tobacco Expenditure', 'Clothing, Footwear and Other Wear Expenditure',
        'Housing and water Expenditure', 'Imputed House Rental Value',
        'Medical Care Expenditure', 'Transportation Expenditure',
        'Communication Expenditure', 'Education Expenditure',
        'Miscellaneous Goods and Services Expenditure',
        'Special Occasions Expenditure', 'Crop Farming and Gardening expenses',
    ]
    # Income stays first so the series order in the chart is unchanged.
    value_Cols = [income_Col] + expense_Cols

    # A bare string is one region name, not a collection of names: wrap it so
    # it is matched exactly rather than by substring.
    if isinstance(selected_Regions, str):
        selected_Regions = [selected_Regions]

    # GET THE MEDIAN OF THE REGIONS
    aggByRegion = ref_Dataframe.groupby('Region')[value_Cols].median().reset_index()

    # FILTER TO ONLY OUR SELECTED REGIONS
    # .copy() makes the filtered rows an independent frame, so the sign flip
    # below never writes into a slice of the grouped result.
    aggByRegion = aggByRegion[aggByRegion['Region'].isin(selected_Regions)].copy()

    # MAKE EXPENSES NEGATIVE (income is left positive)
    aggByRegion[expense_Cols] = -aggByRegion[expense_Cols]

    # PLOT THE GRAPH
    fig = px.bar(data_frame=aggByRegion, x='Region', y=value_Cols, barmode='relative',
                 title='Median Income and Expenses per Region',
                 height=600,
                 labels={
                     'value': 'Php',
                     'variable': 'Income and Expenses'
                 })
    fig.update_layout(xaxis={'categoryorder': 'total descending'})
    return fig