# Load Data

> Functions that load the data for the map.   

In [None]:
#| default_exp load_data

In [None]:
#| export
from nbdev.showdoc import *
import geopandas as gpd
import pandas as pd
from git import Repo
import json

In [None]:
#| hide
# Locate the enclosing git repository by searching upward from the current directory
repo = Repo('.', search_parent_directories=True)
# Path to the testData folder under the repository's working tree
fp = str(repo.working_tree_dir) + "/testData/"

# Check that the files exist using fastcore (both census and geo)

## Load the geography and census data
>  Solomon Islands geography data is organised at the following levels:
> - adm0 - The country as a whole, Solomon Islands
> - adm1 - Also referred to as the province, e.g. Honiara, Malaita
> - adm2 - The constituency, e.g. Central Honiara
> - adm3 - Ward, the smallest geography I am reporting, e.g. Cruz
>
> Solomon Islands census data from the 2009 and 2019 censuses has been used. For the respective census:
> 2009
> - We have the total population for each of the administration regions
>  2019
>  - There is only data available down to the province level

In [None]:
#| export
class SolomonGeo:
    # TODO work out how to format the attributes
    # Look at nbdev docs maybe?
    # TODO change all data to int?
    '''
    Solomon Islands geography merged with census data.
    Attributes:
        adm3    GeoDataFrame returned by `elt('ward', '2009')`, indexed by geo_name.
    '''
    def __init__(self):
        #self.adm3 = self.elt('constituency', '2009')
        self.adm3 = self.elt('ward', '2009')

    def elt(self,
            aggregation:str, # Aggregation level used in the data file names, e.g. 'ward'
            year:str, # Census year used in the census file name
           )-> gpd.GeoDataFrame: # Geography merged with census data, indexed by geo_name
        '''
        Load the geography and census files for an aggregation and merge them on id and geo_name
        '''
        # Data files live in the testData folder under the git working tree
        git_repo = Repo('.', search_parent_directories=True)
        data_dir = f"{git_repo.working_tree_dir}/testData/"
        geo_path = f"{data_dir}sol_geo_{aggregation}.json"
        census_path = f"{data_dir}sol_census_{aggregation}_{year}.csv"

        boundaries = self.load_geo(geo_path)
        census = self.load_census(census_path)
        # Record which aggregation level these rows belong to
        boundaries['agg'] = aggregation
        merged = boundaries.merge(census, on=['id', 'geo_name'])
        return merged.set_index("geo_name")

    def load_geo(self, pw:str, # The pathway to the geography file
           )-> gpd.GeoDataFrame: # GeoDataFrame with columns id, geo_name and geometry
        '''
        Read a geography file and reduce it to the id, geo_name and geometry columns
        '''
        boundaries = gpd.read_file(pw)
        # Any column matching the name pattern becomes geo_name.
        boundaries.columns = boundaries.columns.str.replace(r'^[a-zA-Z]+name$', 'geo_name', case = False, regex = True)
        # The first column is treated as the id (province id, constituency id etc.).
        first_column = boundaries.columns[0]
        boundaries = boundaries.rename(columns = {first_column: 'id'})
        return boundaries[['id', 'geo_name', 'geometry']]

    def load_census(self, pw:str, # Pathway of the census csv
           )-> pd.DataFrame: # Census rows with a geo_name column and string ids
        '''
        Read a census csv, drop rows with missing values and standardise the name and id columns
        '''
        # Rows containing any missing value are discarded
        census = pd.read_csv(pw).dropna()
        # Give the name column the same label regardless of geography level
        census.columns = census.columns.str.replace(r'^[a-zA-Z]+_name$', 'geo_name', case = False, regex = True)
        # Ids pass through int before becoming strings
        census['id'] = census['id'].astype(int).astype(str)
        return census


    def get_geojson(self,
                   ) -> dict: # Geo JSON formatted dataset
        '''
        Return the adm3 GeoDataFrame as a parsed Geo JSON dictionary
        '''
        geojson_text = self.adm3.to_json()
        return json.loads(geojson_text)


In [None]:
#| hide
# Render the documentation entry for the SolomonGeo class
show_doc(SolomonGeo)

---

[source](https://github.com/Gippers/SolomonIslandsDataMap/blob/main/SolomonIslandsDataMap/load_data.py#L14){target="_blank" style="float:right; font-size:smaller"}

### SolomonGeo

>      SolomonGeo ()

Load the solomon islands geography data 
Attributes:
    adm3    Geopandas dataframe containing admin 3 geographies.

In [None]:
#| hide
# Render the documentation entry for the get_geojson method
show_doc(SolomonGeo.get_geojson)

---

[source](https://github.com/Gippers/SolomonIslandsDataMap/blob/main/SolomonIslandsDataMap/load_data.py#L60){target="_blank" style="float:right; font-size:smaller"}

### SolomonGeo.get_geojson

>      SolomonGeo.get_geojson ()

A getter method for the GeoDataFrame that returns a Geo JSON

# Testing

In [None]:
# Build the loader; __init__ reads the 2009 ward files from the repository's testData folder
sol_geo = SolomonGeo()

In [None]:
# Display the merged GeoDataFrame.
# NOTE(review): the rendered output below shows agg == 'constituency', while __init__
# currently loads 'ward' -- the output looks stale; re-run the cell to confirm.
sol_geo.adm3

Unnamed: 0_level_0,id,geometry,agg,male_pop,female_pop,total_pop
geo_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
West Guadalcanal,634,"MULTIPOLYGON (((159.87989 -9.61400, 159.95452 ...",constituency,5007.0,4562.0,9569.0
Temotu Vatu,950,"MULTIPOLYGON (((168.84036 -12.28504, 168.84096...",constituency,2077.0,2180.0,4257.0
South Guadalcanal,635,"MULTIPOLYGON (((160.15592 -9.81770, 160.15565 ...",constituency,3324.0,3415.0,6739.0
East Guadalcanal,636,"MULTIPOLYGON (((160.85478 -9.83830, 160.85495 ...",constituency,5002.0,5229.0,10231.0
North West Choiseul,102,"MULTIPOLYGON (((156.62512 -6.89156, 156.62507 ...",constituency,6147.0,5808.0,11955.0
East AreAre,726,"MULTIPOLYGON (((161.30468 -9.37111, 161.30491 ...",constituency,3567.0,3532.0,7099.0
North West Guadalcanl,633,"MULTIPOLYGON (((159.83141 -9.34130, 159.83196 ...",constituency,11231.0,10193.0,21424.0
East Choiseul,103,"MULTIPOLYGON (((157.55656 -7.31207, 157.55613 ...",constituency,3078.0,2904.0,5982.0
West Areare,727,"MULTIPOLYGON (((161.23117 -9.45617, 161.23236 ...",constituency,3621.0,3579.0,7200.0
Small Malaita,728,"MULTIPOLYGON (((161.55176 -9.55218, 161.55195 ...",constituency,6484.0,6483.0,12967.0


In [None]:
#| hide
# Run nbdev's export step (this notebook's `default_exp` target is `load_data`)
import nbdev; nbdev.nbdev_export()