# Load Data

> Functions that load the data for the map.   

In [None]:
#| default_exp load_data

In [None]:
#| export
from nbdev.showdoc import *
import geopandas as gpd
import pandas as pd
from git import Repo
import json

In [None]:
#| hide
# Locate the enclosing git repository (searching parent directories) and
# point `fp` at its testData folder.
repo = Repo('.', search_parent_directories=True)
fp = f"{repo.working_tree_dir}/testData/"

# Check that the files exist using fastcore (both census and geo)

## Load the geography and census data
> Solomon Islands geography data is organised at the following levels:
> - adm0 - The country as a whole, Solomon Islands
> - adm1 - Also referred to as the province, e.g. Honiara, Malaita
> - adm2 - The constituency, e.g. Central Honiara
> - adm3 - The ward, the smallest geography I am reporting, e.g. Cruz
>
> Solomon Islands census data has been used from the 2009 and 2019 censuses. For the respective census:
> 2009
> - We have the total population for each of the administration regions
>
> 2019
> - There is only data available down to the province level

In [None]:
#| export
class SolomonGeo:
    # TODO: work out how to format this docstring (check the nbdev docs).
    '''
    Load the Solomon Islands geography data joined to its census data.

    Attributes:
        adm3    Geopandas dataframe containing the 'ward' geographies merged
                with the 2009 census data, indexed by `ward_name`.
    '''
    def __init__(self):
        self.adm3 = self.elt('ward', '2009')

    def elt(self, 
            aggregation:str, # Indicates the aggregation of the data
            year:str, # The year of the data, only relevant for census data
           )-> gpd.GeoDataFrame: # The geography joined to the census data
        '''
        Load the geography and census files for the given aggregation and year
        from the repository's `testData` folder, merge them on `WID` and index
        the result by `ward_name`.
        '''
        # Resolve the data folder from the git working tree, searching parent
        # directories for the repository root.
        repo = Repo('.', search_parent_directories=True)
        data_dir = f"{repo.working_tree_dir}/testData/"

        geo = self.load_geo(f"{data_dir}geo_{aggregation}.json")
        census = self.load_census(f"{data_dir}census_{aggregation}_{year}.csv")
        # `merge` defaults to an inner join, so only rows whose `WID` appears
        # in both datasets are kept.
        return geo.merge(census, on="WID").set_index("ward_name")

    def load_geo(self, pw:str, # The pathway to the dataset
           )-> gpd.GeoDataFrame: # The geojson dataset read from the pathway
        '''
        Read the file at the given pathway into a geopandas dataframe
        '''
        return gpd.read_file(pw)

    def load_census(self, pw:str, # Pathway of the dataset
           )-> pd.DataFrame: # A pandas dataframe
        '''
        Read the csv at the given pathway into a pandas dataframe, converting
        the `id` column to strings and renaming it to `WID`
        '''
        df = pd.read_csv(pw)
        # Presumably the ids are cast to str so they match the `WID` key of
        # the geography data when merging - TODO confirm.
        df['id'] = df['id'].astype(str)
        return df.rename(columns={'id': 'WID'})

    def get_geojson(self,
                   ) -> dict: # Geo JSON formatted dataset
        '''
        A getter method for the GeoDataFrame that returns a Geo JSON
        '''
        # `to_json` returns a string; parse it so callers receive a dict.
        return json.loads(self.adm3.to_json())


In [None]:
#| hide
# Render the generated documentation for the SolomonGeo class.
show_doc(SolomonGeo)

---

[source](https://github.com/Gippers/SolomonIslandsDataMap/blob/main/SolomonIslandsDataMap/load_data.py#L14){target="_blank" style="float:right; font-size:smaller"}

### SolomonGeo

>      SolomonGeo ()

Load the solomon islands geography data 
Attributes:
    adm3    Geopandas dataframe containing admin 3 geographies.

In [None]:
#| hide
# Render the generated documentation for SolomonGeo.get_geojson.
show_doc(SolomonGeo.get_geojson)

---

[source](https://github.com/Gippers/SolomonIslandsDataMap/blob/main/SolomonIslandsDataMap/load_data.py#L57){target="_blank" style="float:right; font-size:smaller"}

### SolomonGeo.get_geojson

>      SolomonGeo.get_geojson ()

A getter method for the GeoDataFrame that returns a Geo JSON

# Testing

In [None]:
# Build the loader; __init__ reads and merges the 'ward' / 2009 data.
sol_geo = SolomonGeo()

TypeError: SolomonGeo.elt() missing 2 required positional arguments: 'aggregation' and 'year'

In [None]:
# Display the merged ward-level GeoDataFrame for a visual check.
sol_geo.adm3

In [None]:
#| hide
# Run the nbdev export step for this project.
import nbdev
nbdev.nbdev_export()