# Load Data

> Functions that load the data for the map.   

In [None]:
#| default_exp load_data

In [None]:
#| export
from nbdev.showdoc import *
import geopandas as gp
import pandas as pd
from git import Repo
import json

In [None]:
#| hide
# Resolve the test data folder relative to the enclosing git repository root,
# so the path does not depend on the notebook's working directory.
repo = Repo('.', search_parent_directories=True)
fp = f"{repo.working_tree_dir}/testData/"

# Check that the files exist using fastcore (both census and geo)

## Load the geography data
> Solomon Islands geography data is organised at the following levels:
> - adm0 - The country as a whole, Solomon Islands
> - adm1 - Also referred to as the province, e.g. Honiara, Malaita
> - adm2 - The constituency, e.g. Central Honiara
> - adm3 - The ward, the smallest geography I am reporting, e.g. Cruz

In [None]:
#| export
class SolomonGeo:
    # TODO work out how to format this?
    # Look at nbdev docs maybe?
    '''
    Holder for the Solomon Islands geography data.
    Attributes:
        adm3    Geopandas dataframe of the admin 3 geographies, indexed by WID.
    '''
    def __init__(self):
        # The data folder is located relative to the root of the git
        # repository that contains the current working directory.
        git_repo = Repo('.', search_parent_directories=True)
        data_dir = str(git_repo.working_tree_dir) + "/testData/"
        self.adm3 = self.elt(data_dir)

    def elt(self, pw:str, # The pathway to the dataset
           )-> gp.GeoDataFrame: # The geography dataset indexed by WID
        '''
        Read the ward geography found under `pw` and index it by the `WID` column
        '''
        wards = gp.read_file(pw + '2009_PHC_Solomons_Ward_4326')
        # The index is built from the column values; WID also stays a column.
        ward_ids = wards["WID"].values
        return wards.set_index(ward_ids)

    def get_geojson(self,) -> dict: # Geo JSON formatted dataset
        '''
        Return the admin 3 geography as a parsed GeoJSON dictionary
        '''
        geojson_text = self.adm3.to_json()
        return json.loads(geojson_text)


In [None]:
#| hide
# Render the documentation entry for the SolomonGeo class.
show_doc(SolomonGeo)

---

[source](https://github.com/Gippers/SolomonIslandsDataMap/blob/main/SolomonIslandsDataMap/load_data.py#L13){target="_blank" style="float:right; font-size:smaller"}

### SolomonGeo

>      SolomonGeo ()

Load the solomon islands geography data 
Attributes:
    adm3    Geopandas dataframe containing admin 3 geographies.

In [None]:
#| hide
# Render the documentation entry for the SolomonGeo.get_geojson method.
show_doc(SolomonGeo.get_geojson)

---

### SolomonGeo.get_geojson

>      SolomonGeo.get_geojson ()

A getter method for geography as a Geo JSON

## Load the Census data

In [None]:
#| export
class SolomonCensus:
    '''
    Load the solomon islands census data 
    Attributes:
        data    Pandas dataframe holding the ward level census table.
    '''
    def __init__(self):
        # The data folder is located relative to the root of the git
        # repository that contains the current working directory.
        repo = Repo('.', search_parent_directories=True)
        fp = str(repo.working_tree_dir) + "/testData/"
        self.data = self.elt(fp)

    def elt(self, pw:str, # The pathway to the dataset
           )-> pd.DataFrame: # Return the pandas dataset
        '''
        Load and transform data from filepath into pandas dataset
        '''
        df = pd.read_csv(pw + 'sol_census_2009_ward.csv')
        # Keep ids as strings rather than numbers.
        df['id'] = df['id'].apply(str)
        # TODO load all the files and merge them together?
        return df




In [None]:
# Render the documentation entry for the SolomonCensus class.
show_doc(SolomonCensus)

---

[source](https://github.com/Gippers/SolomonIslandsDataMap/blob/main/SolomonIslandsDataMap/load_data.py#L38){target="_blank" style="float:right; font-size:smaller"}

### SolomonCensus

>      SolomonCensus ()

Load the solomon islands census data

# Testing

In [None]:
# Smoke test: constructing SolomonGeo reads the geography from testData.
sol_geo = SolomonGeo()

In [None]:
# Smoke test: load the census table and inspect the column types
# (the id column should show as object after the string conversion).
sol_census = SolomonCensus()
sol_census.data.dtypes

id            object
ward_name     object
male_pop       int64
female_pop     int64
total_pop      int64
dtype: object

In [None]:
# Preview the loaded census table.
sol_census.data

Unnamed: 0,id,ward_name,male_pop,female_pop,total_pop
0,1001,Nggosi,5240,4822,10062
1,1002,Mbumburu,1912,1713,3625
2,1003,Rove - Lengakiki,1464,1149,2613
3,1004,Cruz,125,107,232
4,1005,Vavaea,3788,3208,6996
...,...,...,...,...,...
178,913,Duff Islands,262,249,511
179,914,Utupua,586,582,1168
180,915,Vanikoro,625,668,1293
181,916,Tikopia,604,681,1285


In [None]:
#| hide
# Run nbdev's export step for this project (presumably writes the
# `#| export` cells to the `load_data` module named in `default_exp`).
import nbdev; nbdev.nbdev_export()