# Load Data

> Functions that load the data for the map.   

In [None]:
#| default_exp load_data

In [None]:
#| export
from nbdev.showdoc import *
import geopandas as gp
import pandas as pd
from git import Repo

In [None]:
#| hide
# Find the enclosing git repository (searching parent directories from the
# current directory) and build the path to its testData folder, so the cells
# in this notebook can locate the sample files.
repo = Repo('.', search_parent_directories=True)
fp = str(repo.working_tree_dir) + "/testData/"

# Check that the files exist using fastcore (both census and geo)

## Load the geography data
> Solomon Islands geography data is organised at the following levels:
> - adm0 - The country as a whole, Solomon Islands
> - adm1 - Also referred to as the province, e.g. Honiara, Malaita
> - adm2 - The constituency, e.g. Central Honiara
> - adm3 - Ward, the smallest geography I am reporting, e.g. Cruz

In [None]:
#| export
class SolomonGeo:
    # TODO: decide how the attributes should be formatted for the generated
    # docs (check the nbdev documentation).
    '''
    Load the Solomon Islands geography data.
    Attributes:
        adm3    Geopandas dataframe containing admin 3 geographies.
    '''
    def __init__(self):
        # Resolve the data folder relative to the enclosing git repository.
        root = Repo('.', search_parent_directories=True).working_tree_dir
        self.adm3 = self.elt(f"{root}/testData/")

    def elt(self, pw:str, # The pathway to the dataset
           )-> gp.GeoDataFrame: # The geojson dataset for given aggregation
        '''
        Read the admin 3 boundary archive found under `pw` and return it as a
        geopandas dataframe indexed by ward id.
        '''
        archive = f"zip://{pw}slb_admbnda_adm3.zip"
        wards = gp.read_file(archive)
        # A GeoJSON copy is written to the current directory before the index
        # is replaced.
        wards.to_file('myJson.geojson', driver='GeoJSON')
        # Index rows by the ward id (SINSO_WID) values.
        ward_ids = wards["SINSO_WID"].values
        return wards.set_index(ward_ids)

In [None]:
#| hide
show_doc(SolomonGeo)

---

[source](https://github.com/Gippers/SolomonIslandsDataMap/blob/main/SolomonIslandsDataMap/load_data.py#L31){target="_blank" style="float:right; font-size:smaller"}

### SolomonGeo

>      SolomonGeo ()

Load the solomon islands geography data 
Attributes:
    adm3    Geopandas dataframe containing admin 3 geographies.

In [None]:
# TODO, how can I tell people about the attributes?

## Load the Census data

In [None]:
#| export
class SolomonCensus:
    '''
    Load the Solomon Islands census data.
    Attributes:
        data    Pandas dataframe read from `sol_census_2009_ward.csv`.
    '''
    def __init__(self):
        # Find the enclosing git repository (searching parent directories) and
        # load the census file from its testData folder.
        repo = Repo('.', search_parent_directories=True)
        fp = str(repo.working_tree_dir) + "/testData/"
        self.data = self.elt(fp)

    def elt(self, pw:str, # The pathway to the dataset
           )-> pd.DataFrame: # Return the pandas dataset
        '''
        Load and transform data from filepath into pandas dataset.

        `pw` is prepended directly to the file name, so it must end with a
        path separator.
        '''
        # Read from the `pw` argument rather than a notebook-level `fp`
        # global: that global is not part of the exported module, so relying
        # on it raises NameError outside the notebook.
        df = pd.read_csv(pw + 'sol_census_2009_ward.csv')

        # TODO load all the files and merge them together?
        return df




In [None]:
show_doc(SolomonCensus)

---

### SolomonCensus

>      SolomonCensus ()

Load the solomon islands census data

# Testing

In [None]:
sol_geo = SolomonGeo()

In [None]:
sol_geo.adm3.head()

Unnamed: 0,OBJECTID,SINSO_WID,ADM3_PCODE,ADM3_NAME,ADM2_PCODE,ADM2_NAME,ADM1_PCODE,ADM1_NAME,ADM0_PCODE,COUNTRY,SINSO_CID,SINSO_PID,geometry
1001,1,1001,SB1010431001,Nggosi,SB101043,West Honiara,SB10,Honiara,SB,Solomon Islands,1043,10,"POLYGON Z ((1104818.130 -1046772.426 0.000, 11..."
1002,2,1002,SB1010431002,Mbumburu,SB101043,West Honiara,SB10,Honiara,SB,Solomon Islands,1043,10,"POLYGON Z ((1107079.803 -1048040.379 0.000, 11..."
1003,3,1003,SB1010431003,Rove - Lengakiki,SB101043,West Honiara,SB10,Honiara,SB,Solomon Islands,1043,10,"POLYGON Z ((1106918.942 -1047270.637 0.000, 11..."
1004,4,1004,SB1010421004,Cruz,SB101042,Central Honiara,SB10,Honiara,SB,Solomon Islands,1042,10,"POLYGON Z ((1107949.631 -1047995.867 0.000, 11..."
1005,5,1005,SB1010421005,Vavaea,SB101042,Central Honiara,SB10,Honiara,SB,Solomon Islands,1042,10,"POLYGON Z ((1108708.612 -1047377.358 0.000, 11..."


In [None]:
sol_census = SolomonCensus()

FileNotFoundError: [Errno 2] No such file or directory: '/home/tom/git/SolomonIslandsDataMap/testData/solomon_census_2009_ward.csv'

In [None]:
#| hide
import nbdev; nbdev.nbdev_export()