# Load Data

> Functions that load the data for the map.   

**Contents**

`Solomon Geospatial Data`
- `SolomonGeo`: A class that cleans the Solomon Islands census and geography data
- `SolomonGeo.read_test`: Loads and transforms the test data
- `SolomonGeo.get_geojson`: Returns the geo_df as a geojson dataset

In [None]:
#| default_exp load_data

In [None]:
#| export
from nbdev.showdoc import *
import geopandas as gpd
import pandas as pd
import numpy as np
from git import Repo
import json
from fastcore import *
from fastcore.basics import patch
from fastcore.test import *
import sys
import topojson as tp
import pickle
from urllib.request import urlopen
import boto3
from dotenv import load_dotenv
from dash import dcc
import os
import copy


load_dotenv()

True

In [None]:
#| hide
# Locate the root of the git repository so the test data can be found
# regardless of the directory the notebook is run from
repo = Repo('.', search_parent_directories=True)
fp = f"{repo.working_tree_dir}/testData/"
# The 2009 census extract is stored with ISO-8859-1 encoding
const_df = pd.read_csv(f"{fp}full_sol_census_2009.csv", encoding="ISO-8859-1")
# Check that the files exist using fastcore (both census and geo)

## Solomon Geospatial Data
> Load the geography and census data
### Geography Data
Solomon Islands geography data is organised at the following levels:
 - adm0 - The country as a whole, Solomon Islands
 - adm1 - Also referred to as the province e.g. Honiara, Malaita
 - adm2 - The Constituency e.g. Central Honiara
 - adm3 - Ward, the smallest geography I am reporting. E.g. Cruz

### Census Data
 Solomon Islands census data from the 2009 and 2019 censuses has been used. For the respective census:
2009
 - We have the total population for each of the administration regions
2019
 - There is only data available down to the province level

### Test that the .env variables exist

In [None]:
#| hide
# Read the AWS credentials. load_dotenv() has already been called, so values
# defined in a .env file are visible through os.getenv as well.
ACCESS_KEY = os.getenv("ACCESS_KEY")
SECRET_ACCCESS_KEY = os.getenv("SECRET_ACCESS_KEY")
REGION_NAME = os.getenv("REGION_NAME")
# os.getenv returns None (not an empty string) when a variable is not set, so
# test for "missing or empty" with truthiness; len(None) would raise TypeError.
if not ACCESS_KEY:
    # If not in .env, then use environment variables
    # Indexing os.environ raises a KeyError naming the missing variable
    ACCESS_KEY = os.environ["ACCESS_KEY"]
    SECRET_ACCCESS_KEY = os.environ["SECRET_ACCESS_KEY"]
    REGION_NAME = os.environ["REGION_NAME"]

In [None]:
# Test that the environment variables can be loaded
# os.getenv yields None for a variable that is not set, so each credential
# must compare as not equal to None
test(ACCESS_KEY, None, nequals)
test(SECRET_ACCCESS_KEY, None, nequals)
test(REGION_NAME, None, nequals)
# The configured region must be ap-southeast-2
test_eq(REGION_NAME, 'ap-southeast-2')

## Function for connecting to AWS S3 client

In [None]:
#| export
def s3_client()-> boto3.client:
    '''
    Return a connection to the AWS s3 client

    The credentials are read from the ACCESS_KEY, SECRET_ACCESS_KEY and
    REGION_NAME environment variables. Raises a KeyError naming the missing
    variable if the credentials are not set.
    '''
    access_key = os.getenv("ACCESS_KEY")
    secret_access_key = os.getenv("SECRET_ACCESS_KEY")
    region_name = os.getenv("REGION_NAME")
    # os.getenv returns None (not an empty string) when a variable is not set,
    # so check truthiness; len(None) would raise a TypeError here.
    if not access_key:
        # If not in .env, then use environment variables
        access_key = os.environ["ACCESS_KEY"]
        secret_access_key = os.environ["SECRET_ACCESS_KEY"]
        region_name = os.environ["REGION_NAME"]
    session = boto3.Session(region_name='ap-southeast-2')
    # Creating the low level functional client
    return session.client(
        's3',
        endpoint_url='https://s3.ap-southeast-2.amazonaws.com',
        aws_access_key_id = access_key,
        aws_secret_access_key = secret_access_key,
        region_name = region_name,
    )

In [None]:
#| hide
show_doc(s3_client)

---

### s3_client

>      s3_client ()

Return a connection to teh AWS s3 client

## Load and edit Solomons Election Data

In [None]:
#| export
class SolomonGeo:
    # TODO work out how to format the attributes
    # Look at nbdev docs maybe?
    # TODO change all data to int?
    # TODO - should I make this a dataclass for the auto functionaliy? potentially should try it out
    '''
    Load and store the Solomon Islands census, population projection and geography data
    Attributes:
        census    Dataframe containing the census data, with hierarchical columns
        population    Dataframe containing the population projection data, with hierarchical columns
        geo    Geopandas dataframe containing the geographies
        geo_levels    An array of the types of available aggregations
        census_vars    A dictionary mapping upper level census columns to their lower level columns
        population_vars    A dictionary mapping upper level population columns to their lower level columns
        ages    A list of the unique age brackets in the population data
        pop_years    A list of the unique years in the population data
        data_type   An array of the available data types: 'Total' and 'Proportion'
        locations A dictionary of locations accessed by the geography level
        type_default    The default data type, 'Total'
    '''
    def __init__(self, 
                cen_df:pd.DataFrame, # A dataset containing the census data,
                pop_df:pd.DataFrame, # A dataset contain the population projection data
                geos:gpd.GeoDataFrame, # A geodataframe containing geographies of data
    ):
        self.census = cen_df
        self.population = pop_df
        self.geo = geos

        # variable that tracks the types of aggregations
        # TODO Workout how to get in right order without hardcoding
        #self.geo_levels = cen_df.loc[:, ('core', 'agg')].unique()
        # TODO make an array!
        self.geo_levels = np.array(['Province', 'Constituency', 'Ward'])

        # Save a list of census variables, ignoring the core variables
        # Use a dictionary that maps the upper level column names to lower level ones
        var_df = cen_df.drop(columns = "core", level=0)
        self.census_vars = self._group_columns(var_df.columns)

        # Save a list of population variables, ignoring the core and age variables
        # Use a dictionary that maps the upper level column names to lower level ones
        var_df = pop_df.drop(columns = ["core", "Age"], level=0)
        self.population_vars = self._group_columns(var_df.columns)
        # Seperately save the age groupings
        self.ages = list(np.unique(pop_df['Age']['Age_Bracket'].values))
        self.pop_years = list(np.unique(pop_df['core']['year'].values))

        # TODO Workout how to get in right order without hardcoding
        #self.data_type = cen_df.loc[:, ('core', 'type')].unique()
        self.data_type = np.array(['Total', 'Proportion'])
        # save a list of locations as a dictionary access by geography level
        locations = {}
        for geo in self.geo_levels:
            locations[geo] = cen_df.loc[cen_df['core']['agg'] == geo, ('core', 'location')].unique()
        self.locations = locations
    
        # TODO: need a list of column sub headings: get from column name split by `:`

        self.type_default = 'Total'

    @staticmethod
    def _group_columns(columns, # Two level column labels, as (upper, lower) pairs
                      ) -> dict: # Maps each upper level name to a list of its lower level names
        '''
        Group hierarchical column labels by their upper level name, keeping the
        lower level names in the order they appear
        '''
        grouped = {}
        for col in columns:
            grouped.setdefault(col[0], []).append(col[1])
        return grouped


    @classmethod
    def read_test(cls,
                 ): # A solmon geo class TODO work out how to return self here... (can't?)
        '''
        Contsructor that initialises the object from files using the local testing data
        '''

        repo = Repo('.', search_parent_directories=True)
        pw = str(repo.working_tree_dir) + "/testData/"
        df = pd.read_csv(pw + 'full_sol_census_2009.csv', encoding = "ISO-8859-1")
        pop = pd.read_csv(pw + 'solo_pop_proj_2009.csv')
        # One geography file is read for every aggregation present in the census data
        aggs = df.loc[:, 'agg'].unique()
        geos = []
        for agg in aggs:
            geo = gpd.read_file(pw + 'sol_geo_' + agg.lower() + '.json')
            # Add an agg column, as the data and geometry need to be joined by id and agg
            geo.loc[:, 'agg'] = agg
            geos.append(geo)

        ret = cls.__transform(df, pop, geos)
        return cls(
            cen_df = ret[0],
            pop_df = ret[1],
            geos = ret[2],
        )
    
    @classmethod
    def load_pickle(cls,
                    folder:str = "/testData/", #file path of the folder to load from, only necessary for local loading
                    aws:bool = True, # Whether to load from aws or locally
                    file_name:str = 'sol_geo.pickle' # file name of the saved class
                 ):
        '''
        A constuctor that initialises the object from a pickle, stored either on aws or locally.
        Raises a ValueError if the pickle downloaded from aws cannot be read.
        '''
        # NOTE: unpickling can execute arbitrary code, so only load pickles from a trusted source
        if aws:
            # Create a connection to AWS server. This is only done when loading from
            # aws, so that a local pickle can be loaded without any credentials
            client = s3_client()

            # Create the S3 object
            obj = client.get_object(
                Bucket = 'hobby-data',
                Key = file_name, 
            )

            # Read in the pickle
            try:
                tmp_geo = pickle.load(obj['Body'])
            except Exception as err:
                # Catch Exception rather than everything, so that KeyboardInterrupt and
                # SystemExit still propagate, and chain the cause for debugging
                raise ValueError("Issue dowloading pickle file from AWS.") from err
                
        else:
            # TODO work out how to make this a class method
            repo = Repo('.', search_parent_directories=True)
            pw = str(repo.working_tree_dir) + folder + file_name
            
            with open(pw, 'rb') as f:
                tmp_geo = pickle.load(f)
 
        
        return cls(
            cen_df = gpd.GeoDataFrame(tmp_geo['census']),
            pop_df = gpd.GeoDataFrame(tmp_geo['population']),
            geos = gpd.GeoDataFrame(tmp_geo['geo']),
        )
        
    
    @classmethod
    def gen_stored(cls,
                  json_sol:dict, # A dictionary with the census, population and geojson data under the "data" key
                 ): # A solmon geo class TODO work out how to return self here... (can't?)
        '''
        A constructor that creates a SolomonGeo object from a JSON serialised dictionary.
        The purpose of this is to allow the object to be stored JSON serialised in a DCC.Store object in 
        the browser before being deserialised back into an object.

        Note that storing and the reloading, will result in dropping the geometry.
        '''
        def df_to_hier(df:pd.DataFrame, # dataframe to convert to hierarchical
                       ) -> pd.DataFrame: # Converted dataframe back to hierachical
            # Split "upper: lower" column names into the two column levels
            cols = df.columns.str.extract(r'(.*): (.+)', expand=True)
            df.columns = pd.MultiIndex.from_arrays((cols[0], cols[1]))
            df.columns.names = [None]*2
            return df
        
        json_sol = json_sol["data"]

        census = pd.DataFrame(json_sol['census'])
        census = df_to_hier(census)
        # Index is unique by type and geoname
        census = census.set_index(census['core']['location'] + "_" + census['core']["type"] ) 
        census.index.name = 'pk'

        population = pd.DataFrame(json_sol['population'])
        population = df_to_hier(population)
        population.set_index(('core', 'location'), inplace = True)

        geo = gpd.GeoDataFrame(json_sol['geojson'])

        return cls(
            cen_df = census,
            pop_df = population,
            geos = geo,
        )
    
    @classmethod
    def __transform(cls, 
                    df:pd.DataFrame, # The dataframe containing census data
                    pop_df:pd.DataFrame, # The dataset containing the population projection data
                    l_geos:[gpd.GeoDataFrame], # A list of geopandas dataframes containing 
                                                # the geographies 
                 ) -> tuple: # The transformed census, population and geography datasets
        '''
        Transform the given raw input datasets into cleaned census, population and
        geography datasets. Assumes correct format of the input datasets.
        Raises a ValueError if the columns cannot be converted to hierarchical columns.
        '''
        # TODO seperate out the geometry from the data.
        # TODO - make a function that tests that the geo and datasets both join

        geos = gpd.GeoDataFrame()
        for geo in l_geos:
            # Before combining, need to rename like columns
            # Rename columns and keep only necessary ones, Note that id can be province id, contsituency id etc.
            geo.columns = geo.columns.str.replace(r'^[a-zA-Z]+name$', 'geo_name', case = False, regex = True)
            # TODO this assumes the id key column is the first one (which so far it is...)
            geo.rename(columns = {geo.columns[0]:'id'}, inplace=True)

            geo = geo.loc[:, ['id', 'agg', 'geometry']] 

            # simplify the geography, use topo to preserver the topology between shapes
            topo = tp.Topology(geo, prequantize=False)
            geo = topo.toposimplify(360/43200).to_gdf() # old 360/43200

            geos = pd.concat([geos, geo])
            
        # Clean the geospatial dataframe
        geos.loc[:, 'year'] = '2009'
        
        # Clean the census data
        df = df.dropna()
        # Rename columns to be consistent across geography
        df.columns = df.columns.str.replace(r'^[a-zA-Z]+_name$', 'location', case = False, regex = True)
        # id needs to change types twice so that it is a string of an int
        df = df.astype({'id': 'int'})#, 'male_pop':'int', 	'female_pop':'int', 'total_pop':'int'})
        df = df.astype({'id': 'str'})

        pop_df = pop_df.astype({'core: id': 'int'})
        pop_df = pop_df.astype({'core: id': 'str'})

        # Add location names to geography dataset
        locations = copy.copy(df)
        locations = locations.loc[:, ['id', 'agg', 'location']].drop_duplicates()
        geos = geos.merge(locations, on=['id', 'agg'], how = 'left')

        # Index is unique by type and geoname
        df['pk'] = df['location'] + "_" + df["type"] 
        df = df.set_index("pk") 

        # Rename some of the census data
        df = df.rename(columns = {
                                'id':'core: id', 'agg':'core: agg', 'location':'core: location',
                                'year':'core: year', 'type':'core: type'})

        # Test that the datasets all have geographies
        test_geo(df, geos)
        test_geo(pop_df, geos.loc[geos['agg'] == 'Province'])         

        # Convert into a multiindex dataframe, with hiearchical columns
        try:
            cols = df.columns.str.extract(r'(.*): (.+)', expand=True)
            df.columns = pd.MultiIndex.from_arrays((cols[0], cols[1]))
            df.columns.names = [None]*2

            cols2 = pop_df.columns.str.extract(r'(.*): (.+)', expand=True)
            pop_df.columns = pd.MultiIndex.from_arrays((cols2[0], cols2[1])) 
            pop_df.columns.names = [None]*2
        except Exception as err:
            # Catch Exception rather than everything, so that KeyboardInterrupt and
            # SystemExit still propagate, and chain the cause for debugging
            raise ValueError("Issue converting geopandas dataframe to multindex. \
                             Check that all columns have \': \' beside the following\
                             core columns: geometry, id, agg, year, type.") from err
        
        # Set index of geography and population data
        geos = geos.set_index(geos.loc[:, 'location']) 
        pop_df.set_index(('core', 'location'), inplace = True)

        # Set all non core and age columns of population to int variables
        # TODO must be a better way to do this
        cols = pop_df.columns.get_level_values(0)
        ignore = ['core', 'Age']
        cols = [c for c in cols if c not in ignore]
        cols = list(set(cols))
        for c1 in cols:
            to_change = pop_df[c1].columns
            for c2 in to_change:
                # Keep the first whitespace separated token and strip the thousands separators
                pop_df[(c1, c2)] = pop_df[(c1, c2)].apply(lambda x: int(x.split()[0].replace(',', '')))

        # Add proportion to the populdation data
        pop_p = copy.copy(pop_df)   
        pop_p.loc[:, ('core', 'type')] = 'Proportion'

        def totalColumn(data:pd.DataFrame, # Dataset
                column:[str], # Columns to manipulate
                ) -> pd.DataFrame:
            '''Used to Create proportions by year'''
            data[column] = data[column] / data[column].agg('sum')
            return data
        
        for col in cols:
            # For each non core and age column:
            pop_p = pop_p.groupby([('core', 'year')], sort = False, group_keys=False).apply(totalColumn, col)
            #pop_p = pop_p.droplevel(0)
            
        pop_df = pd.concat([pop_df, pop_p], axis = 0) # Created extra index, drop
        
        # Sort both datasets, this is critical for putting values back into the map.
        # Sort alphabetically so they are both the same
        df.sort_values(by = [('core', 'location')], inplace = True)
        pop_df.sort_values(by = [('core', 'location'), ('core', 'year'), ('Age', 'Numerical_Bracket')], inplace = True)
        geos.sort_index(inplace = True, sort_remaining = False)  

        #pop_df.index.name = 'index'
        #pop_df.loc[:, ('core', 'location')] = pop_df.index

        # return the transformed dataset
        return df, pop_df, geos


# SolomonGeo Data Class
This data class manipulates and stores the geospatial and regular data relating to each geography. It includes
the necessary data and methods to manipulate that data to build the resulting dash app.

In [None]:
#| hide
show_doc(SolomonGeo)

---

### SolomonGeo

>      SolomonGeo (cen_df:pandas.core.frame.DataFrame,
>                  pop_df:pandas.core.frame.DataFrame,
>                  geos:geopandas.geodataframe.GeoDataFrame)

Load the solomon islands geography data 
Attributes:
    cen_df    Geopandas dataframe containing geographies and census data
    geo_levels    A list of the types of available aggregations
    census_vars    A dictionary of census variables in the dataset 
    data_type   Specifies whether the variable is a percentage or number
    locations A dictionary of locations accessed by the geography level

|    | **Type** | **Details** |
| -- | -------- | ----------- |
| cen_df | DataFrame | A dataset containing the census data, |
| pop_df | DataFrame | A dataset contain the population projection data |
| geos | GeoDataFrame | A geodataframe containing geographies of data |

## Constructors for SolomonGeo class
These constructors are used to build the SolomonGeo objects from various inputs

In [None]:
#| hide
show_doc(SolomonGeo.read_test)

---

### SolomonGeo.read_test

>      SolomonGeo.read_test ()

Contsructor that initialises the object from files using the local testing data

In [None]:
show_doc(SolomonGeo.load_pickle)

---

### SolomonGeo.load_pickle

>      SolomonGeo.load_pickle (folder:str='/testData/', aws:bool=True,
>                              file_name:str='sol_geo.pickle')

A constuctor that initialises the object from aws pickle

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| folder | str | /testData/ | file path of the folder to save in |
| aws | bool | True | Whether to load from github or local |
| file_name | str | sol_geo.pickle | file name of the saved class |

In [None]:
#| hide
show_doc(SolomonGeo.gen_stored)

---

### SolomonGeo.gen_stored

>      SolomonGeo.gen_stored (json_sol:dict)

A constructor that creates a JSON serialised SolomonGeo object from a stored geopandas dataframe.
The purpose of this is to allow the object to be stored JSON serialised in a DCC.Store object in 
the browser before being deserialised and as an object.

Note that storing and the reloading, will result in dropping the geometry.

|    | **Type** | **Details** |
| -- | -------- | ----------- |
| json_sol | dict | A geojson dataset |

## Test that dataset and geography merge perfectly

In [None]:
# TODO should this method be callable from the class
def test_geo(df:pd.DataFrame, # The dataframe to test
                geo:gpd.GeoDataFrame, # The geographys
                ):
    '''
    Check that the data and the geography join without leaving any row unmatched
    on either side. The geography is matched on its id and agg columns, and the
    data on its 'core: id' and 'core: agg' columns.
    '''
    # An outer join keeps unmatched rows from both sides, and the indicator
    # column records which side(s) each row came from
    merged = geo.merge(
        df,
        how = 'outer',
        left_on = ['id', 'agg'],
        right_on = ['core: id', 'core: agg'],
        indicator = True,
    )
    unmatched = merged['_merge'] != 'both'
    # Every row must have been found in both datasets
    test_eq(sum(unmatched), 0)



## Save SolomonGeo
Save as a pickled object

In [None]:
#| export
@patch
def save_pickle(self:SolomonGeo,
                aws:bool = True, # Whether to save to aws or locally
                folder:str = "/testData/", #file path of the folder to save in, only necesasry for local saving
                file_name:str = 'sol_geo.pickle' # file name of the saved class
             ):
    '''
    Save a pickle of the attributes of the SolomonGeo object, either to AWS S3 or to a local file.
    Raises a ValueError if the upload to AWS fails.
    '''
    if aws:
        body_pickle = pickle.dumps(self.__dict__)
        try:
            client = s3_client()
            client.put_object(
                Bucket = 'hobby-data',
                Key = file_name, 
                Body = body_pickle
            )
        except Exception as err:
            # Catch Exception rather than everything, so that KeyboardInterrupt and
            # SystemExit still propagate, and chain the cause for debugging
            raise ValueError("Issue uploading pickle file to AWS.") from err
    else:
        repo = Repo('.', search_parent_directories=True)
        pw = str(repo.working_tree_dir) + folder + file_name

        # The context manager closes the file even if pickling fails part way through
        with open(pw, 'wb') as f:
            pickle.dump(self.__dict__, f, 2)


In [None]:
show_doc(SolomonGeo.save_pickle)

---

### SolomonGeo.save_pickle

>      SolomonGeo.save_pickle (aws:bool=True, folder:str='/testData/',
>                              file_name:str='sol_geo.pickle')

Save a pickle of the SolomonGeo class in backblaze b2

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| aws | bool | True | Whether to save to aws or locally |
| folder | str | /testData/ | file path of the folder to save in, only necesasry for local saving |
| file_name | str | sol_geo.pickle | file name of the saved class |

## Get Geo JSON
A getter method for the geometry portion of the dataset that returns a GeoJSON formatted geography.

It only includes the geography and location name as id

In [None]:
#| export
@patch
def get_geojson(self:SolomonGeo, 
                geo_filter:str = None, # Filters the geojson to the requested aggregation 
               ) -> dict: # Geo JSON formatted dataset
    '''
    Return the geometry of the stored geographies as a Geo JSON formatted dictionary,
    optionally restricted to a single aggregation
    '''
    geographies = self.geo
    if geo_filter is not None:
        # Keep only the rows that belong to the requested aggregation
        in_aggregation = geographies['agg'] == geo_filter
        geographies = geographies.loc[in_aggregation, :]
    # Only the geometry column is serialised, to minimise the file size
    geometry_json = geographies.loc[:, 'geometry'].to_json()
    return json.loads(geometry_json)

In [None]:
#| hide
show_doc(SolomonGeo.get_geojson)

---

### SolomonGeo.get_geojson

>      SolomonGeo.get_geojson (geo_filter:str=None)

A getter method for the SolomonGeo class that returns a Geo JSON formatted dataset

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| geo_filter | str | None | Filters the geojson to the requested aggregation |
| **Returns** | **dict** |  | **Geo JSON formatted dataset** |

## Store JSON
A getter method that returns a dcc.Store object with the data of the `SolomonGeo` class
converted to a dictionary for storing with dash. We use a dictionary instead of geojson
as the time to convert dictionary back to geopandas is much faster than geojson. (0.01s vs 1.2s) This 
makes the map much more snappy and responsive

On storing, we drop the geometry as it won't store as json. Note that you can get around this
by using a geojson, but this increases reload time by a lot. 

In [None]:
#| export
@patch
def get_store(self:SolomonGeo, 
            ) -> dcc.Store: # A dcc.Store holding the data as dictionaries
    '''
    Build a dcc.Store with the id "geo_df" that holds the census, population and
    geography data of the `SolomonGeo` object as plain dictionaries. The geometry
    column is dropped from the geography data before it is stored.
    '''
    def flatten_columns(frame:pd.DataFrame) -> pd.DataFrame:
        # Join the two column levels into single "upper: lower" names
        upper = frame.columns.droplevel(1)
        lower = frame.columns.droplevel(0)
        frame.columns = (upper + ": " + lower).tolist()
        return frame

    # Work on copies so the data held by the object keeps its hierarchical columns
    census = flatten_columns(copy.copy(self.census))

    population = copy.copy(self.population)
    # The location lives in the index, so write it into a column before flattening
    population.loc[:, ('core', 'location')] = population.index
    population = flatten_columns(population)

    geography = copy.copy(self.geo)
    # Need to drop geometry as it won't serialize
    geography.drop(columns = 'geometry', inplace = True)

    payload = {
        "census": census.to_dict("records"),
        "population": population.to_dict("records"),
        "geojson": geography.to_dict(),
    }
    return dcc.Store(id="geo_df", data={"data": payload})

In [None]:
#| hide
show_doc(SolomonGeo.get_store)


---

### SolomonGeo.get_store

>      SolomonGeo.get_store ()

A getter method that returns a dcc.Store object with the data of the `SolomonGeo` class
converted to a dictionary for storing with dash.

## Get Census Data
Returns the census dataset for a particular aggregation

In [None]:
#| export
@patch
def get_census(self:SolomonGeo, 
                geo_filter:str = None, # Filters the dataframe to the requested geography 
                var:str = None, # Selects an upper level variable
                measure:str = None, # Selects the lower level variable, if measure is used, var must be used.
                loc_filter:[str] = None, # Filters one of more locations
                # TODO remove hardcoding here?
                type_filter:str = 'Total', # Return either number of proportion
                agg = False, # Whether to return the dataset aggregated for the given selection
               ) -> pd.DataFrame: # Pandas Dataframe containing census data
    '''
    A getter method for the SolomonGeo class that returns a pandas dataset, indexed
    by location, containing the requested census data. This is the minimal data required
    to display on the map. 
    - Optionally can aggregate the dataset: totals are summed, and proportions are returned
      as each column's percentage share of the overall sum. The aggregated result is the
      output of a column-wise sum rather than a per location dataset.
    - Raises a ValueError if geo_filter is not a known geography, if measure is set
      without a matching var, or if aggregating with an unknown type_filter.
    '''
    ret = self.census
    ret = ret.loc[ret['core']['type'] == type_filter, :] 
    ret = ret.set_index(ret.loc[:, ('core', 'location')]) # Change index to location as it's more desriptive
    if geo_filter is not None:
        # Validate explicitly: an assert would be stripped when running with -O,
        # and the error has to be raised rather than just constructed
        if geo_filter not in ['Ward', 'Constituency', 'Province']:
            raise ValueError("Geo filter must be one of: ['Ward', 'Constituency', 'Province']")
        ret = ret.loc[ret['core']['agg'] == geo_filter, :]

    if loc_filter is not None:
        ret = ret.loc[ret['core']['location'].isin(loc_filter), :]

    # Return no core data to minimise the html size
    ret = ret.drop(columns = 'core', level=0)

    # Keep only selected column if required
    if measure is not None:
        # measure is a lower level column, so it only makes sense together with
        # the upper level column (var) that it belongs to
        if var is None or measure not in self.census_vars.get(var, []):
            raise ValueError("If measure is set, var 1 must be set and the key value pair of var and measure must match")
        ret = ret[var].filter(items = [measure])
    elif var is not None:
        # Keep all values from upper level column
        ret = ret[var]

    ret = pd.DataFrame(ret)

    # If required, aggregate dataset based on data type
    if agg:
        if type_filter == 'Total':
            ret = ret.sum()
        elif type_filter == 'Proportion':
            ret = ret.sum() / ret.sum().sum() * 100
        else:
            raise ValueError('The type passed to the aggregate function must be one of the following: \'Total\', \'Proportion\'.')
    
    return ret

In [None]:
#| hide
show_doc(SolomonGeo.get_census)

---

### SolomonGeo.get_census

>      SolomonGeo.get_census (geo_filter:str=None, var:str=None,
>                             measure:str=None, loc_filter:[<class'str'>]=None,
>                             type_filter:str='Total', agg=False)

A getter method for the SolomonGeo class that returns a pandas dataset containg
the id variable and the requested census data. This is the minimal data required
to display on the map. 
- Optionally can aggregate the dataset, uses weighted aggregation for proportional data

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| geo_filter | str | None | Filters the dataframe to the requested geography |
| var | str | None | Selects an upper level |
| measure | str | None | Selects the lower level variable, if var 1 is used, measure must be used. |
| loc_filter | [<class 'str'>] | None | Filters one of more locations |
| type_filter | str | Total | Return either number of proportion |
| agg | bool | False | Whether to return the dataset aggregated for the given selection |
| **Returns** | **DataFrame** |  | **Pandas Dataframe containing population data** |

## Get Population Data
Returns the population projection data for a given aggregation

In [None]:
#| export
@patch
def get_pop(self:SolomonGeo, 
                years:[str], # Selects the year/years of data to return
                var:str = None, # Selects an upper level variable
                measure:str = None, # Selects the lower level variable, if measure is used, var must be used too.
                loc_filter:[str] = None, # Filters one or more locations
                type_filter:str = 'Total', # Return either number or proportion
                agg = False, # Whether to return the dataset aggregated for the given selection
                agg_location = False, # If true, group by location when aggregating, i.e. keep one row per location
                agg_ages = False, # If true, group by age bracket when aggregating, i.e. keep one row per age bracket
                ages:[str] = None, # Filters for one or more Age Brackets, if none returns all
               ) -> pd.DataFrame: # Pandas Dataframe containing population data
    '''
    A getter method for the SolomonGeo class that returns a pandas dataset containing
    the year, the age bracket and the requested population data. This is the minimal
    data required to display on the map.
    - The data is always restricted to the province level
    - When `agg` is set the rows are summed per year, and additionally per location
      and/or age bracket when `agg_location`/`agg_ages` are set
    - Raises a ValueError if `measure` is set without a matching `var`, or if `agg` is
      set and `type_filter` is not one of 'Total', 'Proportion'
    '''
    # Validate the var/measure pair before doing any work, so a bad request fails with
    # a clear message instead of an unrelated pandas error further down
    if measure is not None:
        try:
            valid_pair = var is not None and measure in self.population_vars[var]
        except (KeyError, TypeError):
            valid_pair = False
        if not valid_pair:
            raise ValueError("If measure is set, var 1 must be set and the key value pair of var and measure must match")

    ret = self.population
    ret = ret.loc[ret['core']['type'] == type_filter, :] 
    # This getter always restricts the data to the province level
    ret = ret.loc[ret['core']['agg'] == 'Province', :]

    # Filter to years
    ret = ret.loc[ret['core']['year'].isin(years), :]

    if loc_filter is not None:
        ret = ret.loc[ret.index.isin(loc_filter), :]

    if ages is not None:
        ret = ret.loc[ret['Age']['Age_Bracket'].isin(ages), :]

    # Column labels for the year and age bracket, these are also the group by keys
    year_key = ('core', 'year')
    age_key = ('Age', 'Age_Bracket')

    # Return no core data to minimise the html size, only year and age bracket are kept
    age_year = ret[[year_key, age_key]]
    # WARNING - don't reorder before the concat below
    ret = ret.drop(columns = ['core', 'Age'], level=0)
    # NOTE(review): in the displayed data 'Numerical_Bracket' sits under 'Age', so it looks
    # like the drop above already removes it - confirm before deleting this line
    ret = ret.drop(columns = "Numerical_Bracket", level = 1)

    # Keep only selected column if required
    if measure is not None:
        ret = ret[var].filter(items = [measure])
        # Make it multiindex again
        ret.columns = pd.MultiIndex.from_arrays(([var], [measure]))
        ret = pd.concat([age_year, ret], axis = 1)
    elif var is not None:
        # Keep all values from upper level column
        ret = ret[var]
        # Make it multiindex again
        measures = self.population_vars[var]
        ret.columns = pd.MultiIndex.from_arrays(([var] * len(measures), measures))
        ret = pd.concat([age_year, ret], axis = 1)
    else:
        ret = pd.concat([age_year, ret], axis = 1)
        ret.columns = ret.columns.get_level_values(1)
        # The columns are now flat, so the group by keys must be the flat labels too
        year_key, age_key = 'year', 'Age_Bracket'
        # TODO inconsistent column naming based on variable, measure or no selection

    # Set the group by variables, the year is always kept separate
    group_by = [year_key]
    if agg_location:
        group_by.append(ret.index)
    if agg_ages:
        group_by.append(age_key)

    # If required, aggregate dataset based on data type
    if agg:
        if type_filter not in ['Total', 'Proportion']:
            raise ValueError('The type passed to the aggregate function must be one of the following: \'Total\', \'Proportion\'.')
        # TODO look into second aggregation strat for 'Proportion' when doing growth
        ret = ret.groupby(group_by, sort = False).sum(numeric_only= True)

    return ret

# Testing

In [None]:
sol_geo = SolomonGeo.read_test()

  df['pk'] = df['location'] + "_" + df["type"]


In [None]:
test = sol_geo.population

In [None]:
test['Age']['Age_Bracket']

(core, location)
Central      0-4
Central      0-4
Central      5-9
Central      5-9
Central    10-14
           ...  
Western    70-74
Western    75-79
Western    75-79
Western      80+
Western      80+
Name: Age_Bracket, Length: 5440, dtype: object

In [None]:
tester = sol_geo.get_pop(years = [2010, 2011, 2012], var = "Population", measure='Total').values[:, -1]
tester
# TODO work out how to aggregate all the columns other than age.

array([4156.0, 3988.0, 3289.0, 2521.0, 2118.0, 2102.0, 2022.0, 1940.0,
       1382.0, 1126.0, 821.0, 694.0, 563.0, 428.0, 293.0, 212.0, 199.0,
       4115.0, 4064.0, 3444.0, 2638.0, 2131.0, 2060.0, 2022.0, 1953.0,
       1498.0, 1139.0, 879.0, 695.0, 585.0, 442.0, 306.0, 209.0, 209.0,
       4062.0, 4112.0, 3594.0, 2772.0, 2164.0, 2018.0, 2022.0, 1944.0,
       1623.0, 1150.0, 944.0, 697.0, 606.0, 456.0, 321.0, 210.0, 217.0,
       4339.0, 4133.0, 3418.0, 2721.0, 2275.0, 2072.0, 1916.0, 1841.0,
       1431.0, 1147.0, 835.0, 706.0, 509.0, 483.0, 311.0, 207.0, 136.0,
       4366.0, 4211.0, 3580.0, 2842.0, 2345.0, 2103.0, 1946.0, 1863.0,
       1528.0, 1186.0, 892.0, 715.0, 543.0, 471.0, 344.0, 206.0, 151.0,
       4397.0, 4262.0, 3739.0, 2971.0, 2426.0, 2137.0, 1981.0, 1873.0,
       1629.0, 1224.0, 958.0, 722.0, 584.0, 454.0, 378.0, 206.0, 167.0,
       16116.0, 14274.0, 11675.0, 9959.0, 10071.0, 8908.0, 7662.0, 6667.0,
       4865.0, 3772.0, 2509.0, 2035.0, 1456.0, 1248.0, 878.0, 561.0

In [None]:
sol_geo.get_pop(years = [2010, 2011, 2012], var = 'Population', measure = 'Total',  ages = ["0-4"])#, agg = True) loc_filter = ['Honiara', 'Choiseul'],

Unnamed: 0_level_0,core,Age,Population
Unnamed: 0_level_1,year,Age_Bracket,Total
"(core, location)",Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
Central,2010,0-4,4156.0
Central,2011,0-4,4115.0
Central,2012,0-4,4062.0
Choiseul,2010,0-4,4339.0
Choiseul,2011,0-4,4366.0
Choiseul,2012,0-4,4397.0
Guadalcanal,2010,0-4,16116.0
Guadalcanal,2011,0-4,16545.0
Guadalcanal,2012,0-4,17002.0
Honiara,2010,0-4,8338.0


In [None]:
sol_geo.locations

{'Province': array(['Central', 'Choiseul', 'Guadalcanal', 'Honiara', 'Isabel',
        'Makira-Ulawa', 'Malaita', 'Rennell-Bell', 'Temotu', 'Western'],
       dtype=object),
 'Constituency': array(['Auki-Langalanga', 'Baegu-Asifola', 'Central Guadalcanal',
        'Central Honiara', "Central Kwara'ae", 'Central Makira',
        'East AreAre', 'East Choiseul', 'East Guadalcanal', 'East Honiara',
        'East Kwaio', 'East Makira', 'East Malaita', 'East-Central',
        'Fataleka', 'Gao-Bugotu', 'Gizo-Kolombangara',
        'Hograno-Kia-Havulei', 'Lau-Mbaelela', 'Malaita Outer Island',
        'Marigne-Kokota', 'Marovo', 'Nggela', 'North Guadalcanal',
        'North Malaita', 'North New Georgia', 'North Vella la Vella',
        'North West Choiseul', 'North West Guadalcanl', 'North-East',
        'Ranongga-Simbo', 'Rennell-Bellona', 'Savo-Russull Islands',
        'Shortland Islands', 'Small Malaita', 'South Choiseul',
        'South Guadalcanal', 'South New Georgia-Rendova',
        '

## Save and Load

Test that the newly created SolomonGeo object can be saved to AWS

In [None]:
sol_geo.save_pickle()

Test that we can connect to the AWS S3 bucket

In [None]:
# Request the test file from the S3 bucket
obj = s3_client().get_object(
    Bucket = 'hobby-data',
    Key = 'test.txt', 
)

# Read the body of the test file. Only ordinary errors are converted, so that
# interrupts still propagate, and the original error is chained as the cause.
try:
    data = obj['Body'].read()
except Exception as err:
    raise ValueError("Issue dowloading test file from AWS.") from err

In [None]:
#| slow
SolomonGeo.load_pickle('/testData/', aws=True)

<__main__.SolomonGeo>

Test that gen_stored creates a copy correctly from a json serialised dataframe

In [None]:
stored_geo = sol_geo.get_store()
# TODO I potentially need to create a function to check if two objects are the same

In [None]:
restored_geo = SolomonGeo.gen_stored(stored_geo.data)


In [None]:
restored_geo.get_census(geo_filter = 'Constituency', loc_filter=['Nggela', 'East AreAre'])

Unnamed: 0_level_0,Key Statistics,Main source of household drinking water,Main source of household drinking water,Main source of household drinking water,Main source of household drinking water,Main source of household drinking water,Main source of household drinking water,Main source of household drinking water,Main source of household drinking water,Main source of household drinking water,...,Disability seeing,Disability hearing,Disability hearing,Disability hearing,Disability walking,Disability walking,Disability walking,Disability remembering,Disability remembering,Disability remembering
Unnamed: 0_level_1,Total Households,metered SIWA drinking water,communal standpipe,private water tank,communal water tank,well - protected,well - unprotected,river or stream,bottled water,other source of drinking water,...,Cannot perform the task,No Difficulty at all,Some Difficulty,Cannot perform the task,No Difficulty at all,Some Difficulty,Cannot perform the task,No Difficulty at all,Some Difficulty,Cannot perform the task
"(core, location)",Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
East AreAre,1105,1.0,815.0,12.0,9.0,1.0,3.0,223.0,0.0,41.0,...,352.0,13.0,6888.0,197.0,14.0,6725.0,339.0,35.0,6811.0,262.0
Nggela,3315,26.0,1557.0,258.0,437.0,52.0,13.0,868.0,1.0,103.0,...,1938.0,50.0,15713.0,1017.0,81.0,14689.0,1943.0,179.0,14785.0,1917.0


In [None]:
# TODO create a test to check that two objects are the same. Probably need to write default equality behaviour into
# the object. Maybe use fastcore test_eq
#test_eq(stored_geo.geo_df, sol_geo.geo_df)

Test filtering down to multiple locations

In [None]:
#test_eq(sol_geo.get_census(geo_filter = 'constituency')['total_pop'].sum(), const_df['total_pop'].sum())
sol_geo.get_census(geo_filter = 'Constituency', loc_filter=['Nggela', 'East AreAre'])

Unnamed: 0_level_0,Key Statistics,Main source of household drinking water,Main source of household drinking water,Main source of household drinking water,Main source of household drinking water,Main source of household drinking water,Main source of household drinking water,Main source of household drinking water,Main source of household drinking water,Main source of household drinking water,...,Disability seeing,Disability hearing,Disability hearing,Disability hearing,Disability walking,Disability walking,Disability walking,Disability remembering,Disability remembering,Disability remembering
Unnamed: 0_level_1,Total Households,metered SIWA drinking water,communal standpipe,private water tank,communal water tank,well - protected,well - unprotected,river or stream,bottled water,other source of drinking water,...,Cannot perform the task,No Difficulty at all,Some Difficulty,Cannot perform the task,No Difficulty at all,Some Difficulty,Cannot perform the task,No Difficulty at all,Some Difficulty,Cannot perform the task
"(core, location)",Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
East AreAre,1105,1.0,815.0,12.0,9.0,1.0,3.0,223.0,0.0,41.0,...,352.0,13.0,6888.0,197.0,14.0,6725.0,339.0,35.0,6811.0,262.0
Nggela,3315,26.0,1557.0,258.0,437.0,52.0,13.0,868.0,1.0,103.0,...,1938.0,50.0,15713.0,1017.0,81.0,14689.0,1943.0,179.0,14785.0,1917.0


In [None]:
# TODO work out how to test that a saved and then loaded object is the same as the original
# Might need to write an equality function
restored_geo.population

Unnamed: 0_level_0,core,core,core,core,Age,Age,Population,Population,Population
Unnamed: 0_level_1,id,agg,type,year,Age_Bracket,Numerical_Bracket,Males,Females,Total
"(core, location)",Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
Central,4,Province,Total,2010,0-4,0,2119.000000,2037.000000,4156.000000
Central,4,Province,Proportion,2010,0-4,0,0.007116,0.007210,0.007162
Central,4,Province,Total,2010,5-9,5,2074.000000,1914.000000,3988.000000
Central,4,Province,Proportion,2010,5-9,5,0.006965,0.006775,0.006872
Central,4,Province,Total,2010,10-14,10,1732.000000,1556.000000,3289.000000
...,...,...,...,...,...,...,...,...,...
Western,2,Province,Proportion,2025,70-74,70,0.001877,0.001965,0.001920
Western,2,Province,Total,2025,75-79,75,466.000000,493.000000,959.000000
Western,2,Province,Proportion,2025,75-79,75,0.001197,0.001315,0.001255
Western,2,Province,Total,2025,80+,80,448.000000,449.000000,897.000000


In [None]:
# Work on a copy so that adding the helper column does not change the dataframe held by
# sol_geo (presumably `population` returns the stored dataframe itself - confirm)
test = sol_geo.population.copy()
test.loc[:, ('core', 'location')] = test.index

In [None]:
test

Unnamed: 0_level_0,core,core,core,core,Age,Age,Population,Population,Population,core
Unnamed: 0_level_1,id,agg,type,year,Age_Bracket,Numerical_Bracket,Males,Females,Total,location
"(core, location)",Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
Central,4,Province,Total,2010,0-4,0,2119.000000,2037.000000,4156.000000,Central
Central,4,Province,Proportion,2010,0-4,0,0.007116,0.007210,0.007162,Central
Central,4,Province,Total,2010,5-9,5,2074.000000,1914.000000,3988.000000,Central
Central,4,Province,Proportion,2010,5-9,5,0.006965,0.006775,0.006872,Central
Central,4,Province,Total,2010,10-14,10,1732.000000,1556.000000,3289.000000,Central
...,...,...,...,...,...,...,...,...,...,...
Western,2,Province,Proportion,2025,70-74,70,0.001877,0.001965,0.001920,Western
Western,2,Province,Total,2025,75-79,75,466.000000,493.000000,959.000000,Western
Western,2,Province,Proportion,2025,75-79,75,0.001197,0.001315,0.001255,Western
Western,2,Province,Total,2025,80+,80,448.000000,449.000000,897.000000,Western


In [None]:
# TODO should do a count check for the geojson similar to this
sol_geo.census[sol_geo.census['core']['agg'] == 'Constituency'].count()

core                    id                         100
                        location                   100
                        agg                        100
                        type                       100
Key Statistics          Total Households           100
                                                  ... 
Disability walking      Some Difficulty            100
                        Cannot perform the task    100
Disability remembering  No Difficulty at all       100
                        Some Difficulty            100
                        Cannot perform the task    100
Length: 339, dtype: int64

# TODO check that all proportions are between 0 and 1

In [None]:
# TODO filter and test equality?
test = sol_geo.get_geojson(geo_filter = 'Ward')

In [None]:
sol_geo.geo

Unnamed: 0_level_0,geometry,id,agg,year,location
location,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Aba - Asimeuru,"MULTIPOLYGON (((161.54336 -9.54751, 161.54367 ...",722,Ward,2009,Aba - Asimeuru
Aiaisi,"MULTIPOLYGON (((161.26900 -9.21229, 161.27144 ...",719,Ward,2009,Aiaisi
Aimela,"MULTIPOLYGON (((160.68653 -8.71158, 160.67020 ...",702,Ward,2009,Aimela
Aola,"MULTIPOLYGON (((160.50139 -9.52996, 160.50196 ...",616,Ward,2009,Aola
Areare,"MULTIPOLYGON (((161.35878 -9.46664, 161.35859 ...",720,Ward,2009,Areare
...,...,...,...,...,...
West Makira,"MULTIPOLYGON (((161.50441 -10.51180, 161.50559...",844,Constituency,2009,West Makira
West New Georgia-Vonavona,"MULTIPOLYGON (((157.27598 -8.35143, 157.26991 ...",209,Constituency,2009,West New Georgia-Vonavona
West Te Nggano,"MULTIPOLYGON (((160.35206 -11.66366, 160.30721...",502,Ward,2009,West Te Nggano
West Ulawa,"MULTIPOLYGON (((161.97730 -9.77521, 161.94899 ...",803,Ward,2009,West Ulawa


The sum after merging and filtering should equal the sum from the raw dataset.

In [None]:
# The aggregated result is a Series labelled by measure name, so the value is read by
# position with .iloc rather than [0], which relies on the deprecated positional fallback
test_eq(sol_geo.get_census(geo_filter = 'Constituency',var = 'Key Statistics', measure = 'Total Households', agg = True).iloc[0], 
        const_df.loc[const_df['type'] == 'Total'].loc[const_df['agg'] == 'Constituency']['Key Statistics: Total Households'].sum())

  test_eq(sol_geo.get_census(geo_filter = 'Constituency',var = 'Key Statistics', measure = 'Total Households', agg = True)[0],


In [None]:
sol_geo.get_census(geo_filter = 'Constituency', var='Key Statistics', measure = 'Total Households', agg = True)

Total Households    91251
dtype: int64

In [None]:
sol_geo.get_census(geo_filter = 'Constituency', var='Key Statistics', measure = 'Total Households', loc_filter=['West Guadalcanal']).values[0]

array([1671])

In [None]:
sol_geo.census['core'].filter(items = ['id'])

Unnamed: 0_level_0,id
pk,Unnamed: 1_level_1
Aba - Asimeuru_Proportion,722
Aba - Asimeuru_Total,722
Aiaisi_Total,719
Aiaisi_Proportion,719
Aimela_Proportion,702
...,...
West Te Nggano_Proportion,502
West Ulawa_Proportion,803
West Ulawa_Total,803
Western_Total,2


In [None]:
# TODO -test that they all sum to one
# The aggregated proportions are scaled by 100, so the measures of one variable should sum to 100.
# The sum is a floating point value, so compare within a tolerance rather than exactly.
test_agg = sol_geo.get_census(geo_filter = 'Constituency', type_filter='Proportion', var = "Main source of household drinking water", 
                              agg = True)
test_close(test_agg.sum(), 100.0)
test_agg

metered SIWA drinking water        5.939895
communal standpipe                36.295381
private water tank                13.917023
communal water tank               12.631202
well - protected                   1.820029
well - unprotected                 1.608226
river or stream                   23.762780
bottled water                      0.550409
other source of drinking water     3.475056
dtype: float64

In [None]:
test_agg.sum()

100.0

In [None]:
sol_geo.ages

['0-4',
 '10-14',
 '15-19',
 '20-24',
 '25-29',
 '30-34',
 '35-39',
 '40-44',
 '45-49',
 '5-9',
 '50-54',
 '55-59',
 '60-64',
 '65-69',
 '70-74',
 '75-79',
 '80+']

In [None]:
sol_geo.pop_years

[2010,
 2011,
 2012,
 2013,
 2014,
 2015,
 2016,
 2017,
 2018,
 2019,
 2020,
 2021,
 2022,
 2023,
 2024,
 2025]

### Testing of Population Getter

In [None]:
sol_geo.get_pop(years = [2024],var='Population',
    measure='Total',
    loc_filter=None,
    type_filter='Total',
    agg=False,
    ages=['0-4'] 
    )

Unnamed: 0_level_0,core,Age,Population
Unnamed: 0_level_1,year,Age_Bracket,Total
"(core, location)",Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
Central,2024,0-4,3641.0
Choiseul,2024,0-4,4973.0
Guadalcanal,2024,0-4,22087.0
Honiara,2024,0-4,8535.0
Isabel,2024,0-4,4203.0
Makira-Ulawa,2024,0-4,7242.0
Malaita,2024,0-4,19039.0
Rennell-Bell,2024,0-4,581.0
Temotu,2024,0-4,2697.0
Western,2024,0-4,11834.0


In [None]:
test = sol_geo.get_pop(years = [2023, 2024],var='Population',
    measure='Total',
    loc_filter=None,
    type_filter='Total',
    agg=False,
    ages=['0-4'] 
    )

In [None]:
test = sol_geo.get_pop(years = sol_geo.pop_years,var='Population',
    measure='Total',
    type_filter='Total',
    agg=True,
    agg_location = True,
    ages=sol_geo.ages
    )

Check that the population proportions for a single year sum to 1

In [None]:
# Sum the proportions of every province and age bracket for the latest year
prop = sol_geo.get_pop(years = [sol_geo.pop_years[-1]],
                var='Population',
                measure='Total',
                type_filter='Proportion',
                agg=True,
                ages=sol_geo.ages,
    ).values[0, 0]
# Round rather than truncate: int() would turn 0.9999 into 0 and accept anything below 2
test_eq(1, round(prop))


In [None]:
sol_geo.get_pop(years = [2024],
                var='Population',
                measure='Total',
                type_filter='Total',
                agg=True,
                ages=sol_geo.ages,
                agg_location = True,
    )

Unnamed: 0_level_0,Unnamed: 1_level_0,Population
Unnamed: 0_level_1,Unnamed: 1_level_1,Total
"(core, year)","(core, location)",Unnamed: 2_level_2
2024,Central,34359.0
2024,Choiseul,40226.0
2024,Guadalcanal,178238.0
2024,Honiara,97801.0
2024,Isabel,38134.0
2024,Makira-Ulawa,59705.0
2024,Malaita,165610.0
2024,Rennell-Bell,4702.0
2024,Temotu,26182.0
2024,Western,105367.0


In [None]:
sol_geo.locations['Province']

array(['Central', 'Choiseul', 'Guadalcanal', 'Honiara', 'Isabel',
       'Makira-Ulawa', 'Malaita', 'Rennell-Bell', 'Temotu', 'Western'],
      dtype=object)

In [None]:
sol_geo.census

Unnamed: 0_level_0,core,core,core,core,Key Statistics,Main source of household drinking water,Main source of household drinking water,Main source of household drinking water,Main source of household drinking water,Main source of household drinking water,...,Disability seeing,Disability hearing,Disability hearing,Disability hearing,Disability walking,Disability walking,Disability walking,Disability remembering,Disability remembering,Disability remembering
Unnamed: 0_level_1,id,location,agg,type,Total Households,metered SIWA drinking water,communal standpipe,private water tank,communal water tank,well - protected,...,Cannot perform the task,No Difficulty at all,Some Difficulty,Cannot perform the task,No Difficulty at all,Some Difficulty,Cannot perform the task,No Difficulty at all,Some Difficulty,Cannot perform the task
pk,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
Aba - Asimeuru_Proportion,722,Aba - Asimeuru,Ward,Proportion,1,0.0000,0.5378,0.0245,0.0085,0.0011,...,0.003849,0.940843,0.054295,0.004862,0.880065,0.105754,0.014182,0.906807,0.081645,0.011548
Aba - Asimeuru_Total,722,Aba - Asimeuru,Ward,Total,939,0.0000,505.0000,23.0000,8.0000,1.0000,...,19.000000,4644.000000,268.000000,24.000000,4344.000000,522.000000,70.000000,4476.000000,403.000000,57.000000
Aiaisi_Total,719,Aiaisi,Ward,Total,561,1.0000,403.0000,9.0000,3.0000,1.0000,...,9.000000,3415.000000,149.000000,10.000000,3316.000000,239.000000,19.000000,3349.000000,205.000000,20.000000
Aiaisi_Proportion,719,Aiaisi,Ward,Proportion,1,0.0018,0.7184,0.0160,0.0053,0.0018,...,0.002518,0.955512,0.041690,0.002798,0.927812,0.066872,0.005316,0.937045,0.057359,0.005596
Aimela_Proportion,702,Aimela,Ward,Proportion,1,0.0164,0.2937,0.0762,0.1921,0.1360,...,0.001571,0.959288,0.038749,0.001964,0.954444,0.039796,0.005760,0.951172,0.043330,0.005498
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
West Te Nggano_Proportion,502,West Te Nggano,Ward,Proportion,1,0.0000,0.0000,0.3704,0.4568,0.0000,...,0.000000,0.978836,0.018519,0.002646,0.968254,0.031746,0.000000,0.986772,0.013228,0.000000
West Ulawa_Proportion,803,West Ulawa,Ward,Proportion,1,0.0099,0.7673,0.0050,0.0050,0.0000,...,0.000000,0.929070,0.069767,0.001163,0.886047,0.113953,0.000000,0.920930,0.077907,0.001163
West Ulawa_Total,803,West Ulawa,Ward,Total,202,2.0000,155.0000,1.0000,1.0000,0.0000,...,0.000000,799.000000,60.000000,1.000000,762.000000,98.000000,0.000000,792.000000,67.000000,1.000000
Western_Total,2,Western,Province,Total,13762,37.0000,4151.0000,4596.0000,2426.0000,46.0000,...,159.000000,72641.000000,3788.000000,220.000000,71496.000000,4644.000000,509.000000,69162.000000,6901.000000,586.000000


## Check Some of the datasets
necessary??

In [None]:
sol_geo.census

Unnamed: 0_level_0,core,core,core,core,Key Statistics,Main source of household drinking water,Main source of household drinking water,Main source of household drinking water,Main source of household drinking water,Main source of household drinking water,...,Disability seeing,Disability hearing,Disability hearing,Disability hearing,Disability walking,Disability walking,Disability walking,Disability remembering,Disability remembering,Disability remembering
Unnamed: 0_level_1,id,location,agg,type,Total Households,metered SIWA drinking water,communal standpipe,private water tank,communal water tank,well - protected,...,Cannot perform the task,No Difficulty at all,Some Difficulty,Cannot perform the task,No Difficulty at all,Some Difficulty,Cannot perform the task,No Difficulty at all,Some Difficulty,Cannot perform the task
pk,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
Aba - Asimeuru_Proportion,722,Aba - Asimeuru,Ward,Proportion,1,0.0000,0.5378,0.0245,0.0085,0.0011,...,0.003849,0.940843,0.054295,0.004862,0.880065,0.105754,0.014182,0.906807,0.081645,0.011548
Aba - Asimeuru_Total,722,Aba - Asimeuru,Ward,Total,939,0.0000,505.0000,23.0000,8.0000,1.0000,...,19.000000,4644.000000,268.000000,24.000000,4344.000000,522.000000,70.000000,4476.000000,403.000000,57.000000
Aiaisi_Total,719,Aiaisi,Ward,Total,561,1.0000,403.0000,9.0000,3.0000,1.0000,...,9.000000,3415.000000,149.000000,10.000000,3316.000000,239.000000,19.000000,3349.000000,205.000000,20.000000
Aiaisi_Proportion,719,Aiaisi,Ward,Proportion,1,0.0018,0.7184,0.0160,0.0053,0.0018,...,0.002518,0.955512,0.041690,0.002798,0.927812,0.066872,0.005316,0.937045,0.057359,0.005596
Aimela_Proportion,702,Aimela,Ward,Proportion,1,0.0164,0.2937,0.0762,0.1921,0.1360,...,0.001571,0.959288,0.038749,0.001964,0.954444,0.039796,0.005760,0.951172,0.043330,0.005498
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
West Te Nggano_Proportion,502,West Te Nggano,Ward,Proportion,1,0.0000,0.0000,0.3704,0.4568,0.0000,...,0.000000,0.978836,0.018519,0.002646,0.968254,0.031746,0.000000,0.986772,0.013228,0.000000
West Ulawa_Proportion,803,West Ulawa,Ward,Proportion,1,0.0099,0.7673,0.0050,0.0050,0.0000,...,0.000000,0.929070,0.069767,0.001163,0.886047,0.113953,0.000000,0.920930,0.077907,0.001163
West Ulawa_Total,803,West Ulawa,Ward,Total,202,2.0000,155.0000,1.0000,1.0000,0.0000,...,0.000000,799.000000,60.000000,1.000000,762.000000,98.000000,0.000000,792.000000,67.000000,1.000000
Western_Total,2,Western,Province,Total,13762,37.0000,4151.0000,4596.0000,2426.0000,46.0000,...,159.000000,72641.000000,3788.000000,220.000000,71496.000000,4644.000000,509.000000,69162.000000,6901.000000,586.000000


In [None]:
sol_geo.geo

Unnamed: 0_level_0,geometry,id,agg,year,location
location,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Aba - Asimeuru,"MULTIPOLYGON (((161.54336 -9.54751, 161.54367 ...",722,Ward,2009,Aba - Asimeuru
Aiaisi,"MULTIPOLYGON (((161.26900 -9.21229, 161.27144 ...",719,Ward,2009,Aiaisi
Aimela,"MULTIPOLYGON (((160.68653 -8.71158, 160.67020 ...",702,Ward,2009,Aimela
Aola,"MULTIPOLYGON (((160.50139 -9.52996, 160.50196 ...",616,Ward,2009,Aola
Areare,"MULTIPOLYGON (((161.35878 -9.46664, 161.35859 ...",720,Ward,2009,Areare
...,...,...,...,...,...
West Makira,"MULTIPOLYGON (((161.50441 -10.51180, 161.50559...",844,Constituency,2009,West Makira
West New Georgia-Vonavona,"MULTIPOLYGON (((157.27598 -8.35143, 157.26991 ...",209,Constituency,2009,West New Georgia-Vonavona
West Te Nggano,"MULTIPOLYGON (((160.35206 -11.66366, 160.30721...",502,Ward,2009,West Te Nggano
West Ulawa,"MULTIPOLYGON (((161.97730 -9.77521, 161.94899 ...",803,Ward,2009,West Ulawa


In [None]:
# TODO write tests that check that the different methods create equivalent objects.
# TODO - need to write an equality function for the object

In [None]:
test.loc[2024]

Unnamed: 0_level_0,Population
Unnamed: 0_level_1,Total
"(core, location)",Unnamed: 1_level_2
Central,34359.0
Choiseul,40226.0
Guadalcanal,178238.0
Honiara,97801.0
Isabel,38134.0
Makira-Ulawa,59705.0
Malaita,165610.0
Rennell-Bell,4702.0
Temotu,26182.0
Western,105367.0


In [None]:
len(sol_geo.pop_years)

16

In [None]:
#| hide
import nbdev; nbdev.nbdev_export()