In [None]:
# Bootstrap Django so the project's ORM models can be imported from this notebook.
import os
# Switch the kernel's working directory to the path held in the PWD environment
# variable; the relative './Jupyter/...' paths used further down resolve against it.
%cd {os.environ['PWD']}

from django.conf import settings
import django

# Must be set before any ORM call: Django otherwise refuses synchronous database
# access from inside a running event loop, which is how a Jupyter kernel executes cells.
os.environ["DJANGO_ALLOW_ASYNC_UNSAFE"] = "true"

# Populate the app registry; model imports in later cells depend on this having run.
django.setup()

### check apps and models available in HEA for reference

In [None]:
from django.apps import apps

# Every app configuration registered with the running Django project.
all_apps = apps.get_app_configs()

# Print each app's dotted name followed by the class names of its models.
for app_config in all_apps:
    print("app: ", app_config.name)
    model_names = [model_cls.__name__ for model_cls in app_config.get_models()]
    for model_name in model_names:
        print("model: ", model_name)
        

### Load libraries needed

In [None]:
# import libraries
import pandas as pd
import numpy as np

# Lift pandas' display truncation so frames are rendered with every column and row.
for display_option in ('display.max_columns', 'display.max_rows'):
    pd.set_option(display_option, None)

### Load the datasets needed

In [None]:
# Load the key wealth breakdown extract and work on its transpose, so that the
# original rows become columns and can be normalised with column operations.
df = pd.read_csv('./Jupyter/bss_extracts/data_WB.csv')
df_temp = df.T

# Normalise letter case on string cells only. The `.str` accessor would replace
# every non-string cell (numbers, dates, ...) with NaN, silently discarding data
# in a mixed-type extract, and would raise on a non-object column.
# Column 2 of the transpose is original row 2: first letter upper, rest lower.
df_temp[2] = df_temp[2].map(
    lambda cell: cell.capitalize() if isinstance(cell, str) else cell
)
# Row 0 of the transpose is the original first column: lower-case it.
df_temp.iloc[0] = df_temp.iloc[0].map(
    lambda cell: cell.lower() if isinstance(cell, str) else cell
)

df_temp.head(3)

In [None]:
# Transpose the normalised frame back to its original orientation.
df = df_temp.T
# Preview up to 10 random rows; sample() raises ValueError when asked for more
# rows than the frame holds, so cap the request at the frame's length.
df.sample(min(10, len(df)))

In [None]:
# Derive a 'bss_year' date string for the first row from the trailing
# whitespace-separated token of its text cells, suffixed with '-01-01'.
# NOTE(review): when several cells in the row are strings, the last one scanned
# wins (this matches the previous behaviour) - confirm which column is meant to
# carry the year.
row_index = 0
year_token = None
for col in df.columns:
    # Skip our own output column so re-running the cell does not feed
    # 'YYYY-01-01' back in and produce 'YYYY-01-01-01-01'.
    if col == 'bss_year':
        continue
    cell = df.loc[row_index, col]
    if isinstance(cell, str):  # Only text cells can carry the year token
        tokens = cell.split()
        if tokens:  # whitespace-only strings have no last token to take
            year_token = tokens[-1]

if year_token is not None:
    df.loc[row_index, 'bss_year'] = year_token + '-01-01'

# display a few rows of the dataset to see structure
df.head(8)

In [None]:
'''
Load the countries data. Data extracted from FDW. 
filter the data to only contain the countries whose BSS are available.
Display a few rows of the data to explore
'''
countries_df = pd.read_csv('./Jupyter/bss_extracts/Country.csv')

# Countries for which a BSS is available, spelled as in the 'iso_en_name' column.
countries = [
    "Somalia",
    "Niger",
    "Malawi",
    "Madagascar",
    "Haiti",
    "Guatemala",
    "Democratic Republic of the Congo",
    "Burkina Faso",
    "Liberia",
    "Sierra Leone",
    "Senegal",
]

# Keep only the BSS countries, as an independent copy with a fresh 0..n-1 index.
has_bss = countries_df['iso_en_name'].isin(countries)
bss_countries = countries_df[has_bss].copy()
bss_countries = bss_countries.reset_index(drop=True)

# Strip anything outside printable ASCII from the iso3166a2 codes.
printable_only = bss_countries['iso3166a2'].str.replace(r'[^\x20-\x7E]', '', regex=True)
bss_countries['iso3166a2'] = printable_only

# The extract has no iso_en_proper column, so mirror iso_en_name into it.
bss_countries['iso_en_proper'] = bss_countries['iso_en_name']

bss_countries.head()

### Explore the models to see which fields are needed when adding data, in dependency order

### Add Countries data to the Country table

In [None]:
from common.models import Country

# Extract columns that are copied onto the Country field of the same name.
country_columns = (
    'iso3166a2',
    'iso3166a3',
    'iso_en_name',
    'iso3166n3',
    'iso_en_proper',
    'iso_en_ro_name',
    'iso_en_ro_proper',
    'iso_fr_name',
    'iso_fr_proper',
    'iso_es_name',
)

# Save one Country per filtered row; 'name' reuses the English ISO name.
for _, country_row in bss_countries.iterrows():
    country_kwargs = {column: country_row[column] for column in country_columns}
    country_kwargs['name'] = country_row['iso_en_name']
    Country(**country_kwargs).save()


### Add wealth categories

In [None]:
# Inspect the WealthCategory model's field definitions before loading data into it.
from metadata.models import WealthCategory
fields = WealthCategory._meta.fields
# Bare last expression so the notebook displays the field list.
fields

In [None]:
wealth_cat_df = pd.read_csv('./Jupyter/bss_extracts/wealth_categories.csv')

from metadata.models import WealthCategory

# Save one WealthCategory per extract row. The extract's 'description' column
# populates both the name and the description of the record.
for _, category_row in wealth_cat_df.iterrows():
    label = category_row['description']
    WealthCategory(code=category_row['code'], name=label, description=label).save()


### Add LivelihoodZone data

In [None]:
# Inspect the LivelihoodZone model's field definitions before loading data into it.
from baseline.models import LivelihoodZone
fields = LivelihoodZone._meta.fields

# Bare last expression so the notebook displays the field list.
fields

In [None]:
# Read the livelihood zone extract and normalise the casing of its country names.
# Note that capitalize() upper-cases only the first character and lower-cases the
# rest, so a multi-word name such as "Burkina Faso" becomes "Burkina faso"; any
# case-sensitive comparison against properly cased names has to allow for that.
df_LZ = pd.read_csv('./Jupyter/bss_extracts/LZ.csv').assign(
    bss_country=lambda lz_frame: lz_frame['bss_country'].str.capitalize()
)

df_LZ.head()

In [None]:

from baseline.models import LivelihoodZone
from common.models import Country

# Create one LivelihoodZone per extract row, linked to its Country.
# Failures are reported per row and the loop carries on (best-effort load).
for index, row in df_LZ.iterrows():
    try:
        # The extract's country names went through str.capitalize(), which
        # lower-cases everything after the first character ("Burkina faso"),
        # while Country.name holds the properly cased ISO name. Match
        # case-insensitively so multi-word countries are found.
        # Assumes countries are unique by name.
        country_obj = Country.objects.get(name__iexact=row['bss_country'])

        # Create a new instance of LivelihoodZone
        model_instance = LivelihoodZone(
            code=row['code'],
            name=row['livelihood_zone'],
            country=country_obj  # Set the country field to the Country instance
        )
        model_instance.save()
        # Report the value that was actually stored as the zone's code.
        print(f"Saved LivelihoodZone with code: {row['code']}")

    except Country.DoesNotExist:
        print(f"Country not found for name: {row['bss_country']}")
    except Exception as e:
        # Anything else (e.g. an ambiguous country match or a database error)
        # is reported for this row without aborting the remaining rows.
        print(f"Error on row {index}: {e}")



### Add Livelihood categories

In [None]:
# Inspect the LivelihoodCategory model's field definitions before loading data into it.
from metadata.models import LivelihoodCategory
fields = LivelihoodCategory._meta.fields
# Bare last expression so the notebook displays the field list.
fields

In [None]:
LZ_baseline = pd.read_csv('./Jupyter/bss_extracts/LZ_baseline.csv')

from metadata.models import LivelihoodCategory

# Save one LivelihoodCategory per baseline row. The extract's 'category' column
# populates both the name and the description of the record.
for _, baseline_row in LZ_baseline.iterrows():
    category_label = baseline_row['category']
    LivelihoodCategory(
        code=baseline_row['code'],
        name=category_label,
        description=category_label,
    ).save()


### Adding source organizations

In [None]:
# Inspect the SourceOrganization model's field definitions before loading data into it.
from baseline.models import SourceOrganization
fields = SourceOrganization._meta.fields
# Bare last expression so the notebook displays the field list.
fields

In [None]:
source_org = pd.read_csv('Jupyter/bss_extracts/source_org.csv')
from baseline.models import SourceOrganization

# Save one SourceOrganization per extract row. The extract's 'full_name' column
# populates both the full name and the description of the record.
for _, org_row in source_org.iterrows():
    long_name = org_row['full_name']
    SourceOrganization(
        name=org_row['name'],
        full_name=long_name,
        description=long_name,
    ).save()

### Add LivelihoodZoneBaseline data

In [None]:
# Inspect the LivelihoodZoneBaseline model's field definitions before loading data into it.
from baseline.models import LivelihoodZoneBaseline
fields = LivelihoodZoneBaseline._meta.fields

# Bare last expression so the notebook displays the field list.
fields

In [None]:
# Copy the countries' 'bbox' values into a new 'geography' column on the baselines.
# NOTE(review): assigning a Series aligns on index labels, so baseline row i gets
# the bbox of bss_countries row i regardless of which country the baseline belongs
# to, and baseline rows with no matching label get NaN. Presumably a per-country
# lookup was intended - confirm and join on a country key instead.
LZ_baseline['geography'] = bss_countries['bbox']
LZ_baseline.head()

In [None]:
from baseline.models import LivelihoodZoneBaseline, LivelihoodZone, SourceOrganization
from metadata.models import LivelihoodCategory

# Save one LivelihoodZoneBaseline per row of the baseline extract.
for _, baseline_row in LZ_baseline.iterrows():
    # Resolve each foreign key by name, creating the related record when absent.
    # get_or_create returns an (object, created) pair; only the object is needed.
    zone = LivelihoodZone.objects.get_or_create(name=baseline_row['livelihood_zone'])[0]
    category = LivelihoodCategory.objects.get_or_create(name=baseline_row['category'])[0]
    organization = SourceOrganization.objects.get_or_create(
        name=baseline_row['source_organization']
    )[0]

    # The extract's 'bss_year' value is used for all four date fields.
    # The model's 'bss' and 'geography' fields are not populated here.
    bss_date = baseline_row['bss_year']
    LivelihoodZoneBaseline(
        livelihood_zone=zone,
        main_livelihood_category=category,
        source_organization=organization,
        reference_year_start_date=bss_date,
        reference_year_end_date=bss_date,
        valid_from_date=bss_date,
        valid_to_date=bss_date,
    ).save()


### Adding community data

In [None]:
# Inspect the Community model's field definitions before loading data into it.
from baseline.models import Community
fields = Community._meta.fields
# Bare last expression so the notebook displays the field list.
fields

In [None]:
# Load the community extract before inspecting it, so this cell does not raise
# NameError when the notebook is run top to bottom on a fresh kernel.
community_df = pd.read_csv('./Jupyter/bss_extracts/community_df2.csv')
# Show the extract's column names to map them onto Community fields.
community_df.columns

In [None]:
community_df = pd.read_csv('./Jupyter/bss_extracts/community_df2.csv')
from baseline.models import Community, LivelihoodZoneBaseline

# Create one Community per extract row, attached to the baseline of its livelihood zone.
for index, row in community_df.iterrows():
    # Look up the baseline whose livelihood zone key equals the row's code;
    # when several match, the first one returned is used.
    lzb = LivelihoodZoneBaseline.objects.filter(livelihood_zone=row['code']).first()
    if lzb is None:
        # No baseline exists for this zone yet, so create one. A ForeignKey
        # attribute only accepts a model instance, so passing the raw code as
        # `livelihood_zone=` raises ValueError; the `_id` form takes the key value.
        # NOTE(review): only the zone is set on the new baseline - confirm the
        # model's other fields may be left empty.
        lzb = LivelihoodZoneBaseline.objects.create(livelihood_zone_id=row['code'])

    model_instance = Community(
        name=row['Village'],
        livelihood_zone_baseline=lzb,
        # The model's 'geography' field is not populated here; the extract's
        # 'District' column was considered as its source.
        interview_number=row['Interview number:'],
        interviewers=row['Interviewers']
    )
    model_instance.save()


### Adding WealthGroup data

In [None]:
# load the app model
from baseline.models import WealthGroup
fields = WealthGroup._meta.fields

fields