In [2]:
import os
%cd {os.environ['PWD']}

from django.conf import settings
import django

os.environ["DJANGO_ALLOW_ASYNC_UNSAFE"] = "true"

django.setup()

/usr/src/app


### check apps and models available in HEA for reference

In [3]:
from django.apps import apps

# list of all installed apps
all_apps = apps.get_app_configs()

for app in all_apps:
    print("app: ", app.name)
    for model in app.get_models():
        print("model: ", model.__name__)
        

app:  rest_framework
app:  django_filters
app:  django.contrib.admin
model:  LogEntry
app:  django.contrib.auth
model:  Permission
model:  Group
model:  User
app:  django.contrib.contenttypes
model:  ContentType
app:  django.contrib.sessions
model:  Session
app:  django.contrib.messages
app:  django.contrib.staticfiles
app:  django.contrib.gis
model:  PostGISGeometryColumns
model:  PostGISSpatialRefSys
app:  django.contrib.admindocs
app:  binary_database_files
model:  File
app:  django_extensions
app:  common
model:  Country
model:  Currency
model:  UnitOfMeasure
model:  UnitOfMeasureConversion
model:  ClassifiedProduct
model:  CountryClassifiedProductAliases
app:  metadata
model:  LivelihoodCategory
model:  WealthCharacteristic
model:  SeasonalActivityType
model:  WealthCategory
model:  Market
model:  HazardCategory
model:  Season
app:  baseline
model:  SourceOrganization
model:  LivelihoodZone
model:  LivelihoodZoneBaseline
model:  LivelihoodProductCategory
model:  Community
model:  

### Load libraries needed

In [4]:
# import libraries
import pandas as pd
import numpy as np

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

### Load the datasets needed

In [5]:
'''
Load the countries data. Data extracted from FDW. 
filter the data to only contain the countries whose BSS are available.
Display a few rows of the data to explore
'''
countries_df = pd.read_csv('./Jupyter/bss_extracts/Country.csv')

# BSS Countries 
countries = [
    "Somalia", "Niger", "Malawi", "Madagascar", "Haiti", "Guatemala", 
    "Democratic Republic of the Congo", "Burkina Faso", "Liberia", 
    "Sierra Leone", "Senegal"
]

bss_countries = countries_df[countries_df['iso_en_name'].isin(countries)].copy()

# remove non printable characters from the iso3266a2 column
bss_countries.loc[:, 'iso3166a2'] = bss_countries['iso3166a2'].str.replace(r'[^\x20-\x7E]', '', regex=True)

# since original countries data received does not have iso_en_proper, create the column and make it equal to the name column
bss_countries.loc[:, 'iso_en_proper'] = bss_countries['iso_en_name']

# reset index for tidiness
bss_countries.reset_index(drop=True, inplace=True)


bss_countries.head()

Unnamed: 0,iso3166a2,iso3166a3,iso3166n3,fips104,iso_en_name,iso_en_ro_name,iso_en_ro_proper,iso_fr_name,iso_fr_proper,iso_es_name,fewsnet_pt_name,fewsnet_name,bgn_name,bgn_proper,bgn_longname,pcgn_name,pcgn_proper,pcgn_longname,fewsnet_capital,fewsnet_es_capital,fewsnet_fr_capital,fewsnet_pt_capital,fewsnet_ar_capital,bbox,centroid,fewsnet_c_latitude,fewsnet_c_longitude,continent,subcontinent,iso_region,iso_subregion,language,land,water,land_total,url_gov,url_stats,url_gis,url_post,url_fews,iso_en_proper
0,BF,BFA,854.0,UV,Burkina Faso,Burkina Faso,Burkina Faso,Burkina Faso,Burkina Faso,Burkina Faso,,,Burkina Faso,Burkina Faso,Burkina Faso,Burkina Faso,Burkina Faso,Burkina Faso,Ouagadougou,Uagadugú,Ouagadougou,Ouagadougou,واغادوغو,"(-5.518916130222894, 9.401107788219365, 2.4053...","(-2.0, 13.0)",12.370278,-1.524722,AF,,,,fr,273800.0,400.0,274200.0,gouvernement.gov.bf,insd.bf,,,https://fews.net/west-africa/burkina-faso,Burkina Faso
1,GT,GTM,320.0,GT,Guatemala,Guatemala,Guatemala,Guatemala,Guatemala,Guatemala,,,Guatemala,Guatemala,Republic of Guatemala,Guatemala,Guatemala,Republic of Guatemala,Guatemala City,Ciudad De Guatemala,Guatemala,Cidade De Guatemala,مدينة غواتيمالا,"(-92.24149322499997, 13.737300872579784, -88.2...","(-90.25, 15.5)",14.621111,-90.526944,,,,,"es,myn",107159.0,1730.0,108889.0,guatemala.gob.gt,ine.gob.gt,,elcorreo.com.gt/elcorreo/index.php,https://fews.net/latin-america-and-caribbean/g...,Guatemala
2,HT,HTI,332.0,HA,Haiti,Haiti,Haiti,Haïti,Haiti,Haití,,,Haiti,Haiti,Republic of Haiti,Haiti,Haiti,Republic of Haiti,Port-au-Prince,Puerto Príncipe,Port-Au-Prince,Porto Príncipe,بورت أو برنس,"(-74.4811553959999, 18.023044586000083, -71.61...","(-72.4166666667, 19.0)",18.539167,-72.335,,CA,,,"fr,ht",27560.0,190.0,27750.0,gov.ht,ihsi.ht,,,https://fews.net/latin-america-and-caribbean/h...,Haiti
3,LR,LBR,430.0,LI,Liberia,Liberia,Liberia,Libéria,Libéria,Liberia,,,Liberia,Liberia,Republic of Liberia,Liberia,Liberia,Republic of Liberia,Monrovia,Monrovia,Monrovia,Monróvia,مونروفيا,"(-11.492082595645089, 4.353056908086609, -7.36...","(-9.5, 6.5)",6.310556,-10.804722,AF,,,,en,96320.0,15049.0,111369.0,emansion.gov.lr,lisgis.org,,,https://fews.net/west-africa/liberia,Liberia
4,MG,MDG,450.0,MA,Madagascar,Madagascar,Madagascar,Madagascar,Madagascar,Madagascar,,,Madagascar,Madagascar,Republic of Madagascar,Madagascar,Madagascar,Republic of Madagascar,Antananarivo,Antananarivo,Antananarivo,Antananarivo,أنتاناناريفو,"(43.17692448000008, -25.605752595999945, 50.48...","(47.0, -20.0)",-18.916667,47.516667,AF,,,,"fr,mg",581540.0,5501.0,587041.0,madagascar.gov.mg,instat.mg,ftm.mg,,https://fews.net/southern-africa/madagascar,Madagascar


### Explore the modesl to see what fields are needed while adding data based on dependencies

### Add Countries data to the Country table

In [6]:
from common.models import Country

for index, row in bss_countries.iterrows():
    # creating new instances:
    model_instance = Country(
        iso3166a2=row['iso3166a2'],
        iso3166a3=row['iso3166a3'],
        iso_en_name=row['iso_en_name'],
        name=row['iso_en_name'],
        iso3166n3=row['iso3166n3'],
        iso_en_proper=row['iso_en_proper'],
        iso_en_ro_name=row['iso_en_ro_name'],
        iso_en_ro_proper=row['iso_en_ro_proper'],
        iso_fr_name=row['iso_fr_name'], 
        iso_fr_proper=row['iso_fr_proper'],
        iso_es_name=row['iso_es_name'],
        
    )
    model_instance.save()


### Add wealth categories

In [7]:
from metadata.models import WealthCategory
fields = WealthCategory._meta.fields
fields

(<model_utils.fields.AutoCreatedField: created>,
 <model_utils.fields.AutoLastModifiedField: modified>,
 <common.fields.CodeField: code>,
 <common.fields.NameField: name>,
 <common.fields.DescriptionField: description>,
 <django.db.models.fields.PositiveSmallIntegerField: ordering>,
 <django.db.models.fields.json.JSONField: aliases>)

In [8]:
wealth_cat_df = pd.read_csv('./Jupyter/bss_extracts/wealth_categories.csv')

from metadata.models import WealthCategory

for index, row in wealth_cat_df.iterrows():
    
    # Assuming we're creating new instances:
    model_instance = WealthCategory(
        code=row['code'],
        name=row['description'],
        description=row['description'],
    )
    model_instance.save()


### Add LivelihoodZone data

In [9]:
from baseline.models import LivelihoodZone
fields = LivelihoodZone._meta.fields

fields

(<model_utils.fields.AutoCreatedField: created>,
 <model_utils.fields.AutoLastModifiedField: modified>,
 <django.db.models.fields.CharField: code>,
 <common.fields.NameField: name>,
 <common.fields.DescriptionField: description>,
 <django.db.models.fields.related.ForeignKey: country>)

In [10]:
df_LZ = pd.read_csv('./Jupyter/bss_extracts/LZ.csv')
df_LZ.bss_country = df_LZ['bss_country'].str.capitalize()

df_LZ.head()

Unnamed: 0.1,Unnamed: 0,livelihood_zone,bss_country,code,bss_year
0,0,Mzimba Self-Sufficient,Malawi,MWMZS_30Sep15,2015-01-01
1,347,MISUKU HILLS,Malawi,MWMSK_30Sep15,2015-01-01
2,694,Phalombe Livelihood Zone (PHA),Malawi,MWPHA_30Sep15,2015-01-01
3,1041,Phirilongwe Livelihood Zone (PHI),Malawi,MWPHI_30Sep15,2015-01-01
4,1388,Rift Valley Escarpment,Malawi,MWRVE_30Sep15,2015-01-01


In [11]:

from baseline.models import LivelihoodZone
from common.models import Country

for index, row in df_LZ.iterrows():
    try:
        country_obj = Country.objects.get(name=row['bss_country'])  
        
        # Create a new instance of LivelihoodZone
        model_instance = LivelihoodZone(
            code=row['code'],
            name=row['livelihood_zone'],
            # description=row['bss_name'],
            country=country_obj  # Set the country field to the Country instance
        )
        model_instance.save()
        print(f"Saved LivelihoodZone with code: {row['bss_name']}")
    
    except Country.DoesNotExist:
        print(f"Country not found for name: {row['bss_country']}")
    except Exception as e:
        print(f"Error on row {index}: {e}")



Error on row 0: 'bss_name'
Error on row 1: 'bss_name'
Error on row 2: 'bss_name'
Error on row 3: 'bss_name'
Error on row 4: 'bss_name'
Error on row 5: 'bss_name'
Error on row 6: 'bss_name'
Error on row 7: 'bss_name'
Error on row 8: 'bss_name'
Error on row 9: 'bss_name'
Error on row 10: 'bss_name'
Error on row 11: 'bss_name'
Error on row 12: 'bss_name'
Error on row 13: 'bss_name'
Error on row 14: 'bss_name'
Error on row 15: 'bss_name'
Error on row 16: 'bss_name'
Error on row 17: 'bss_name'


### Add Livelihood categories

In [12]:
from metadata.models import LivelihoodCategory
fields = LivelihoodCategory._meta.fields
fields

(<model_utils.fields.AutoCreatedField: created>,
 <model_utils.fields.AutoLastModifiedField: modified>,
 <common.fields.CodeField: code>,
 <common.fields.NameField: name>,
 <common.fields.DescriptionField: description>,
 <django.db.models.fields.PositiveSmallIntegerField: ordering>,
 <django.db.models.fields.json.JSONField: aliases>)

In [13]:
LZ_baseline = pd.read_csv('./Jupyter/bss_extracts/LZ_baseline.csv')

from metadata.models import LivelihoodCategory

for index, row in LZ_baseline.iterrows():
    # creating new instances:
    model_instance = LivelihoodCategory(
        code=row['code'],
        name=row['category'],
        description=row['category'],
        
    )
    model_instance.save()


### Adding source organizations

In [14]:
from baseline.models import SourceOrganization
fields = SourceOrganization._meta.fields
fields

(<django.db.models.fields.BigAutoField: id>,
 <model_utils.fields.AutoCreatedField: created>,
 <model_utils.fields.AutoLastModifiedField: modified>,
 <common.fields.NameField: name>,
 <common.fields.NameField: full_name>,
 <common.fields.DescriptionField: description>)

In [15]:
source_org = pd.read_csv('Jupyter/bss_extracts/source_org.csv')
from baseline.models import SourceOrganization

for index, row in source_org.iterrows():

    model_instance = SourceOrganization(
        name=row['name'],
        full_name=row['full_name'],
        description=row['full_name'],
        
    )
    model_instance.save()

IntegrityError: duplicate key value violates unique constraint "baseline_sourceorganization_name_key"
DETAIL:  Key (name)=(FEWSNET) already exists.


### Add Livelihood_baseline

In [16]:
from baseline.models import LivelihoodZoneBaseline
fields = LivelihoodZoneBaseline._meta.fields

fields

(<django.db.models.fields.BigAutoField: id>,
 <model_utils.fields.AutoCreatedField: created>,
 <model_utils.fields.AutoLastModifiedField: modified>,
 <django.db.models.fields.related.ForeignKey: livelihood_zone>,
 <django.contrib.gis.db.models.fields.MultiPolygonField: geography>,
 <django.db.models.fields.related.ForeignKey: main_livelihood_category>,
 <django.db.models.fields.related.ForeignKey: source_organization>,
 <django.db.models.fields.files.FileField: bss>,
 <django.db.models.fields.DateField: reference_year_start_date>,
 <django.db.models.fields.DateField: reference_year_end_date>,
 <django.db.models.fields.DateField: valid_from_date>,
 <django.db.models.fields.DateField: valid_to_date>,
 <django.db.models.fields.CharField: population_source>,
 <django.db.models.fields.PositiveIntegerField: population_estimate>)

In [17]:
LZ_baseline['geography'] = bss_countries['bbox']
LZ_baseline.head()

Unnamed: 0.1,Unnamed: 0,livelihood_zone,category,file_url,bss_year,code,source_organization,geography
0,27,Mzimba Self-Sufficient,Agriculture,MWMZS_30Sep15.xls,2015-01-01,A,Other,"(-5.518916130222894, 9.401107788219365, 2.4053..."
1,31,Mzimba Self-Sufficient,Pastoral,MWMZS_30Sep15.xls,2015-01-01,P,Other,"(-92.24149322499997, 13.737300872579784, -88.2..."
2,39,Mzimba Self-Sufficient,Pastoral,MWMZS_30Sep15.xls,2015-01-01,P,SC,"(-74.4811553959999, 18.023044586000083, -71.61..."
3,47,Mzimba Self-Sufficient,Pastoral,MWMZS_30Sep15.xls,2015-01-01,P,FEWSNET,"(-11.492082595645089, 4.353056908086609, -7.36..."
4,55,Mzimba Self-Sufficient,Pastoral,MWMZS_30Sep15.xls,2015-01-01,P,Other,"(43.17692448000008, -25.605752595999945, 50.48..."


In [18]:
from baseline.models import LivelihoodZoneBaseline, LivelihoodZone, SourceOrganization
from metadata.models import LivelihoodCategory

for index, row in LZ_baseline.iterrows():
    # For ForeignKey fields, create the related instances
    livelihood_zone, _ = LivelihoodZone.objects.get_or_create(name=row['livelihood_zone'])
    main_livelihood_category, _ = LivelihoodCategory.objects.get_or_create(name=row['category'])
    Source_Organization, _ = SourceOrganization.objects.get_or_create(name=row['source_organization'])
    
    # Create a new instance of the model and populate the fields
    model_instance = LivelihoodZoneBaseline(
        livelihood_zone=livelihood_zone,
        main_livelihood_category=main_livelihood_category,
        # bss=row['file_url'],
        # geography=row['geography'],
        source_organization=Source_Organization,
        reference_year_start_date=row['bss_year'],
        reference_year_end_date=row['bss_year'],
        valid_from_date=row['bss_year'],
        valid_to_date=row['bss_year']
    )
    
    # Save the instance to the database
    model_instance.save()


### Adding community data

In [19]:
from baseline.models import Community
fields = Community._meta.fields
fields

(<django.db.models.fields.BigAutoField: id>,
 <model_utils.fields.AutoCreatedField: created>,
 <model_utils.fields.AutoLastModifiedField: modified>,
 <common.fields.NameField: name>,
 <django.db.models.fields.related.ForeignKey: livelihood_zone_baseline>,
 <django.contrib.gis.db.models.fields.GeometryField: geography>,
 <django.db.models.fields.CharField: interview_number>,
 <django.db.models.fields.CharField: interviewers>)

In [20]:
community_df = pd.read_csv('./Jupyter/bss_extracts/community_df.csv')
community_df.head()

Unnamed: 0.1,Unnamed: 0,District,Interview number:,Interviewers,Village,community,bss_name,livelihood_zone,bss_country,code,file_url,bss_year
0,0,Mzimba,1.0,,Chingati Chirwa,Mzimba:Chingati Chirwa,MALAWI HEA BASELINES 2015,Mzimba Self-Sufficient,MALAWI,MWMZS_30Sep15,MWMZS_30Sep15.xls,2015-01-01
1,1,Mzimba,1.0,,Chingati Chirwa,Mzimba:Chingati Chirwa,MALAWI HEA BASELINES 2015,Mzimba Self-Sufficient,MALAWI,MWMZS_30Sep15,MWMZS_30Sep15.xls,2015-01-01
2,2,Mzimba,1.0,,Chingati Chirwa,Mzimba:Chingati Chirwa,MALAWI HEA BASELINES 2015,Mzimba Self-Sufficient,MALAWI,MWMZS_30Sep15,MWMZS_30Sep15.xls,2015-01-01
3,3,Mzimba,1.0,,Chingati Chirwa,Mzimba:Chingati Chirwa,MALAWI HEA BASELINES 2015,Mzimba Self-Sufficient,MALAWI,MWMZS_30Sep15,MWMZS_30Sep15.xls,2015-01-01
4,4,Mzimba,2.0,,Sindani Chibambo,Mzimba:Sindani Chibambo,MALAWI HEA BASELINES 2015,Mzimba Self-Sufficient,MALAWI,MWMZS_30Sep15,MWMZS_30Sep15.xls,2015-01-01


In [21]:
from baseline.models import Community, LivelihoodZoneBaseline

for index, row in community_df.iterrows():
    try:
        # Fetch or create the related LivelihoodZoneBaseline instance
        lzb = LivelihoodZoneBaseline.objects.filter(livelihood_zone=row['code']).first()
        if lzb is None:
            # If no matching LivelihoodZoneBaseline, create one
            lzb = LivelihoodZoneBaseline.objects.create(livelihood_zone=row['code'])
        
        model_instance = Community(
            name=row['community'],
            livelihood_zone_baseline=lzb,
            # geography=row['District'], 
            interview_number=row['Interview number:'],
            interviewers=row['Interviewers']
        )
        model_instance.save()
    
    except Exception as e:
        print(f"Error when processing row {index}: {e}")
        print(row)

Error when processing row 172: value too long for type character varying(10)

Unnamed: 0                                                     172
District                                    Mzimba Self-Sufficient
Interview number:                           Mzimba Self-Sufficient
Interviewers                                Mzimba Self-Sufficient
Village                                     Mzimba Self-Sufficient
community            Mzimba Self-Sufficient:Mzimba Self-Sufficient
bss_name                                 MALAWI HEA BASELINES 2015
livelihood_zone                             Mzimba Self-Sufficient
bss_country                                                 MALAWI
code                                                 MWMZS_30Sep15
file_url                                         MWMZS_30Sep15.xls
bss_year                                                2015-01-01
Name: 172, dtype: object
Error when processing row 173: value too long for type character varying(10)

Unnamed: 0     

### Adding WealthGroup data

In [22]:
# load the app model
from baseline.models import WealthGroup
fields = WealthGroup._meta.fields

fields

(<django.db.models.fields.BigAutoField: id>,
 <model_utils.fields.AutoCreatedField: created>,
 <model_utils.fields.AutoLastModifiedField: modified>,
 <django.db.models.fields.related.ForeignKey: livelihood_zone_baseline>,
 <django.db.models.fields.related.ForeignKey: community>,
 <django.db.models.fields.related.ForeignKey: wealth_category>,
 <django.db.models.fields.PositiveSmallIntegerField: percentage_of_households>,
 <django.db.models.fields.PositiveSmallIntegerField: average_household_size>)

In [23]:
# wealth_group_df['wealth_category']

In [29]:
from django.db import DataError
# Read the CSV into a DataFrame
wealth_group_df = pd.read_csv("./Jupyter/bss_extracts/wealth_group.csv")
from metadata.models import WealthCategory
from baseline.models import WealthGroup, LivelihoodZoneBaseline, Community

for index, row in wealth_group_df.iterrows():
    try:
        # Fetch or create the LivelihoodZone instance
        lz, _ = LivelihoodZone.objects.get_or_create(name=row['livelihood_zone'])
        
        # Fetch the first matching LivelihoodZoneBaseline or create one if none exist
        lzb = LivelihoodZoneBaseline.objects.filter(livelihood_zone=lz).first()
        if not lzb:
            lzb = LivelihoodZoneBaseline.objects.create(livelihood_zone=lz)

        # Attempt to fetch the Community instance
        try:
            community_instance = Community.objects.get(name=row['community'])
        except Community.MultipleObjectsReturned:
            community_instance = Community.objects.filter(name=row['community']).first()
        except Community.DoesNotExist:
            community_instance = Community.objects.create(name=row['community'])

        wealth_category_instance, _ = WealthCategory.objects.get_or_create(code=row['wealth_category'])
        
        # Create and save the main model instance
        model_instance = WealthGroup(  # Replace 'YourModelName' with the name of your actual model
            livelihood_zone_baseline=lzb,
            community=community_instance,
            wealth_category=wealth_category_instance,
            percentage_of_households=row['percentage_of_hh'],
            average_household_size=row['average_household_size']
        )
        model_instance.save()

    except DataError:
        # Log the error for inspection
        print(f"Error with row {index}: {row}")

        # Continue to the next iteration, skipping the current problematic row
        continue


### Adding Wealth characteristics

In [30]:
# load the app model
from metadata.models import WealthCharacteristic
fields = WealthCharacteristic._meta.fields

fields

(<model_utils.fields.AutoCreatedField: created>,
 <model_utils.fields.AutoLastModifiedField: modified>,
 <common.fields.CodeField: code>,
 <common.fields.NameField: name>,
 <common.fields.DescriptionField: description>,
 <django.db.models.fields.PositiveSmallIntegerField: ordering>,
 <django.db.models.fields.json.JSONField: aliases>,
 <django.db.models.fields.CharField: variable_type>)

In [47]:
wc_df = pd.read_csv("./Jupyter/bss_extracts/Wealth_characteristics.csv")

from metadata.models import WealthCharacteristic

for index, row in wc_df.iterrows():
    try:
        # Create and save the main model instance
        model_instance = WealthCharacteristic(
            code=row['category'],
            name=row['wealth_group'],
            description=row['wealth_group'],
            # ordering=row['ordering'],
            # aliases=row['aliases'],
            # variable_type=row['variable_type']
        )
        model_instance.save()

    except DataError as e:
        print(f"DataError with row {index}: {row}. Error: {e}")
    except Exception as e:
        print(f"Unexpected error with row {index}: {row}. Error: {e}")