In [1]:
import pandas as pd

# 1. Import and View the Data

In [36]:
# Load both inputs in one pass: the CHIRPS environmental table first,
# then the village-level drought survey (order matters for the unpacking).
df1_chirps, df2_survey = (
    pd.read_csv(csv_name)
    for csv_name in ('DRC_EnvironmentalData.csv', 'DRC_DroughtSurvey_VillageLevel.csv')
)

In [37]:
# CHIRPS data
# For a given territory: which are the top 8 years with the worst drought (1990-2020),
# in Season A (early) and Season B (late)?
# NOTE(review): the preview below also contains rows for years after 2020 (with NaN
# total / drought_rank) -- confirm the intended year range.
df1_chirps

Unnamed: 0.1,Unnamed: 0,territory,year,total,season,drought_rank
0,1,Bulungu,1990,640.990974,Season A,2.0
1,2,Bulungu,1991,645.312309,Season A,3.0
2,3,Bulungu,1992,,Season A,
3,4,Bulungu,1993,715.824785,Season A,7.0
4,5,Bulungu,1994,,Season A,
...,...,...,...,...,...,...
196,197,Kamonia,2019,,Season B,
197,198,Kamonia,2020,,Season B,
198,199,Kamonia,2021,,Season B,
199,200,Kamonia,2022,,Season B,


In [38]:
# Survey data
# Each village completes one survey in which it ranks the top 8 years with the worst drought.
# The preview suggests one row per (survey_id, rank) -- TODO confirm every survey has all 8 ranks.
df2_survey

Unnamed: 0.1,Unnamed: 0,province,territory,sector,village,survey_id,rank,year,comments
0,1,Kwilu,Bulungu,Dwe,Nkwebe 2,1,1,2004,"Sécheresses , crise alimentaire et faim ex..."
1,2,Kwilu,Bulungu,Dwe,Nkwebe 2,1,2,2006,"Secheresse ,insuffisance alimentaire"
2,3,Kwilu,Bulungu,Dwe,Nkwebe 2,1,3,2012,Attaques des maladies (mosaique et secheres...
3,4,Kwilu,Bulungu,Dwe,Nkwebe 2,1,4,2007,Menace des cultures par les rats; carence ...
4,5,Kwilu,Bulungu,Dwe,Nkwebe 2,1,5,1995,Ravageurs des plantes par les sauterelles ...
...,...,...,...,...,...,...,...,...,...
795,796,Kasaï,Kamonia,TSHISUKU,TSHINOTA 2,100,4,2010,Absence des pluies
796,797,Kasaï,Kamonia,TSHISUKU,TSHINOTA 2,100,5,2022,Absence des pluies
797,798,Kasaï,Kamonia,TSHISUKU,TSHINOTA 2,100,6,2018,Absence des pluies dans la zone
798,799,Kasaï,Kamonia,TSHISUKU,TSHINOTA 2,100,7,2020,Absence des pluies


## NOTE: Administrative hierarchy, from largest to smallest: Province > Territory > Sector > Village

In [39]:
# Distinct (province, territory, sector, village) combinations found in the survey.
# drop_duplicates keeps the first occurrence, so the original row labels are preserved.
province_territory_sector_village = (
    df2_survey[["province", "territory", "sector", "village"]]
    .drop_duplicates()
)
print(province_territory_sector_village)

    province territory    sector       village
0      Kwilu   Bulungu       Dwe      Nkwebe 2
8      Kwilu   Bulungu       Dwe      Polombwa
16     Kwilu   Bulungu   Imbongo      Lundwa 1
24     Kwilu   Bulungu   Imbongo       Midingi
32     Kwilu   Bulungu   Imbongo        Ngamba
..       ...       ...       ...           ...
760    Kasaï   Kamonia  TSHIKAPA     TSHIMBUDI
768    Kasaï   Kamonia  TSHISUKU        MAKODI
776    Kasaï   Kamonia  TSHISUKU        MUKAMA
784    Kasaï   Kamonia  TSHISUKU  Q/ JACQUES 2
792    Kasaï   Kamonia  TSHISUKU    TSHINOTA 2

[88 rows x 4 columns]


# 2. Extract Season A and Season B from CHIRPS Data

In [40]:
# Distinct (province, territory) pairs found in the survey.
# drop_duplicates keeps the first occurrence, so the original row labels are preserved.
province_territory = (
    df2_survey[["province", "territory"]]
    .drop_duplicates()
)
print(province_territory)

          province territory
0            Kwilu   Bulungu
272  Kasaï-Central     Demba
520          Kasaï   Kamonia


In [41]:
# {territory -> province} lookup dictionary.
# Keys are territory names and values are province names (despite the variable's
# name, the lookup direction is territory -> province).
# If a territory were listed under more than one province, the last pair would win.

province_territory_dict = dict(
    zip(province_territory["territory"], province_territory["province"])
)

print(province_territory_dict)

{'Bulungu': 'Kwilu', 'Demba': 'Kasaï-Central', 'Kamonia': 'Kasaï'}


In [42]:
# Add a "province" column to df1_chirps by looking up each row's territory in
# province_territory_dict (territory -> province).
# Series.map with a dict is a vectorised lookup; territories missing from the
# dict come back as NaN, so they are detected and reported together below.
province_by_row = df1_chirps["territory"].map(province_territory_dict)

# Fail loudly (KeyError, as a plain dict lookup would) rather than silently
# writing NaN provinces into the exported CSVs.
unmapped_territories = df1_chirps.loc[province_by_row.isna(), "territory"].unique()
if len(unmapped_territories) > 0:
    raise KeyError(
        f"No province found for CHIRPS territories: {list(unmapped_territories)}"
    )

df1_chirps["province"] = province_by_row

In [43]:
# Display the CHIRPS frame again to confirm the new "province" column is present.
df1_chirps

Unnamed: 0.1,Unnamed: 0,territory,year,total,season,drought_rank,province
0,1,Bulungu,1990,640.990974,Season A,2.0,Kwilu
1,2,Bulungu,1991,645.312309,Season A,3.0,Kwilu
2,3,Bulungu,1992,,Season A,,Kwilu
3,4,Bulungu,1993,715.824785,Season A,7.0,Kwilu
4,5,Bulungu,1994,,Season A,,Kwilu
...,...,...,...,...,...,...,...
196,197,Kamonia,2019,,Season B,,Kasaï
197,198,Kamonia,2020,,Season B,,Kasaï
198,199,Kamonia,2021,,Season B,,Kasaï
199,200,Kamonia,2022,,Season B,,Kasaï


In [44]:
# Write one CSV per season: keep only the rows whose "season" matches the label
# and save them without the DataFrame index.
season_outputs = {
    "Season A": "DRC_EnvironmentalData_SeasonA.csv",
    "Season B": "DRC_EnvironmentalData_SeasonB.csv",
}
for season_label, csv_path in season_outputs.items():
    season_rows = df1_chirps[df1_chirps["season"] == season_label]
    season_rows.to_csv(csv_path, index=False)