In [1]:
import pandas as pd
import numpy as np

In [2]:
# --- Configuration ---
# Tunable parameters for the synthetic dataset.
NUM_RECORDS = 150 # Number of past disaster response events
REGIONS = ['Turkana', 'Mandera', 'Wajir', 'Marsabit', 'Isiolo']
YEARS = np.arange(2010, 2024)  # 2010..2023 inclusive (np.arange excludes the stop value)

# Seed NumPy's global generator so the np.random.* calls in the following
# cells produce the same dataset on every fresh, top-to-bottom run.
SEED = 42
np.random.seed(SEED)

In [3]:
# --- Generate Data ---
# Disaster categories and their sampling weights. Droughts dominate (90%);
# floods and conflict are included for variability. One entry per distinct
# label keeps the weights readable and guarantees they line up with the labels.
DISASTER_TYPES = ['Drought', 'Flood', 'Conflict']
DISASTER_TYPE_WEIGHTS = [0.9, 0.05, 0.05]

data = {
    # Event ID: 'DRC-001' .. 'DRC-<NUM_RECORDS>', zero-padded to at least three digits
    'Event_ID': [f'DRC-{i:03d}' for i in range(1, NUM_RECORDS + 1)],

    # Region and Year, each sampled uniformly (with replacement)
    'Region': np.random.choice(REGIONS, NUM_RECORDS),
    'Year': np.random.choice(YEARS, NUM_RECORDS),

    # Type of Disaster, sampled according to DISASTER_TYPE_WEIGHTS
    'Disaster_Type': np.random.choice(DISASTER_TYPES, NUM_RECORDS, p=DISASTER_TYPE_WEIGHTS),

    # Affected Population (Proxy for need)
    # Integers drawn uniformly from 500 to 14,999 (np.random.randint excludes
    # the upper bound), so the expected mean is roughly 7,750.
    'Affected_Population': np.random.randint(500, 15000, NUM_RECORDS),
}


In [4]:
# Turn the generated columns into a DataFrame.
df = pd.DataFrame(data)

# Overall response rate per event, drawn uniformly from [0.1, 0.8).
# Every NFI quantity below is scaled by this rate.
df['Base_Response_Rate'] = np.random.uniform(0.1, 0.8, NUM_RECORDS)

# Item-specific share range, applied on top of
# Affected_Population * Base_Response_Rate.
# The mapping order fixes both the column order and the order of random draws.
nfi_share_ranges = {
    'Blankets_Distributed': (0.5, 0.7),
    'Jerrycans_Distributed': (0.3, 0.5),
    # Lower share: kits are often handed out one per family of 5
    'Hygiene_Kits_Distributed': (0.2, 0.4),
}

for column, (low, high) in nfi_share_ranges.items():
    item_share = np.random.uniform(low, high, NUM_RECORDS)
    quantity = df['Affected_Population'] * df['Base_Response_Rate'] * item_share
    # Quantities are whole units: round to the nearest integer, then cast
    df[column] = quantity.round(0).astype(int)

# Persist the dataset without the DataFrame index column.
output_file = 'historical_response_data.csv'
df.to_csv(output_file, index=False)


In [5]:
# Confirm where the dataset was written and preview its first rows.
print(f"Historical response data generated and saved to {output_file}")
print("\nFirst 5 rows of data:")
print(df.head())

Historical response data and saved to historical_response_data.csv

First 5 rows of data:
  Event_ID   Region  Year Disaster_Type  Affected_Population  \
0  DRC-001  Turkana  2023       Drought                13040   
1  DRC-002   Isiolo  2023       Drought                14112   
2  DRC-003    Wajir  2014       Drought                12629   
3  DRC-004  Mandera  2010       Drought                 3372   
4  DRC-005  Mandera  2012       Drought                 4187   

   Base_Response_Rate  Blankets_Distributed  Jerrycans_Distributed  \
0            0.442655                  3719                   1952   
1            0.614425                  5801                   3318   
2            0.586166                  3764                   3103   
3            0.782515                  1598                   1001   
4            0.546673                  1541                    850   

   Hygiene_Kits_Distributed  
0                      1204  
1                      3279  
2             