In [None]:
import pandas as pd
import numpy as np

# Number of synthetic regions to generate
num_regions = 1000

# Disaster catalogue: each type maps its single severity metric to a (min, max) range
disaster_types = {
    "Flood": {"precipitation": (50, 300)},  # mm
    "Storm": {"wind_speed": (60, 250)},  # km/h
    "Earthquake": {"magnitude": (4.0, 9.0)},  # Richter scale
    "Drought": {"rainfall_deficit": (10, 80)},  # % decrease
}

# Supply multipliers
food_per_person = 5  # kg per person
water_per_person = 3  # liters per person
shelter_per_person = 0.2  # tents per person
medicine_per_elderly = 2  # kits per person aged 60+
sanitary_items_per_female = 1  # items per female
baby_food_per_child = 1.5  # kg per child aged 0-12

# Output schema, listed in the same order as the values of each generated row
column_names = [
    "Region", "Disaster Type", "Severity Parameter", "Severity Value", "Total Population",
    "Age 0-12", "Age 12-60", "Age 60+", "Female Population",
    "Food Supply (kg)", "Water Supply (liters)", "Shelter Supply (tents)",
    "Baby Food Supply (kg)", "Medicine Supply (kits)", "Sanitary Supply (items)",
]

# Seed NumPy's global generator so every run produces the same table
np.random.seed(42)
data = []

for region_number in range(1, num_regions + 1):
    # The random draws below are order-sensitive: reordering them changes the data.
    disaster = np.random.choice(list(disaster_types))
    severity_param, (low, high) = next(iter(disaster_types[disaster].items()))
    severity_value = round(np.random.uniform(low, high), 2)

    # Population and its split; the 60+ group is whatever remains of the total
    total_population = np.random.randint(1000, 100000)
    children = int(total_population * np.random.uniform(0.1, 0.3))
    adults = int(total_population * np.random.uniform(0.5, 0.7))
    seniors = total_population - (children + adults)
    females = int(total_population * np.random.uniform(0.45, 0.55))

    data.append([
        f"Region_{region_number}", disaster, severity_param, severity_value, total_population,
        children, adults, seniors, females,
        # Needs that scale with the whole population
        total_population * food_per_person,
        total_population * water_per_person,
        total_population * shelter_per_person,
        # Needs that scale with one demographic group
        children * baby_food_per_child,
        seniors * medicine_per_elderly,
        females * sanitary_items_per_female,
    ])

# Assemble the table, persist it for the modelling cells, and preview it
df = pd.DataFrame(data, columns=column_names)
df.to_csv("disaster_relief_data.csv", index=False)
print(df.head())


     Region Disaster Type Severity Parameter  Severity Value  \
0  Region_1    Earthquake          magnitude            7.98   
1  Region_2       Drought   rainfall_deficit           33.36   
2  Region_3       Drought   rainfall_deficit           75.70   
3  Region_4         Storm         wind_speed           61.34   
4  Region_5       Drought   rainfall_deficit           78.16   

   Total Population  Age 0-12  Age 12-60  Age 60+  Female Population  \
0             77820     10210      41337    26273              35471   
1             61263     14802      30883    15578              33510   
2             68969      9404      37014    22551              33134   
3             94016     14877      58512    20627              43618   
4             60150     11501      39520     9129              28268   

   Food Supply (kg)  Water Supply (liters)  Shelter Supply (tents)  \
0            389100                 233460                 15564.0   
1            306315                 183789

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.metrics import mean_absolute_error, r2_score

# Read back the table written by the data-generation cell
df = pd.read_csv("disaster_relief_data.csv")

# Model inputs: one categorical column followed by six numeric columns
features = [
    "Disaster Type",
    "Severity Value",
    "Total Population",
    "Age 0-12",
    "Age 12-60",
    "Age 60+",
    "Female Population",
]
# Model outputs: the six supply quantities, predicted jointly
targets = [
    "Food Supply (kg)",
    "Water Supply (liters)",
    "Shelter Supply (tents)",
    "Baby Food Supply (kg)",
    "Medicine Supply (kits)",
    "Sanitary Supply (items)",
]

X, y = df[features], df[targets]

# Column routing for preprocessing: everything that is not categorical is numeric
categorical_features = ["Disaster Type"]
numerical_features = [name for name in features if name not in categorical_features]

# One-hot encode the disaster type, standardise the numeric columns
preprocessor = ColumnTransformer(
    transformers=[
        ("cat", OneHotEncoder(handle_unknown="ignore"), categorical_features),
        ("num", StandardScaler(), numerical_features),
    ]
)

# Preprocessing and the random forest chained into a single estimator
model = Pipeline(
    steps=[
        ("preprocessor", preprocessor),
        ("regressor", RandomForestRegressor(n_estimators=100, random_state=42)),
    ]
)

# Hold out 20% of the rows for evaluation (fixed seed for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

model.fit(X_train, y_train)

# Score the fitted pipeline on the held-out rows
y_pred = model.predict(X_test)
mae, r2 = mean_absolute_error(y_test, y_pred), r2_score(y_test, y_pred)
print(f"Mean Absolute Error: {mae:.2f}")
print(f"R² Score: {r2:.2f}")

# Function to predict aid requirements for a new scenario
def predict_aid(disaster, severity, population, age_0_12, age_12_60, age_60_plus, female_pop):
    """Predict the aid quantities for one disaster scenario.

    The arguments are placed, in order, under the column names held in the
    global ``features`` list and passed to the global ``model`` (both are
    looked up when the function is called, not when it is defined).

    Args:
        disaster: Value for the "Disaster Type" column.
        severity: Value for the "Severity Value" column.
        population: Value for the "Total Population" column.
        age_0_12: Value for the "Age 0-12" column.
        age_12_60: Value for the "Age 12-60" column.
        age_60_plus: Value for the "Age 60+" column.
        female_pop: Value for the "Female Population" column.

    Returns:
        dict mapping each name in the global ``targets`` list to the predicted
        quantity as a built-in ``float``.
    """
    scenario = [disaster, severity, population, age_0_12, age_12_60, age_60_plus, female_pop]
    input_data = pd.DataFrame([scenario], columns=features)
    prediction = model.predict(input_data)
    # Convert NumPy scalars to built-in floats: under NumPy 2 the raw scalars
    # print as ``np.float64(...)``, which clutters the displayed dictionary.
    return {name: float(value) for name, value in zip(targets, prediction[0])}

# Try the helper on one hand-written flood scenario
example = predict_aid(
    disaster="Flood",
    severity=180,
    population=50000,
    age_0_12=10000,
    age_12_60=30000,
    age_60_plus=10000,
    female_pop=25000,
)
print("Predicted Aid Requirements:", example)

Mean Absolute Error: 1048.74
R² Score: 0.98
Predicted Aid Requirements: {'Food Supply (kg)': np.float64(247680.75), 'Water Supply (liters)': np.float64(148608.45), 'Shelter Supply (tents)': np.float64(9907.23), 'Baby Food Supply (kg)': np.float64(14708.91), 'Medicine Supply (kits)': np.float64(20280.5), 'Sanitary Supply (items)': np.float64(24911.04)}


In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.metrics import mean_absolute_error, r2_score

# Reload the generated dataset from disk
df = pd.read_csv("disaster_relief_data.csv")

# Input columns and the six supply columns to predict
features = [
    "Disaster Type",
    "Severity Value", "Total Population",
    "Age 0-12", "Age 12-60", "Age 60+",
    "Female Population",
]
targets = [
    "Food Supply (kg)", "Water Supply (liters)", "Shelter Supply (tents)",
    "Baby Food Supply (kg)", "Medicine Supply (kits)", "Sanitary Supply (items)",
]

X = df[features]
y = df[targets]

# The disaster type is one-hot encoded; every other feature is standardised
categorical_features = ["Disaster Type"]
numerical_features = [col for col in features if col not in categorical_features]

preprocessor = ColumnTransformer(
    transformers=[
        ("cat", OneHotEncoder(handle_unknown="ignore"), categorical_features),
        ("num", StandardScaler(), numerical_features),
    ]
)

# Untuned pipeline; the forest's hyperparameters are chosen by the search below
pipeline = Pipeline(
    steps=[
        ("preprocessor", preprocessor),
        ("regressor", RandomForestRegressor(random_state=42)),
    ]
)

# 80/20 split with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Exhaustive search over 3 x 3 x 3 x 3 = 81 forest settings
param_grid = dict(
    regressor__n_estimators=[100, 200, 300],
    regressor__max_depth=[10, 20, None],
    regressor__min_samples_split=[2, 5, 10],
    regressor__min_samples_leaf=[1, 2, 4],
)

grid_search = GridSearchCV(
    estimator=pipeline,
    param_grid=param_grid,
    cv=3,
    scoring="neg_mean_absolute_error",
    n_jobs=-1,
    verbose=1,
)
grid_search.fit(X_train, y_train)

# Keep the best pipeline found by the search
tuned_model = grid_search.best_estimator_

# Score it on the held-out rows
y_pred = tuned_model.predict(X_test)
mae, r2 = mean_absolute_error(y_test, y_pred), r2_score(y_test, y_pred)

print(f"Optimized Mean Absolute Error: {mae:.2f}")
print(f"Optimized R² Score: {r2:.2f}")
print("Best Parameters:", grid_search.best_params_)

# Function to predict aid requirements for a new scenario
def predict_aid(disaster, severity, population, age_0_12, age_12_60, age_60_plus, female_pop):
    """Predict the aid quantities for one disaster scenario with ``tuned_model``.

    The arguments are placed, in order, under the column names held in the
    global ``features`` list. ``tuned_model`` is a global looked up at call
    time, so the function uses whichever estimator that name is bound to when
    it is called.

    Args:
        disaster: Value for the "Disaster Type" column.
        severity: Value for the "Severity Value" column.
        population: Value for the "Total Population" column.
        age_0_12: Value for the "Age 0-12" column.
        age_12_60: Value for the "Age 12-60" column.
        age_60_plus: Value for the "Age 60+" column.
        female_pop: Value for the "Female Population" column.

    Returns:
        dict mapping each name in the global ``targets`` list to the predicted
        quantity as a built-in ``float``.
    """
    scenario = [disaster, severity, population, age_0_12, age_12_60, age_60_plus, female_pop]
    input_data = pd.DataFrame([scenario], columns=features)
    prediction = tuned_model.predict(input_data)
    # Convert NumPy scalars to built-in floats: under NumPy 2 the raw scalars
    # print as ``np.float64(...)``, which clutters the displayed dictionary.
    return {name: float(value) for name, value in zip(targets, prediction[0])}

# Same flood scenario as before, now answered by the tuned pipeline
example = predict_aid(
    disaster="Flood",
    severity=180,
    population=50000,
    age_0_12=10000,
    age_12_60=30000,
    age_60_plus=10000,
    female_pop=25000,
)
print("Predicted Aid Requirements:", example)

Fitting 3 folds for each of 81 candidates, totalling 243 fits
Optimized Mean Absolute Error: 1023.79
Optimized R² Score: 0.98
Best Parameters: {'regressor__max_depth': 10, 'regressor__min_samples_leaf': 1, 'regressor__min_samples_split': 2, 'regressor__n_estimators': 300}
Predicted Aid Requirements: {'Food Supply (kg)': np.float64(246902.35), 'Water Supply (liters)': np.float64(148141.41), 'Shelter Supply (tents)': np.float64(9876.093999999997), 'Baby Food Supply (kg)': np.float64(14987.4775), 'Medicine Supply (kits)': np.float64(20087.076666666668), 'Sanitary Supply (items)': np.float64(24736.806666666667)}


In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.metrics import mean_absolute_error, r2_score

# Reload the generated dataset from disk
df = pd.read_csv("disaster_relief_data.csv")

# Input columns (categorical first, then numeric) and the supply columns to predict
categorical_features = ["Disaster Type"]
numerical_features = [
    "Severity Value", "Total Population",
    "Age 0-12", "Age 12-60", "Age 60+",
    "Female Population",
]
features = categorical_features + numerical_features
targets = [
    "Food Supply (kg)", "Water Supply (liters)", "Shelter Supply (tents)",
    "Baby Food Supply (kg)", "Medicine Supply (kits)", "Sanitary Supply (items)",
]

X, y = df[features], df[targets]

# One-hot encode the disaster type and standardise the numeric columns
preprocessor = ColumnTransformer(
    transformers=[
        ("cat", OneHotEncoder(handle_unknown="ignore"), categorical_features),
        ("num", StandardScaler(), numerical_features),
    ]
)

# Untuned pipeline handed to the grid search
pipeline = Pipeline(
    steps=[
        ("preprocessor", preprocessor),
        ("regressor", RandomForestRegressor(random_state=42)),
    ]
)

# 80/20 split with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Candidate forest settings (81 combinations), scored by 3-fold CV on negative MAE
param_grid = dict(
    regressor__n_estimators=[100, 200, 300],
    regressor__max_depth=[10, 20, None],
    regressor__min_samples_split=[2, 5, 10],
    regressor__min_samples_leaf=[1, 2, 4],
)
grid_search = GridSearchCV(
    estimator=pipeline,
    param_grid=param_grid,
    cv=3,
    scoring="neg_mean_absolute_error",
    n_jobs=-1,
    verbose=1,
)
grid_search.fit(X_train, y_train)

# Keep the best pipeline found by the search
tuned_model = grid_search.best_estimator_

# Predict on both splits so training fit can be compared with held-out fit
y_train_pred, y_test_pred = tuned_model.predict(X_train), tuned_model.predict(X_test)

mae = mean_absolute_error(y_test, y_test_pred)
r2_train, r2_test = r2_score(y_train, y_train_pred), r2_score(y_test, y_test_pred)

print(f"Optimized Mean Absolute Error: {mae:.2f}")
print(f"Train R² Score: {r2_train:.2f}")
print(f"Test R² Score: {r2_test:.2f}")
print("Best Parameters:", grid_search.best_params_)

# Function to predict aid requirements for a new scenario
def predict_aid(disaster, severity, population, age_0_12, age_12_60, age_60_plus, female_pop):
    """Predict the aid quantities for one disaster scenario with ``tuned_model``.

    The arguments are placed, in order, under the column names held in the
    global ``features`` list. ``tuned_model`` is a global looked up at call
    time, so the function uses whichever estimator that name is bound to when
    it is called.

    Args:
        disaster: Value for the "Disaster Type" column.
        severity: Value for the "Severity Value" column.
        population: Value for the "Total Population" column.
        age_0_12: Value for the "Age 0-12" column.
        age_12_60: Value for the "Age 12-60" column.
        age_60_plus: Value for the "Age 60+" column.
        female_pop: Value for the "Female Population" column.

    Returns:
        dict mapping each name in the global ``targets`` list to the predicted
        quantity as a built-in ``float``.
    """
    scenario = [disaster, severity, population, age_0_12, age_12_60, age_60_plus, female_pop]
    input_data = pd.DataFrame([scenario], columns=features)
    prediction = tuned_model.predict(input_data)
    # Convert NumPy scalars to built-in floats: under NumPy 2 the raw scalars
    # print as ``np.float64(...)``, which clutters the displayed dictionary.
    return {name: float(value) for name, value in zip(targets, prediction[0])}

# Run the hand-written flood scenario through the tuned pipeline
example = predict_aid(
    disaster="Flood",
    severity=180,
    population=50000,
    age_0_12=10000,
    age_12_60=30000,
    age_60_plus=10000,
    female_pop=25000,
)
print("Predicted Aid Requirements:", example)

Fitting 3 folds for each of 81 candidates, totalling 243 fits
Optimized Mean Absolute Error: 1023.79
Train R² Score: 1.00
Test R² Score: 0.98
Best Parameters: {'regressor__max_depth': 10, 'regressor__min_samples_leaf': 1, 'regressor__min_samples_split': 2, 'regressor__n_estimators': 300}
Predicted Aid Requirements: {'Food Supply (kg)': np.float64(246902.35), 'Water Supply (liters)': np.float64(148141.41), 'Shelter Supply (tents)': np.float64(9876.093999999997), 'Baby Food Supply (kg)': np.float64(14987.4775), 'Medicine Supply (kits)': np.float64(20087.076666666668), 'Sanitary Supply (items)': np.float64(24736.806666666667)}


In [None]:
from sklearn.metrics import r2_score, mean_absolute_error

# Predictions of the current tuned pipeline on both splits
y_train_pred, y_test_pred = tuned_model.predict(X_train), tuned_model.predict(X_test)

# Training-split metrics
r2_train = r2_score(y_train, y_train_pred)
mae_train = mean_absolute_error(y_train, y_train_pred)

# Held-out-split metrics
r2_test = r2_score(y_test, y_test_pred)
mae_test = mean_absolute_error(y_test, y_test_pred)

# Report R² first, then MAE, train before test
print(f"Train R² Score: {r2_train:.2f}")
print(f"Test R² Score: {r2_test:.2f}")
print(f"Train MAE: {mae_train:.2f}")
print(f"Test MAE: {mae_test:.2f}")


Train R² Score: 1.00
Test R² Score: 0.98
Train MAE: 438.29
Test MAE: 1023.79


In [None]:
from sklearn.model_selection import GridSearchCV

# Second search: a grid with shallower depth caps and larger split/leaf minimums
param_grid = dict(
    regressor__n_estimators=[50, 100, 200],  # number of trees
    regressor__max_depth=[5, 10, 15],  # cap on tree depth
    regressor__min_samples_split=[5, 10, 15],  # samples required to split a node
    regressor__min_samples_leaf=[2, 4, 6],  # samples required in a leaf
)

# 5-fold cross-validated grid search over the existing pipeline, scored on negative MAE
grid_search = GridSearchCV(
    estimator=pipeline,
    param_grid=param_grid,
    cv=5,
    scoring="neg_mean_absolute_error",
    n_jobs=-1,
    verbose=1,
)
grid_search.fit(X_train, y_train)

# Replace the tuned model with the best pipeline from this search
tuned_model = grid_search.best_estimator_

# Compare fit on the training rows with fit on the held-out rows
y_train_pred, y_test_pred = tuned_model.predict(X_train), tuned_model.predict(X_test)
r2_train, r2_test = r2_score(y_train, y_train_pred), r2_score(y_test, y_test_pred)

print(f"Optimized Train R² Score: {r2_train:.2f}")
print(f"Optimized Test R² Score: {r2_test:.2f}")
print("Best Parameters:", grid_search.best_params_)


Fitting 5 folds for each of 81 candidates, totalling 405 fits
Optimized Train R² Score: 0.99
Optimized Test R² Score: 0.98
Best Parameters: {'regressor__max_depth': 15, 'regressor__min_samples_leaf': 2, 'regressor__min_samples_split': 5, 'regressor__n_estimators': 200}


In [None]:
# Third search: an even tighter grid (2 x 2 x 2 x 2 = 16 combinations)
param_grid = dict(
    regressor__n_estimators=[50, 100],  # number of trees
    regressor__max_depth=[5, 10],  # cap on tree depth
    regressor__min_samples_split=[10, 20],  # samples required to split a node
    regressor__min_samples_leaf=[5, 10],  # samples required in a leaf
)

# 5-fold cross-validated grid search over the existing pipeline, scored on negative MAE
grid_search = GridSearchCV(
    estimator=pipeline,
    param_grid=param_grid,
    cv=5,
    scoring="neg_mean_absolute_error",
    n_jobs=-1,
    verbose=1,
)
grid_search.fit(X_train, y_train)

# Replace the tuned model with the best pipeline from this search
tuned_model = grid_search.best_estimator_

# Compare fit on the training rows with fit on the held-out rows
y_train_pred = tuned_model.predict(X_train)
r2_train = r2_score(y_train, y_train_pred)

y_test_pred = tuned_model.predict(X_test)
r2_test = r2_score(y_test, y_test_pred)

print(f"Optimized Train R² Score: {r2_train:.2f}")
print(f"Optimized Test R² Score: {r2_test:.2f}")
print("Best Parameters:", grid_search.best_params_)


Fitting 5 folds for each of 16 candidates, totalling 80 fits
Optimized Train R² Score: 0.97
Optimized Test R² Score: 0.96
Best Parameters: {'regressor__max_depth': 10, 'regressor__min_samples_leaf': 5, 'regressor__min_samples_split': 10, 'regressor__n_estimators': 100}


In [None]:
from sklearn.metrics import r2_score, mean_absolute_error

# Training split: predictions and metrics for the current tuned pipeline
y_train_pred = tuned_model.predict(X_train)
r2_train, mae_train = (
    r2_score(y_train, y_train_pred),
    mean_absolute_error(y_train, y_train_pred),
)

# Held-out split: predictions and metrics
y_test_pred = tuned_model.predict(X_test)
r2_test, mae_test = (
    r2_score(y_test, y_test_pred),
    mean_absolute_error(y_test, y_test_pred),
)

# Report R² first, then MAE, train before test
print(f"Train R² Score: {r2_train:.2f}")
print(f"Test R² Score: {r2_test:.2f}")
print(f"Train MAE: {mae_train:.2f}")
print(f"Test MAE: {mae_test:.2f}")


Train R² Score: 0.97
Test R² Score: 0.96
Train MAE: 1246.25
Test MAE: 1432.97


In [None]:
# A single hypothetical scenario, keyed by the model's feature column names
example_scenario = {
    "Disaster Type": "Flood",
    "Severity Value": 150,
    "Total Population": 60000,
    "Age 0-12": 12000,  # children
    "Age 12-60": 40000,  # adults
    "Age 60+": 8000,  # elderly
    "Female Population": 30000,
}

# The pipeline expects a DataFrame, so wrap the scenario as a one-row frame
example_df = pd.DataFrame([example_scenario])

# One row in, one row of six predicted quantities out
predicted_aid = tuned_model.predict(example_df)

# Label each predicted value with its target name
predicted_result = dict(zip(targets, predicted_aid[0]))

# Round every quantity to a whole number for display
predicted_result_clean = {
    name: round(float(predicted_result[name])) for name in predicted_result
}

print("Predicted Aid Requirements:", predicted_result_clean)


Predicted Aid Requirements: {'Food Supply (kg)': 299961, 'Water Supply (liters)': 179976, 'Shelter Supply (tents)': 11998, 'Baby Food Supply (kg)': 21270, 'Medicine Supply (kits)': 14782, 'Sanitary Supply (items)': 29628}


In [None]:
from sklearn.model_selection import RandomizedSearchCV

# Wider candidate space: four options for each of the four forest hyperparameters
param_grid = dict(
    regressor__n_estimators=[50, 100, 150, 200],  # number of trees
    regressor__max_depth=[5, 10, 15, 20],  # cap on tree depth
    regressor__min_samples_split=[5, 10, 15, 20],  # samples required to split a node
    regressor__min_samples_leaf=[2, 5, 10, 15],  # samples required in a leaf
)

# Sample 20 of the combinations instead of trying them all (seeded for repeatability)
random_search = RandomizedSearchCV(
    estimator=pipeline,
    param_distributions=param_grid,
    n_iter=20,
    cv=5,
    scoring="neg_mean_absolute_error",
    n_jobs=-1,
    verbose=1,
    random_state=42,
)
random_search.fit(X_train, y_train)

# Replace the tuned model with the best pipeline from this search
tuned_model = random_search.best_estimator_

# Predictions on both splits
y_train_pred, y_test_pred = tuned_model.predict(X_train), tuned_model.predict(X_test)

# R² and MAE for each split
r2_train, r2_test = r2_score(y_train, y_train_pred), r2_score(y_test, y_test_pred)
mae_train, mae_test = (
    mean_absolute_error(y_train, y_train_pred),
    mean_absolute_error(y_test, y_test_pred),
)

print(f"Optimized Train R² Score: {r2_train:.2f}")
print(f"Optimized Test R² Score: {r2_test:.2f}")
print(f"Optimized Train MAE: {mae_train:.2f}")
print(f"Optimized Test MAE: {mae_test:.2f}")
print("Best Parameters:", random_search.best_params_)


Fitting 5 folds for each of 20 candidates, totalling 100 fits
Optimized Train R² Score: 0.98
Optimized Test R² Score: 0.97
Optimized Train MAE: 1225.34
Optimized Test MAE: 1416.69
Best Parameters: {'regressor__n_estimators': 200, 'regressor__min_samples_split': 10, 'regressor__min_samples_leaf': 2, 'regressor__max_depth': 20}


In [None]:
from prettytable import PrettyTable

# A single hypothetical scenario, keyed by the model's feature column names
example_scenario = {
    "Disaster Type": "Flood",
    "Severity Value": 150,
    "Total Population": 60000,
    "Age 0-12": 12000,  # children
    "Age 12-60": 40000,  # adults
    "Age 60+": 8000,  # elderly
    "Female Population": 30000,
}

# Wrap the scenario as a one-row frame and run it through the tuned pipeline
example_df = pd.DataFrame([example_scenario])
predicted_aid = tuned_model.predict(example_df)

# Label the predicted values, then round each to a whole number for display
predicted_result = dict(zip(targets, predicted_aid[0]))
predicted_result_clean = {
    name: round(float(predicted_result[name])) for name in predicted_result
}

# Two-column text table: one row per aid type
table = PrettyTable(field_names=["Aid Type", "Predicted Quantity"])
for aid_type in predicted_result_clean:
    table.add_row([aid_type, predicted_result_clean[aid_type]])

print("\n📌 Predicted Aid Requirements for Disaster Scenario\n")
print(table)



📌 Predicted Aid Requirements for Disaster Scenario

+-------------------------+--------------------+
|         Aid Type        | Predicted Quantity |
+-------------------------+--------------------+
|     Food Supply (kg)    |       299804       |
|  Water Supply (liters)  |       179882       |
|  Shelter Supply (tents) |       11992        |
|  Baby Food Supply (kg)  |       20772        |
|  Medicine Supply (kits) |       16808        |
| Sanitary Supply (items) |       29602        |
+-------------------------+--------------------+


In [None]:
import pickle

# Persist the current tuned pipeline to disk with pickle.
# NOTE: only unpickle this file from a trusted source; loading a pickle can run arbitrary code.
filename = "disaster_relief_model.pkl"
with open(filename, "wb") as model_file:
    pickle.dump(tuned_model, model_file)

print("Model saved as:", filename)


Model saved as: disaster_relief_model.pkl
