In [3]:

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.preprocessing import StandardScaler
import pickle

import warnings
warnings.filterwarnings("ignore")

In [4]:
# Load the two CSV inputs used by the rest of the notebook:
#   df  <- preprocessed_dengue_data.csv (numerically encoded columns)
#   df1 <- cleaned_dengue_data.csv (still has the readable Ward column)
preprocessed_csv, cleaned_csv = "preprocessed_dengue_data.csv", "cleaned_dengue_data.csv"
df = pd.read_csv(preprocessed_csv)
df1 = pd.read_csv(cleaned_csv)

In [5]:
# Preview the first rows of the cleaned frame.
df1.head()

Unnamed: 0,Patient_ID,Age,Gender,Ward,Date_of_diagnosis,Temperature,Humidity,Rainfall,Symptoms,Platelet_Count,Diagnosis,Outcome,Month,Year,Age_group
0,P00002,48,F,Mehdipatnam,2022-06-21,25.7,82.8,45.0,"Fever, joint pain",110619,Dengue+,Recovered,6,2022,Adult
1,P00003,19,M,Serilingampally,2022-04-01,29.8,63.6,0.0,"Fever, retro-orbital pain",62974,Dengue+,Hospitalized,4,2022,Young Adult
2,P00006,33,M,Mehdipatnam,2023-07-26,23.8,92.9,10.0,"Fever, retro-orbital pain",78755,Dengue+,Hospitalized,7,2023,Young Adult
3,P00010,49,F,Kukatpally,2022-08-20,26.0,79.1,1.0,"Fever, bleeding",20787,Dengue+,Critical,8,2022,Adult
4,P00011,44,F,Musheerabad,2024-07-14,25.0,82.9,60.0,"Fever, muscle pain",134421,Dengue+,Recovered,7,2024,Adult


In [6]:
# Label every ward with a demand class derived from how many patient rows it
# has in the cleaned data, then attach that label to each row of `df`.
HIGH_DEMAND_MIN_CASES = 400    # at least this many cases -> 2 (High Demand)
MEDIUM_DEMAND_MIN_CASES = 200  # at least this many cases -> 1 (Medium Demand)


def demand_level(case_count):
    """Return the demand class for a ward with ``case_count`` cases.

    2 = High Demand, 1 = Medium Demand, 0 = Low Demand.
    """
    if case_count >= HIGH_DEMAND_MIN_CASES:
        return 2  # High Demand
    if case_count >= MEDIUM_DEMAND_MIN_CASES:
        return 1  # Medium Demand
    return 0  # Low Demand


# value_counts() skips missing wards, so rows without a ward get NaN below.
ward_counts = df1['Ward'].value_counts()
ward_demand = {ward: demand_level(count) for ward, count in ward_counts.items()}

# The labels are looked up from df1 but stored on df, and pandas aligns that
# assignment on the row index. Stop here if the two frames do not share an
# index rather than silently writing NaN or misplaced labels.
if not df.index.equals(df1.index):
    raise ValueError(
        "df and df1 do not share the same row index; "
        "cannot copy ward demand labels from df1 onto df"
    )

# NOTE(review): in the ward_df table displayed later in this notebook every
# ward has Demand_encoded == 2, i.e. with these cut-offs the label has a single
# class. Revisit the thresholds before relying on a model trained on it.
df['Demand_encoded'] = df1['Ward'].map(ward_demand)


In [7]:
# Display the cleaned frame (pandas abbreviates the middle rows when rendering).
# NOTE(review): the preceding cell added Demand_encoded to `df`, not `df1`, so
# the new column is not visible in this output -- confirm `df1` is the frame
# that was meant to be inspected here.
df1

Unnamed: 0,Patient_ID,Age,Gender,Ward,Date_of_diagnosis,Temperature,Humidity,Rainfall,Symptoms,Platelet_Count,Diagnosis,Outcome,Month,Year,Age_group
0,P00002,48,F,Mehdipatnam,2022-06-21,25.7,82.8,45.0,"Fever, joint pain",110619,Dengue+,Recovered,6,2022,Adult
1,P00003,19,M,Serilingampally,2022-04-01,29.8,63.6,0.0,"Fever, retro-orbital pain",62974,Dengue+,Hospitalized,4,2022,Young Adult
2,P00006,33,M,Mehdipatnam,2023-07-26,23.8,92.9,10.0,"Fever, retro-orbital pain",78755,Dengue+,Hospitalized,7,2023,Young Adult
3,P00010,49,F,Kukatpally,2022-08-20,26.0,79.1,1.0,"Fever, bleeding",20787,Dengue+,Critical,8,2022,Adult
4,P00011,44,F,Musheerabad,2024-07-14,25.0,82.9,60.0,"Fever, muscle pain",134421,Dengue+,Recovered,7,2024,Adult
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7966,P13524,20,M,Serilingampally,2022-08-07,24.1,92.5,3.0,"Fever, joint pain",136281,Dengue+,Recovered,8,2022,Young Adult
7967,P13525,34,M,Banjara Hills,2024-08-22,26.1,80.6,0.2,"Fever, muscle pain",127239,Dengue+,Recovered,8,2024,Young Adult
7968,P13526,27,M,Kukatpally,2023-05-05,27.4,71.4,2.6,"Fever, retro-orbital pain",24424,Dengue+,Critical,5,2023,Young Adult
7969,P13528,33,M,LB Nagar,2024-12-23,23.2,70.2,0.0,"Fever, headache, joint pain",33809,Dengue+,Critical,12,2024,Young Adult


In [8]:
# (rows, columns) of the encoded frame, including the Demand_encoded column added above.
df.shape

(7971, 36)

In [9]:
# (rows, columns) of the cleaned frame; its row count should match df's because
# labels are copied between the two frames by row index.
df1.shape

(7971, 15)

In [10]:
# Preview the first rows of the encoded frame; Demand_encoded is the last column.
df.head()

Unnamed: 0,Age,Gender,Temperature,Humidity,Rainfall,Platelet_Count,Outcome,Month,Year,Age_group,...,Symptom_Fever,Symptom_headache,Symptom_joint pain,Symptom_nausea,Symptom_bleeding,Symptom_retro-orbital pain,Symptom_chills,Symptom_rash,Outcome_encoded,Demand_encoded
0,48,0,25.7,82.8,45.0,110619,Recovered,6,2022,2,...,1,0,1,0,0,0,0,0,3,2
1,19,1,29.8,63.6,0.0,62974,Hospitalized,4,2022,1,...,1,0,0,0,0,1,0,0,2,2
2,33,1,23.8,92.9,10.0,78755,Hospitalized,7,2023,1,...,1,0,0,0,0,1,0,0,2,2
3,49,0,26.0,79.1,1.0,20787,Critical,8,2022,2,...,1,0,0,0,1,0,0,0,0,2
4,44,0,25.0,82.9,60.0,134421,Recovered,7,2024,2,...,1,0,0,0,0,0,0,0,3,2


In [11]:
# Rebuild a single readable "Ward" column from the one-hot "Ward_<name>" flags.
WARD_PREFIX = "Ward_"
ward_columns = [col for col in df.columns if col.startswith(WARD_PREFIX)]
if not ward_columns:
    raise ValueError("df has no 'Ward_*' indicator columns to decode a ward name from")

ward_flags = df[ward_columns]

# idxmax returns the first column when every flag in a row is 0, which would
# silently file ward-less rows under the first ward. Track which rows really
# have a flag set and leave the others missing instead.
# NOTE(review): if the flags were produced with a dropped baseline category,
# those baseline rows are exactly the all-zero ones -- confirm how the Ward_*
# columns were generated (Banjara Hills shows roughly twice the cases of every
# other ward in the ward_df output of this notebook).
has_ward = ward_flags.any(axis=1)

# Slice the prefix off by length so only the leading "Ward_" is removed, even
# if a ward name itself happens to contain that text.
df["Ward"] = ward_flags.idxmax(axis=1).str[len(WARD_PREFIX):].where(has_ward)

In [12]:
def critical_share(outcome_codes):
    """Return the fraction of codes equal to 0.

    In the df preview above, rows whose Outcome is 'Critical' carry
    Outcome_encoded == 0, so this is the share of critical cases.
    """
    return (outcome_codes == 0).sum() / len(outcome_codes)


# One aggregation per source column; the result keeps the source column names.
ward_aggregations = {
    "Outcome_encoded": critical_share,  # share of rows coded 0
    "Platelet_Count": "mean",
    "Age": "mean",
    "Temperature": "mean",
    "Humidity": "mean",
    "Rainfall": "mean",
    "Outcome": "count",                 # number of rows (cases) in the ward
    "Demand_encoded": "first",          # label of the first row in each ward
}

# Collapse the patient-level frame to one row per ward, with Ward as a column.
ward_df = df.groupby("Ward").agg(ward_aggregations).reset_index()

In [13]:
# Rename the aggregated columns after what they now hold: the per-ward count of
# Outcome is the number of cases, and the aggregated Outcome_encoded is the
# share of critical cases. Rebinding the name (instead of inplace=True) keeps
# the cell a plain "new value from old value" step.
ward_df = ward_df.rename(columns={
    "Outcome": "Total_Cases",
    "Outcome_encoded": "Critical_Rate",
})

In [14]:
# Display the per-ward summary table that is used as the training data below.
ward_df

Unnamed: 0,Ward,Critical_Rate,Platelet_Count,Age,Temperature,Humidity,Rainfall,Total_Cases,Demand_encoded
0,Banjara Hills,0.230275,83958.516514,29.114679,26.162294,71.086881,4.347615,1090,2
1,Begumpet,0.235832,84762.639854,28.919561,26.301463,71.655027,3.7883,547,2
2,Charminar,0.214559,84404.48659,30.477011,26.335057,70.755939,3.831034,522,2
3,Gachibowli,0.22824,82726.96325,31.700193,26.263443,71.433269,4.303095,517,2
4,Jubilee Hills,0.247619,84576.487619,30.337143,26.028762,72.820571,4.200952,525,2
5,Kukatpally,0.231858,84664.454867,30.364602,26.316814,71.623717,4.392389,565,2
6,LB Nagar,0.244141,80585.664062,30.558594,26.198242,72.60293,5.785938,512,2
7,Malakpet,0.236295,82851.230624,31.298677,26.199433,70.52949,4.558979,529,2
8,Mehdipatnam,0.250903,81941.888087,29.222022,26.211011,71.702166,3.855776,554,2
9,Musheerabad,0.228628,83486.741551,28.747515,26.496024,70.569384,4.487078,503,2


In [15]:
# Split the per-ward table into model inputs and the label to predict.
# The ward name is an identifier, not a feature, so it is left out of X.
target_column = 'Demand_encoded'
non_feature_columns = ['Ward', target_column]

y = ward_df[target_column]
X = ward_df.drop(non_feature_columns, axis=1)


In [16]:
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestClassifier

# Fit a random forest on the per-ward table (one training row per ward).
# random_state is fixed so repeated runs build the same forest.
# cross_val_score is imported here but not used in the cells shown.
model = RandomForestClassifier(random_state=42).fit(X, y)
model


In [17]:
# Persist the fitted classifier next to the notebook so it can be reloaded later.
model_path = 'ward_demand_model.pkl'
with open(model_path, mode='wb') as model_file:
    pickle.dump(model, model_file)

In [20]:
import pickle
import numpy as np
import pandas as pd

# Load model
# NOTE: unpickling runs code stored in the file, so only load a pickle that was
# produced by this notebook or another trusted source.
with open('ward_demand_model.pkl', 'rb') as f:
    model = pickle.load(f)

# Prepare input: one ward's aggregated features, keyed by the column names the
# model was trained on, so a value cannot end up in the wrong position.
feature_values = {
    "Critical_Rate": 0.23,
    "Platelet_Count": 111839.5,
    "Age": 29.1,
    "Temperature": 26.16,
    "Humidity": 71.08,
    "Rainfall": 4.34,
    "Total_Cases": 1090,
}

# Order the columns exactly as the model saw them during fit. Estimators fitted
# on a DataFrame record this in feature_names_in_; fall back to the order above
# when that attribute is not available. A missing feature raises KeyError here.
feature_order = list(getattr(model, "feature_names_in_", feature_values))
input_frame = pd.DataFrame([feature_values])[feature_order]

# Same values as a plain 2-D array (one row), kept under the original name.
input_data = input_frame.to_numpy()

# Predict
prediction = model.predict(input_frame)

print(f"Predicted Demand Level: {prediction[0]}")


Predicted Demand Level: 2
