# model_1 — Outcome Prediction Model (Multi-class)

In [23]:

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
import joblib

import warnings
warnings.filterwarnings("ignore")

In [2]:
df = pd.read_csv("preprocessed_dengue_data.csv")


In [5]:
df.head()

Unnamed: 0,Age,Gender,Temperature,Humidity,Rainfall,Platelet_Count,Outcome,Month,Year,Age_group,...,Symptom_fatigue,Symptom_Fever,Symptom_headache,Symptom_joint pain,Symptom_nausea,Symptom_bleeding,Symptom_retro-orbital pain,Symptom_chills,Symptom_rash,Outcome_encoded
0,48,0,25.7,82.8,45.0,110619,Recovered,6,2022,2,...,0,1,0,1,0,0,0,0,0,3
1,19,1,29.8,63.6,0.0,62974,Hospitalized,4,2022,1,...,0,1,0,0,0,0,1,0,0,2
2,33,1,23.8,92.9,10.0,78755,Hospitalized,7,2023,1,...,0,1,0,0,0,0,1,0,0,2
3,49,0,26.0,79.1,1.0,20787,Critical,8,2022,2,...,0,1,0,0,0,1,0,0,0,0
4,44,0,25.0,82.9,60.0,134421,Recovered,7,2024,2,...,0,1,0,0,0,0,0,0,0,3


In [7]:
df.columns

Index(['Age', 'Gender', 'Temperature', 'Humidity', 'Rainfall',
       'Platelet_Count', 'Outcome', 'Month', 'Year', 'Age_group',
       'Ward_Banjara Hills', 'Ward_Begumpet', 'Ward_Charminar',
       'Ward_Gachibowli', 'Ward_Jubilee Hills', 'Ward_Kukatpally',
       'Ward_LB Nagar', 'Ward_Malakpet', 'Ward_Mehdipatnam',
       'Ward_Musheerabad', 'Ward_Quthbullapur', 'Ward_Secunderabad',
       'Ward_Serilingampally', 'Ward_Uppal', 'Symptom_muscle pain',
       'Symptom_fatigue', 'Symptom_Fever', 'Symptom_headache',
       'Symptom_joint pain', 'Symptom_nausea', 'Symptom_bleeding',
       'Symptom_retro-orbital pain', 'Symptom_chills', 'Symptom_rash',
       'Outcome_encoded'],
      dtype='object')

In [8]:
X = df.drop(columns=['Outcome', 'Outcome_encoded'])  # Adjust as needed
y = df['Outcome_encoded']

In [9]:
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2,random_state=42)

In [10]:
## training model

clf = RandomForestClassifier(random_state=42)
clf.fit(X_train,y_train)


In [11]:
## evaluation
y_pred = clf.predict(X_test)

In [15]:
y_pred

array([3, 2, 0, ..., 2, 3, 2], dtype=int64)

In [24]:
print("Classification Report:")
print(classification_report(y_test, y_pred))


Classification Report:
              precision    recall  f1-score   support

           0       0.98      1.00      0.99       359
           1       0.00      0.00      0.00         6
           2       1.00      1.00      1.00       613
           3       1.00      1.00      1.00       617

    accuracy                           0.99      1595
   macro avg       0.75      0.75      0.75      1595
weighted avg       0.99      0.99      0.99      1595



In [25]:
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))

Confusion Matrix:
[[358   0   1   0]
 [  6   0   0   0]
 [  0   0 613   0]
 [  0   0   1 616]]


In [26]:
from sklearn.metrics import accuracy_score

accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.4f}")


Accuracy: 0.9950


In [28]:
# 6. Save Model
joblib.dump(clf, 'outcome_prediction_model_1.pkl')

['outcome_prediction_model_1.pkl']

In [33]:
import joblib
import numpy as np

# Load model
model = joblib.load('outcome_prediction_model_1.pkl')

# Replace with your full 33-feature input list
input_data = np.array([[
    # 9 Basic features
    33, 1, 25.0, 85.5, 10.0, 75000, 7, 2023, 2,

    # 13 One-hot Wards (example: 'Ward_Kukatpally' = 1)
    0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,

    # 10 Symptoms (e.g., 'Symptom_Fever', 'Symptom_headache' = 1)
    1, 0, 1, 0, 0, 0, 0, 0, 0, 0,

    # 1 Extra Feature (whatever that last column is — maybe a binary encoding?)
    0
]])


# Predict
pred = model.predict(input_data)

# Map or decode prediction
label_map = {0: "Critical", 1: "Dead", 2: "Hospitalized", 3: "Recovered"}
print(f"Predicted Outcome: {label_map[pred[0]]}")


Predicted Outcome: Hospitalized


In [34]:
df.head()

Unnamed: 0,Age,Gender,Temperature,Humidity,Rainfall,Platelet_Count,Outcome,Month,Year,Age_group,...,Symptom_fatigue,Symptom_Fever,Symptom_headache,Symptom_joint pain,Symptom_nausea,Symptom_bleeding,Symptom_retro-orbital pain,Symptom_chills,Symptom_rash,Outcome_encoded
0,48,0,25.7,82.8,45.0,110619,Recovered,6,2022,2,...,0,1,0,1,0,0,0,0,0,3
1,19,1,29.8,63.6,0.0,62974,Hospitalized,4,2022,1,...,0,1,0,0,0,0,1,0,0,2
2,33,1,23.8,92.9,10.0,78755,Hospitalized,7,2023,1,...,0,1,0,0,0,0,1,0,0,2
3,49,0,26.0,79.1,1.0,20787,Critical,8,2022,2,...,0,1,0,0,0,1,0,0,0,0
4,44,0,25.0,82.9,60.0,134421,Recovered,7,2024,2,...,0,1,0,0,0,0,0,0,0,3


In [35]:
df.shape

(7971, 35)

In [37]:
# Correct way to save a model
import pickle

with open('outcome_prediction_model1.pkl', 'wb') as f:
    pickle.dump(clf, f)
