In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score
import pandas as pd

In [None]:
# Read the disaster-response dataset from its CSV file and display it.
data = pd.read_csv(
    filepath_or_buffer='/content/enhanced_disaster_response_dataset.csv',
)
data

Unnamed: 0,Threat_ID,Location,Disaster,Threat_Level,Required_Soldiers,Casualties_Estimated,Response_Time_Hours,Infrastructure_Damage,Evacuation_Required,Medical_Assistance_Needed
0,1,School,Flood,7,84,112,20.52,Medium,No,Yes
1,2,School,Tsunami,4,80,445,4.14,Medium,Yes,Yes
2,3,School,Earthquake,8,120,358,17.30,Low,Yes,Yes
3,4,Office,Tsunami,5,100,280,13.71,High,No,Yes
4,5,School,Tsunami,7,140,116,7.82,Medium,No,No
...,...,...,...,...,...,...,...,...,...,...
495,496,Office,Flood,1,12,164,18.31,High,Yes,No
496,497,Hospital,Fire,7,70,499,17.26,Medium,Yes,No
497,498,Mall,Tsunami,7,140,395,18.91,High,Yes,Yes
498,499,Hospital,Tsunami,9,180,483,4.29,Medium,No,Yes


In [None]:
# Work on an independent copy of the loaded frame. The later encoding cells
# assign new values into `df` column by column; wrapping an existing DataFrame
# in pd.DataFrame(...) does not copy by default, so `df` and `data` could end
# up backed by the same underlying data. An explicit copy keeps `data` exactly
# as it was read from the CSV.
df = data.copy()
df

Unnamed: 0,Threat_ID,Location,Disaster,Threat_Level,Required_Soldiers,Casualties_Estimated,Response_Time_Hours,Infrastructure_Damage,Evacuation_Required,Medical_Assistance_Needed
0,1,School,Flood,7,84,112,20.52,Medium,No,Yes
1,2,School,Tsunami,4,80,445,4.14,Medium,Yes,Yes
2,3,School,Earthquake,8,120,358,17.30,Low,Yes,Yes
3,4,Office,Tsunami,5,100,280,13.71,High,No,Yes
4,5,School,Tsunami,7,140,116,7.82,Medium,No,No
...,...,...,...,...,...,...,...,...,...,...
495,496,Office,Flood,1,12,164,18.31,High,Yes,No
496,497,Hospital,Fire,7,70,499,17.26,Medium,Yes,No
497,498,Mall,Tsunami,7,140,395,18.91,High,Yes,Yes
498,499,Hospital,Tsunami,9,180,483,4.29,Medium,No,Yes


In [None]:
# Count missing values per column (isna is the canonical name; isnull is its alias).
df.isna().sum()

Unnamed: 0,0
Threat_ID,0
Location,0
Disaster,0
Threat_Level,0
Required_Soldiers,0
Casualties_Estimated,0
Response_Time_Hours,0
Infrastructure_Damage,0
Evacuation_Required,0
Medical_Assistance_Needed,0


In [None]:
# (number of rows, number of columns) of the working frame.
df.shape

(500, 10)

In [None]:
# Print each column's name, non-null count and dtype.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 500 entries, 0 to 499
Data columns (total 10 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   Threat_ID                  500 non-null    int64  
 1   Location                   500 non-null    object 
 2   Disaster                   500 non-null    object 
 3   Threat_Level               500 non-null    int64  
 4   Required_Soldiers          500 non-null    int64  
 5   Casualties_Estimated       500 non-null    int64  
 6   Response_Time_Hours        500 non-null    float64
 7   Infrastructure_Damage      500 non-null    object 
 8   Evacuation_Required        500 non-null    object 
 9   Medical_Assistance_Needed  500 non-null    object 
dtypes: float64(1), int64(4), object(5)
memory usage: 39.2+ KB


In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()

Unnamed: 0,Threat_ID,Threat_Level,Required_Soldiers,Casualties_Estimated,Response_Time_Hours
count,500.0,500.0,500.0,500.0,500.0
mean,250.5,5.43,73.856,259.892,12.54854
std,144.481833,2.941156,44.759201,139.434861,6.561336
min,1.0,1.0,10.0,11.0,1.06
25%,125.75,3.0,39.0,142.25,6.81
50%,250.5,5.0,70.0,261.0,12.865
75%,375.25,8.0,104.0,381.25,17.9525
max,500.0,10.0,200.0,499.0,23.95


In [None]:
# Text-valued feature columns that are converted to integer codes with LabelEncoder.
categorical_columns = [
    "Location",
    "Disaster",
    "Infrastructure_Damage",
]

In [None]:
# One dedicated LabelEncoder per categorical column, kept in a dict keyed by
# column name so the fitted encoders remain available after this cell.
label_encoders = {col: LabelEncoder() for col in categorical_columns}

# Fit each encoder on its column and overwrite that column with the integer codes.
for col, le in label_encoders.items():
    df[col] = le.fit_transform(df[col])

In [None]:
# Encode the two Yes/No columns as 1/0 using one shared mapping.
yes_no_codes = {"Yes": 1, "No": 0}
for flag_col in ("Evacuation_Required", "Medical_Assistance_Needed"):
    df[flag_col] = df[flag_col].map(yes_no_codes)


In [None]:
# Re-display the frame to check the result of the encoding cells above.
df

Unnamed: 0,Threat_ID,Location,Disaster,Threat_Level,Required_Soldiers,Casualties_Estimated,Response_Time_Hours,Infrastructure_Damage,Evacuation_Required,Medical_Assistance_Needed
0,1,4,3,7,84,112,20.52,2,0,1
1,2,4,4,4,80,445,4.14,2,1,1
2,3,4,1,8,120,358,17.30,1,1,1
3,4,2,4,5,100,280,13.71,0,0,1
4,5,4,4,7,140,116,7.82,2,0,0
...,...,...,...,...,...,...,...,...,...,...
495,496,2,3,1,12,164,18.31,0,1,0
496,497,0,2,7,70,499,17.26,2,1,0
497,498,1,4,7,140,395,18.91,0,1,1
498,499,0,4,9,180,483,4.29,2,0,1


In [None]:
# Build the feature matrix and the target vector as NumPy arrays.
# Threat_ID is excluded together with the target: it is a row identifier
# (it runs from 1 to 500 over the 500 rows), not a property of the incident,
# so a model should not be given it as a predictor.
x = df.drop(columns=['Required_Soldiers', 'Threat_ID']).values
y = df['Required_Soldiers'].values

In [None]:
# Standardise the feature matrix; the scaler's statistics are learned from every row of x.
scaler = StandardScaler()
X_scaled = scaler.fit(x).transform(x)

In [None]:
# Split the unscaled feature matrix into training and testing sets first, so
# that no statistic used for preprocessing is computed from the test rows.
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

# Fit the scaler on the training rows only, then apply that same transform to
# both splits. `scaler` is left holding the train-fitted transformer.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# Fit a 100-tree random forest on the training split (seeded for repeatability).
rf_settings = {"n_estimators": 100, "random_state": 42}
model = RandomForestRegressor(**rf_settings)
model.fit(X_train, y_train)

In [None]:
# R² of the random forest on the training split.
r2_score(y_train, model.predict(X_train))

0.9999186394992535

In [None]:
# Evaluate the random forest on the held-out split.
# The figure printed as "accuracy" is the R² score expressed as a percentage.
y_pred = model.predict(X_test)
accuracy = 100 * r2_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy:.2f}%")

Model Accuracy: 99.95%


In [None]:
from sklearn.linear_model import LinearRegression

In [None]:
# Fit a linear-regression model on the same training split.
lr = LinearRegression()
lr.fit(X=X_train, y=y_train)

In [None]:
# R² of the linear regression on the training split.
r2_score(y_train, lr.predict(X_train))

0.8319120655446707

In [None]:
# Evaluate the linear regression on the held-out split.
# The figure printed as "accuracy" is the R² score expressed as a percentage.
y_pred = lr.predict(X_test)
accuracy = 100 * r2_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy:.2f}%")

Model Accuracy: 83.03%


In [None]:
from sklearn.tree import DecisionTreeRegressor

In [None]:
# Fit a single decision tree on the training split. The tree is seeded so the
# fitted model, and every score derived from it, is the same on each run;
# 42 matches the seed already used for the split and the random forest.
dtr = DecisionTreeRegressor(random_state=42)
dtr.fit(X_train, y_train)

In [None]:
# R² of the decision tree on the training split.
r2_score(y_train, dtr.predict(X_train))

1.0

In [None]:
# Evaluate the decision tree (dtr) on the held-out split.
# The figure printed as "accuracy" is the R² score expressed as a percentage.
y_pred = dtr.predict(X_test)
accuracy = r2_score(y_test, y_pred) * 100
print(f"Model Accuracy: {accuracy:.2f}%")

Model Accuracy: 83.03%
