In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error # to be used alongside root_mean_squared_error
from sklearn.preprocessing import LabelEncoder
import pickle

In [2]:
# Load the fire-resistance dataset from the working directory into a DataFrame
csv_path = "data_fire_resistance.csv"
df = pd.read_csv(csv_path)

# Last expression of the cell: render the frame (pandas truncates long frames)
df

Unnamed: 0,structural_type,construction_material,fire_load_type,wall_thickness,height_floors,temperature,wind,humidity,area_m2,paint,room_number,room_occupants,fire_resistance
0,Masonry Structures,Concrete,Residential,2,5,20.645259,7.090407,54.387080,1354,Fire Resistant Paint,10,10,3.5
1,Timber Structures,Steel,Commercial,1,26,25.424987,7.322115,45.218521,944,Fire Resistant Paint,67,32,2.8
2,Timber Structures,Concrete,Residential,2,3,24.313027,1.962642,68.110925,1998,Standard Paint,3,5,3.8
3,Timber Structures,Steel,Commercial,2,17,24.128319,3.066042,71.145703,1872,Standard Paint,74,22,2.0
4,Timber Structures,Timber,Commercial,2,38,10.848886,1.570751,60.552403,1070,Fire Resistant Paint,84,30,3.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...
22995,Timber Structures,Concrete,Residential,2,2,29.579252,9.580386,48.106025,437,Standard Paint,8,6,3.4
22996,Sandstone,Timber,Commercial,1,44,14.117714,1.578689,79.286498,1521,Fire Resistant Paint,65,28,3.4
22997,Brick,Timber,Commercial,1,38,15.620564,5.346465,48.826350,1710,Fire Resistant Paint,77,24,4.4
22998,Brick,Timber,Commercial,1,14,25.247550,1.349633,53.155260,584,Fire Resistant Paint,94,40,3.1


In [3]:
# Encode the categorical feature columns as integer codes, using one
# LabelEncoder per column, and persist the fitted encoders to
# 'label_encoders.pkl' so the same string -> code mapping can be reused later.
#
# NOTE(review): scikit-learn documents LabelEncoder for target labels and
# OrdinalEncoder for input features; kept as-is so the pickle format
# (dict of column name -> LabelEncoder) does not change. Consider switching.
categorical_columns = ['structural_type', 'construction_material', 'fire_load_type', 'paint']

# This cell overwrites `df` in place, so running it a second time would fit the
# encoders on the integer codes instead of the original strings and silently
# overwrite the saved pickle with useless mappings. Fail loudly instead.
already_encoded = [
    column for column in categorical_columns
    if pd.api.types.is_numeric_dtype(df[column])
]
if already_encoded:
    raise RuntimeError(
        f"Columns {already_encoded} are already numeric; reload the CSV "
        "before re-running the label-encoding cell."
    )

# One independent encoder per column, keyed by column name
label_encoders = {column: LabelEncoder() for column in categorical_columns}

# Fit each encoder on its column and replace the strings with integer codes
for column, encoder in label_encoders.items():
    df[column] = encoder.fit_transform(df[column])

# Save the fitted encoders to a file
with open('label_encoders.pkl', 'wb') as encoders_file:
    pickle.dump(label_encoders, encoders_file)

# Display the transformed DataFrame
df


Unnamed: 0,structural_type,construction_material,fire_load_type,wall_thickness,height_floors,temperature,wind,humidity,area_m2,paint,room_number,room_occupants,fire_resistance
0,1,0,1,2,5,20.645259,7.090407,54.387080,1354,0,10,10,3.5
1,4,1,0,1,26,25.424987,7.322115,45.218521,944,0,67,32,2.8
2,4,0,1,2,3,24.313027,1.962642,68.110925,1998,1,3,5,3.8
3,4,1,0,2,17,24.128319,3.066042,71.145703,1872,1,74,22,2.0
4,4,2,0,2,38,10.848886,1.570751,60.552403,1070,0,84,30,3.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...
22995,4,0,1,2,2,29.579252,9.580386,48.106025,437,1,8,6,3.4
22996,3,2,0,1,44,14.117714,1.578689,79.286498,1521,0,65,28,3.4
22997,0,2,0,1,38,15.620564,5.346465,48.826350,1710,0,77,24,4.4
22998,0,2,0,1,14,25.247550,1.349633,53.155260,584,0,94,40,3.1


In [4]:
# Print a summary of the frame (column names, non-null counts, dtypes) to check
# that the encoded columns are now numeric and that no values are missing
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 23000 entries, 0 to 22999
Data columns (total 13 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   structural_type        23000 non-null  int64  
 1   construction_material  23000 non-null  int64  
 2   fire_load_type         23000 non-null  int64  
 3   wall_thickness         23000 non-null  int64  
 4   height_floors          23000 non-null  int64  
 5   temperature            23000 non-null  float64
 6   wind                   23000 non-null  float64
 7   humidity               23000 non-null  float64
 8   area_m2                23000 non-null  int64  
 9   paint                  23000 non-null  int64  
 10  room_number            23000 non-null  int64  
 11  room_occupants         23000 non-null  int64  
 12  fire_resistance        23000 non-null  float64
dtypes: float64(4), int64(9)
memory usage: 2.3 MB


In [5]:
# Separate the predictors from the regression target
target_column = "fire_resistance"
x = df.drop(target_column, axis=1)  # every column except the target
y = df[target_column]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

# Show the shape of each partition as a quick sanity check
tuple(part.shape for part in (x_train, x_test, y_train, y_test))

((18400, 12), (4600, 12), (18400,), (4600,))

In [6]:
# Fit a random-forest regressor on the training split.
# Hyperparameters are collected in one place so they are easy to read and tune;
# the fixed random_state makes the fitted forest reproducible.
forest_params = {
    "n_estimators": 1000,
    "max_depth": 10,
    "min_samples_split": 5,
    "min_samples_leaf": 5,
    "random_state": 42,
}
model = RandomForestRegressor(**forest_params)

# fit() returns the estimator itself, so the cell also displays the fitted model
model.fit(x_train, y_train)

In [7]:
# Make predictions
# Run the fitted model on the held-out test features; the result is kept in
# `y_pred` for the evaluation cell that follows
y_pred = model.predict(x_test)
# Last expression of the cell: display the prediction array
y_pred

array([3.51325469, 3.47044791, 3.72251412, ..., 3.62641211, 3.58144676,
       3.59146776])

In [8]:
# Score the test-set predictions: MSE from scikit-learn, RMSE as its square root
# (RMSE is expressed in the same units as the target)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
rmse = np.sqrt(mse)

# Report both metrics, one per line
print(
    f"Mean Squared Error: {mse}",
    f"Root Mean Squared Error: {rmse}",
    sep="\n",
)

Mean Squared Error: 2.1355408671503393
Root Mean Squared Error: 1.4613489888285889


In [9]:
# Persist the trained estimator to 'fire_model.pkl' so it can be reloaded
# without retraining. `pickle` is already imported in the notebook's import
# cell, so it is not imported again here.
with open('fire_model.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)


In [10]:
# Reload the estimator from disk and rebind `model` to the deserialized copy.
# NOTE: pickle.load can execute arbitrary code; only load files you trust
# (here, a file written by this notebook).
with open('fire_model.pkl', 'rb') as model_file:
    model = pickle.load(model_file)

In [11]:
# Display the estimator currently bound to `model` (when cells run in order,
# this is the copy reloaded from 'fire_model.pkl') to confirm it loaded
model