In [76]:
import pandas as pd   #import pandas to read csv filed
import numpy as np     #import numpy to create arrays for detecting the outliers

## Reading data and understanding it

In [77]:
# Load the forest-fires CSV into a DataFrame named `data`.
# NOTE(review): the path is absolute ('/content/...', which looks like a Colab working
# directory - confirm); the cell fails wherever the file is not at that exact location.
data=pd.read_csv('/content/forestfires.csv')
# Bare last expression so the notebook renders the loaded frame.
data

Unnamed: 0,X,Y,month,day,FFMC,DMC,DC,ISI,temp,RH,wind,rain,area
0,7,5,mar,fri,86.2,26.2,94.3,5.1,8.2,51,6.7,0.0,0.00
1,7,4,oct,tue,90.6,35.4,669.1,6.7,18.0,33,0.9,0.0,0.00
2,7,4,oct,sat,90.6,43.7,686.9,6.7,14.6,33,1.3,0.0,0.00
3,8,6,mar,fri,91.7,33.3,77.5,9.0,8.3,97,4.0,0.2,0.00
4,8,6,mar,sun,89.3,51.3,102.2,9.6,11.4,99,1.8,0.0,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...
512,4,3,aug,sun,81.6,56.7,665.6,1.9,27.8,32,2.7,0.0,6.44
513,2,4,aug,sun,81.6,56.7,665.6,1.9,21.9,71,5.8,0.0,54.29
514,7,4,aug,sun,81.6,56.7,665.6,1.9,21.2,70,6.7,0.0,11.16
515,1,4,aug,sat,94.4,146.0,614.7,11.3,25.6,42,4.0,0.0,0.00


In [78]:
# Summarise the frame: 517 rows, 13 columns, the dtype of every column and the
# non-null count per column (no column has missing values).
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 517 entries, 0 to 516
Data columns (total 13 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   X       517 non-null    int64  
 1   Y       517 non-null    int64  
 2   month   517 non-null    object 
 3   day     517 non-null    object 
 4   FFMC    517 non-null    float64
 5   DMC     517 non-null    float64
 6   DC      517 non-null    float64
 7   ISI     517 non-null    float64
 8   temp    517 non-null    float64
 9   RH      517 non-null    int64  
 10  wind    517 non-null    float64
 11  rain    517 non-null    float64
 12  area    517 non-null    float64
dtypes: float64(8), int64(3), object(2)
memory usage: 52.6+ KB


# **Data preprocessing**

In [79]:
# Count the missing entries in every column; a result of all zeros means
# there is nothing to impute.
data.isna().sum()

X        0
Y        0
month    0
day      0
FFMC     0
DMC      0
DC       0
ISI      0
temp     0
RH       0
wind     0
rain     0
area     0
dtype: int64

In [80]:
# Describe only the object-dtype (text) columns: count, number of unique values,
# most frequent value and how often it occurs.
# The result is transposed so that each described column becomes one row.
data.describe(include=['O']).T

Unnamed: 0,count,unique,top,freq
month,517,12,aug,184
day,517,7,sun,95


In [81]:
from time import strptime

# Add a numeric 'Month' column alongside the original text 'month' column.
# Each abbreviation is parsed with the '%b' directive and the month number is
# read from the resulting struct_time.
# NOTE(review): '%b' matches the current locale's abbreviated month names, so this
# presumably relies on an English locale - confirm.
data['Month'] = data['month'].map(lambda abbrev: strptime(str(abbrev), '%b').tm_mon)

In [82]:
# Encode the weekday abbreviations as integers (sat=1 ... fri=7), overwriting 'day'.
# The result is assigned back to the column instead of calling
# data['day'].replace(..., inplace=True): that form mutates an intermediate Series,
# which pandas warns about and which no longer updates `data` once Copy-on-Write is on.
day_codes = {'sat': 1, 'sun': 2, 'mon': 3, 'tue': 4, 'wed': 5, 'thu': 6, 'fri': 7}
# Values that are not keys of day_codes (e.g. integers that are already encoded when
# the cell is re-run) pass through unchanged, as they did with replace().
# The explicit cast guarantees an int64 column and fails loudly on an unknown label.
data['day'] = data['day'].map(lambda day: day_codes.get(day, day)).astype('int64')

In [83]:
# Preview the first rows to check the integer-coded 'day' column and the new 'Month' column.
data.head()

Unnamed: 0,X,Y,month,day,FFMC,DMC,DC,ISI,temp,RH,wind,rain,area,Month
0,7,5,mar,7,86.2,26.2,94.3,5.1,8.2,51,6.7,0.0,0.0,3
1,7,4,oct,4,90.6,35.4,669.1,6.7,18.0,33,0.9,0.0,0.0,10
2,7,4,oct,1,90.6,43.7,686.9,6.7,14.6,33,1.3,0.0,0.0,10
3,8,6,mar,7,91.7,33.3,77.5,9.0,8.3,97,4.0,0.2,0.0,3
4,8,6,mar,2,89.3,51.3,102.2,9.6,11.4,99,1.8,0.0,0.0,3


In [84]:
# Turn the continuous 'area' target into a binary label:
# 1 when the original value is above the threshold, otherwise 0.
# The Boolean comparison is cast straight to int64 instead of building '1'/'0'
# strings with np.where and parsing them back with pd.to_numeric; the resulting
# column is the same.
# NOTE: this overwrites the original values, so re-running the cell on the
# already-binarised column would label every row 0.
AREA_THRESHOLD = 12
data['area'] = (data['area'] > AREA_THRESHOLD).astype('int64')
# Preview the first rows after the conversion.
data.head()

Unnamed: 0,X,Y,month,day,FFMC,DMC,DC,ISI,temp,RH,wind,rain,area,Month
0,7,5,mar,7,86.2,26.2,94.3,5.1,8.2,51,6.7,0.0,0,3
1,7,4,oct,4,90.6,35.4,669.1,6.7,18.0,33,0.9,0.0,0,10
2,7,4,oct,1,90.6,43.7,686.9,6.7,14.6,33,1.3,0.0,0,10
3,8,6,mar,7,91.7,33.3,77.5,9.0,8.3,97,4.0,0.2,0,3
4,8,6,mar,2,89.3,51.3,102.2,9.6,11.4,99,1.8,0.0,0,3


In [85]:
# Tally each label in 'area' to confirm the column is now binary and to see
# how many rows fall into each class.
data['area'].value_counts()

0    438
1     79
Name: area, dtype: int64

In [86]:
# List the dtype of every column after the conversions made so far.
data.dtypes

X          int64
Y          int64
month     object
day        int64
FFMC     float64
DMC      float64
DC       float64
ISI      float64
temp     float64
RH         int64
wind     float64
rain     float64
area       int64
Month      int64
dtype: object

In [87]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns,
# transposed so each column is one row. Used to compare the scale of the features
# and to look for outliers.
data.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
X,517.0,4.669246,2.313778,1.0,3.0,4.0,7.0,9.0
Y,517.0,4.299807,1.2299,2.0,4.0,4.0,5.0,9.0
day,517.0,3.83559,2.092356,1.0,2.0,4.0,6.0,7.0
FFMC,517.0,90.644681,5.520111,18.7,90.2,91.6,92.9,96.2
DMC,517.0,110.87234,64.046482,1.1,68.6,108.3,142.4,291.3
DC,517.0,547.940039,248.066192,7.9,437.7,664.2,713.9,860.6
ISI,517.0,9.021663,4.559477,0.0,6.5,8.4,10.8,56.1
temp,517.0,18.889168,5.806625,2.2,15.5,19.3,22.8,33.3
RH,517.0,44.288201,16.317469,15.0,33.0,42.0,53.0,100.0
wind,517.0,4.017602,1.791653,0.4,2.7,4.0,4.9,9.4


In [88]:
# First and third quartiles of the ISI column, the inputs of the IQR outlier rule.
quartile25 = data['ISI'].quantile(0.25)
quartile75 = data['ISI'].quantile(0.75)
print("75th quartile: ",quartile75)
print("25th quartile: ",quartile25)

75th quartile:  10.8
25th quartile:  6.5


In [89]:
# Interquartile range of ISI: the distance between the third and first quartiles.
iqr = quartile75 - quartile25
print ("IQR: ",iqr)

IQR:  4.300000000000001


In [90]:
# Outlier fences of the 1.5 x IQR rule: values beyond them are treated as outliers.
lower, upper = quartile25 - 1.5 * iqr, quartile75 + 1.5 * iqr

In [91]:
# Integer row positions (not Boolean flags) of the ISI values lying on or beyond
# each fence.
# NOTE(review): the comparisons here are inclusive (>=, <=) while the removal step
# uses strict > and <, so a row exactly on a fence is listed here but not dropped.
upper_arr = np.nonzero((data['ISI'] >= upper).to_numpy())[0]
lower_arr = np.nonzero((data['ISI'] <= lower).to_numpy())[0]

In [92]:
# Remove, in place, every row whose ISI lies strictly above the upper fence
# or strictly below the lower fence.
data.drop(
    index=data.index[(data['ISI'] > upper) | (data['ISI'] < lower)],
    inplace=True,
)

In [93]:
# Summary of the ISI column once the outlier rows have been removed.
# (A Series is its own transpose, so no .T is needed.)
data['ISI'].describe()

count    494.000000
mean       8.514372
std        3.515999
min        0.400000
25%        6.300000
50%        8.200000
75%       10.350000
max       17.000000
Name: ISI, dtype: float64

## Feature Selection

In [94]:
# Drop, in place, the columns that will not be fed to the model:
#   - 'month': the text column, now duplicated by the numeric 'Month' column;
#   - 'temp', 'RH' (humidity), 'wind', 'rain': considered redundant because, per the
#     author's domain note, FFMC, DMC, DC and ISI depend on them in the FFWI system;
#   - 'X', 'Y': judged by the author unlikely to help the predictions.
data.drop(columns=['month', 'temp', 'RH', 'wind', 'rain', 'X', 'Y'], inplace=True)

## Splitting and scaling the data

In [95]:
from sklearn.model_selection import train_test_split

# 'area' is the label; every remaining column is a feature.
y = data['area']
X = data.drop(columns='area')

# Hold out 20% of the rows for testing; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [96]:
from sklearn.preprocessing import StandardScaler

# Standardise the features. The scaler is fitted on the training split only and the
# fitted scaler is then applied to both splits; the label ('area') is not scaled.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

## Building the Model

In [97]:
from numpy import loadtxt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
import tensorflow as tf
from tensorflow import keras
#import to apply neural network model by keras in tensorflow library

In [98]:
# Build a small feed-forward binary classifier:
#   input -> Dense(3, ReLU) -> Dense(1, sigmoid)
# The input width is read from the scaled training matrix rather than hard-coded
# as 6, so the model keeps matching the data if the set of feature columns changes.
# NOTE(review): no random seed is set in the visible cells before the model is
# built, so the initial weights (and the final metrics) can differ between runs.
n_features = X_train_scaled.shape[1]
model = keras.Sequential([
    keras.layers.Input(shape=(n_features,)),       # one input per feature column
    keras.layers.Dense(3, activation='relu'),      # hidden layer
    keras.layers.Dense(1, activation='sigmoid'),   # single output unit with sigmoid activation
])

In [99]:
# Configure training: Adam optimiser, binary cross-entropy loss, accuracy reported as the metric.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [100]:
# Train on the scaled training features for 1000 epochs in mini-batches of 32,
# holding back 10% of the training rows for validation.
model.fit(
    X_train_scaled,
    y_train,
    batch_size=32,
    epochs=1000,
    validation_split=0.1,
)

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
E

<keras.callbacks.History at 0x7ec3bee1add0>

In [101]:
# Evaluate the trained model on the held-out test split.
# evaluate() returns the loss followed by the compiled metric (accuracy).
# NOTE(review): the label is imbalanced (438 rows of class 0 vs 79 of class 1 before
# outlier removal), so accuracy should be compared against a majority-class baseline.
test_loss, test_accuracy = model.evaluate(X_test_scaled, y_test)
print(f"Test Loss: {test_loss:.4f}")
print(f"Test Accuracy: {test_accuracy:.4f}")

Test Loss: 0.4395
Test Accuracy: 0.8485


In [102]:
# Run the model on the scaled test features.
# The output layer is a sigmoid, so these are scores between 0 and 1 rather than
# hard 0/1 labels (presumably one score per test row - confirm the shape).
predictions = model.predict(X_test_scaled)

