# **Forest Fires**

In [None]:
import pandas as pd
import numpy as np
import keras
from sklearn.preprocessing import StandardScaler
import seaborn as sns
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Dense
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE

In [None]:
# Read the forest-fires CSV into a DataFrame and preview its first rows.
csv_path = '/content/forestfires.csv'
df = pd.read_csv(csv_path)
df.head()

# **Exploratory Data Analysis**

In [None]:
# Descriptive statistics for the dataset.
df.describe()

In [None]:
# Column names, non-null counts and dtypes.
df.info()

In [None]:
# Total number of cells (rows x columns).
df.size

In [None]:
# (rows, columns) of the frame.
df.shape

In [None]:
# Number of missing values in each column.
df.isnull().sum()

In [None]:
# All column labels.
df.columns

In [None]:
# Data type of each column.
df.dtypes

In [None]:
# Categorical data: print the name of every column stored with the 'object' dtype.
object_columns = [name for name in df.columns if df[name].dtype == 'object']
for name in object_columns:
  print(name)

In [None]:
# Continuous data: print the name of every column whose dtype is not 'object'.
non_object_columns = [name for name in df.columns if df[name].dtype != 'object']
for name in non_object_columns:
  print(name)

# **Correlation Matrix**

In [None]:
# Pairwise correlation of the numeric columns only. At this point the frame
# still holds string-valued columns, which newer pandas versions refuse to
# correlate instead of skipping silently, so they are excluded explicitly.
df.select_dtypes(include = 'number').corr()

In [None]:
# Annotated heatmap of the correlation matrix. Only numeric columns are
# correlated: the frame still holds string-valued columns here, and newer
# pandas versions raise on them instead of dropping them silently.
numeric_corr = df.select_dtypes(include = 'number').corr()
plt.figure(figsize=(30,15))
sns.heatmap(numeric_corr, annot = True, cmap = 'RdYlGn')
plt.show()

In [None]:
# Column groups used by the distribution plots: the non-object columns
# (base columns followed by the day* and month* columns) and the object columns.
base_columns = ['FFMC', 'DMC', 'DC', 'ISI', 'temp', 'RH', 'wind', 'rain', 'area']
day_columns = ['dayfri', 'daymon', 'daysat', 'daysun', 'daythu', 'daytue', 'daywed']
month_columns = ['monthapr', 'monthaug', 'monthdec', 'monthfeb', 'monthjan', 'monthjul',
                 'monthjun', 'monthmar', 'monthmay', 'monthnov', 'monthoct', 'monthsep']

Continuous = base_columns + day_columns + month_columns
Category = ['month', 'day', 'size_category']

# **Distribution of Continuous**

In [None]:
# One wide distribution plot per column listed in Continuous.
displot_options = dict(height = 7, aspect = 2, color = '#158685')
for feature in Continuous:
  sns.displot(data = df, x = feature, **displot_options)
  plt.show()

# **Distribution of Category**

In [None]:
# Bar chart of value counts for each column listed in Category.
countplot_options = dict(data = df, palette = 'deep')
for feature in Category:
  sns.countplot(x = feature, **countplot_options)
  plt.show()

# **Box Plot**

In [None]:
# Columns drawn by the box plots; the list is also read by later plotting cells.
columns = ['FFMC', 'DMC', 'DC', 'ISI', 'temp', 'RH', 'wind',
       'rain', 'area', 'dayfri', 'daymon', 'daysat', 'daysun', 'daythu',
       'daytue', 'daywed', 'monthapr', 'monthaug', 'monthdec', 'monthfeb',
       'monthjan', 'monthjul', 'monthjun', 'monthmar', 'monthmay', 'monthnov',
       'monthoct', 'monthsep']
for col in columns:
    plt.figure()   # plots figure for each iteration
    # Pass the Series as `x` by keyword: a bare positional argument is read as
    # `x` by some seaborn versions and as `data` by others, which changes the plot.
    sns.boxplot(x = df[col])
    plt.show()

# **Dist Plot**

In [None]:
# Histogram with a KDE overlay for each column in `columns`.
for col in columns:
  plt.figure()
  # The original loop bound `cols` but plotted df[col], so every figure showed
  # the same stale column left over from an earlier cell. histplot replaces the
  # deprecated distplot; stat='density' keeps the histogram on a density scale.
  sns.histplot(x = df[col], kde = True, stat = 'density')
  plt.show()

# **Violin Plot**

In [None]:
# One violin plot per column in `columns`.
for col in columns:
  plt.figure()
  # Pass the Series as `x` by keyword: a bare positional argument is read as
  # `x` by some seaborn versions and as `data` by others.
  sns.violinplot(x = df[col])
  plt.show()

# **Encoding the Categorical Variable**

In [None]:
# Drop the string-valued month and day columns.
# NOTE(review): the day*/month* columns already in the frame presumably are
# their one-hot encodings — confirm against the data source.
# errors='ignore' keeps the cell re-runnable: on a second run the columns are
# already gone and the in-place version would raise KeyError.
df = df.drop(columns = ['month','day'], errors = 'ignore')

In [None]:
# Preview the frame after dropping month and day.
df.head()

In [None]:
from sklearn.preprocessing import LabelEncoder
# Replace the labels in size_category with the integer codes learned by the encoder.
le = LabelEncoder()
le.fit(df['size_category'])
df['size_category'] = le.transform(df['size_category'])

In [None]:
# Preview the frame after label-encoding size_category.
df.head()

In [None]:
# Number of rows in each encoded size_category class.
df['size_category'].value_counts()

We can see that in size_category there are 378 small observations and 139 large observations.

Here the data is imbalanced.

# **Defining Independent and Dependent Variable**

In [None]:
# Features: every column except the target.
# NOTE(review): X still contains `area`; if size_category was derived from
# area this leaks the target into the features — confirm.
X = df.drop(columns = 'size_category')
# Target: selected by name rather than by position, so the cell does not
# silently pick a different column if the column order ever changes.
Y = df['size_category']

In [None]:
# Preview the feature matrix.
X.head()

In [None]:
# Preview the target.
Y.head()

In [None]:
# Shapes of the feature matrix and the target.
X.shape,Y.shape

# **Balancing the data by Smote**

In [None]:
# Class counts before oversampling.
Y.value_counts()

In [None]:
# Oversample with SMOTE so both classes end up with the same number of rows.
# A fixed random_state makes the synthetic rows reproducible across runs; the
# unused `smote` object from the original cell has been removed.
# NOTE(review): resampling happens before the train/test split, so synthetic
# rows can end up in the test set — confirm this is intended.
oversample = SMOTE(random_state = 42)
X, Y = oversample.fit_resample(X, Y)

# Class counts after oversampling.
Y.value_counts()

The data is now balanced, so we can move on.

# **Scaling the Data**

In [None]:
# Standardization
# Standardization: fit the scaler on X and scale X in a single call.
a = StandardScaler()
X_standardized = a.fit_transform(X)

In [None]:
# Shape of the standardized feature array.
X_standardized.shape

In [None]:
# Preview the standardized features with their original column names instead
# of anonymous integer labels (assumes X is still a DataFrame after
# resampling — TODO confirm), showing only the first rows.
pd.DataFrame(X_standardized, columns = X.columns).head()

# **Splitting the data into Training and Testing dataset**

In [None]:
# Hold out 30% of the rows for testing, with a fixed seed.
# NOTE(review): this splits the unscaled X, while the grid searches are fitted
# on X_standardized — confirm the final network is meant to train on unscaled
# features.
X_train,X_test,Y_train,Y_test = train_test_split(X,Y,test_size=0.3,random_state=42)

In [None]:
# Shapes of the training and test feature sets.
X_train.shape,X_test.shape

# **Tuning the Hyperparameters :- Batch size and Epochs**

In [None]:
from sklearn.model_selection import GridSearchCV, KFold
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from keras.optimizers import adam_v2

In [None]:
# Create model
def create_model():
  """Build and compile the baseline binary classifier.

  The network takes 28 input features, has two ReLU hidden layers (30 and 25
  units, uniform initialization) and one sigmoid output unit. It is compiled
  with binary cross-entropy loss, Adam at a learning rate of 0.01, and
  accuracy as the reported metric.

  Returns:
    The compiled Sequential model.
  """
  model = Sequential()
  model.add(Dense(30, input_dim = 28, kernel_initializer='uniform',activation='relu'))
  model.add(Dense(25, kernel_initializer='uniform',activation='relu'))
  model.add(Dense(1, kernel_initializer='uniform',activation='sigmoid'))

  # `learning_rate` is the current keyword; `lr` is a deprecated alias.
  adam = adam_v2.Adam(learning_rate = 0.01)

  model.compile(loss = 'binary_crossentropy',
                optimizer = adam,
                metrics = ['accuracy'])

  return model

In [None]:
# Create the model
# Wrap the model builder so scikit-learn can drive it.
model = KerasClassifier(build_fn = create_model, verbose = 0)


# Define the grid search parameter
batch_size = [10,20,40,60]
epochs = [10,50,100,150]

#Make a dictionary of the grid search parameters
param_grid = dict(batch_size = batch_size,
                  epochs = epochs)

# Build and fit the GridSearchCV.
# The folds are shuffled with a fixed seed: X_standardized/Y come straight from
# the SMOTE resampling and nothing shuffles them, so contiguous folds could be
# dominated by one class (presumably synthetic rows sit together — confirm).
grid = GridSearchCV(estimator = model,
                    param_grid = param_grid,
                    cv = KFold(n_splits = 5, shuffle = True, random_state = 42),
                    verbose = 10)

grid_result = grid.fit(X_standardized, Y)

In [None]:
# Summarize the results
# Report the best configuration, then mean/std test score of every candidate.
print('Best : {}, using {}' .format(grid_result.best_score_,grid_result.best_params_))

cv_results = grid_result.cv_results_
means = cv_results['mean_test_score']
stds = cv_results['std_test_score']
params = cv_results['params']

for idx in range(min(len(means), len(stds), len(params))):
  print('{},{} with: {}'.format(means[idx], stds[idx], params[idx]))

# **Tuning of Hyperparameters:-Learning Rate and Drop out rate**

In [None]:
from keras.layers import Dropout

# Defining the model
def create_model(learning_rate, dropout_rate):
  """Build and compile the classifier for the learning-rate/dropout search.

  Two ReLU hidden layers (30 and 25 units, uniform initialization), each
  followed by dropout, then one sigmoid output unit. Compiled with binary
  cross-entropy loss, Adam, and accuracy as the reported metric.

  Args:
    learning_rate: learning rate passed to the Adam optimizer.
    dropout_rate: rate used by both Dropout layers.

  Returns:
    The compiled Sequential model.
  """
  model1 = Sequential()
  model1.add(Dense(30, input_dim = 28, kernel_initializer='uniform',activation='relu'))
  model1.add(Dropout(dropout_rate))
  # Only the first layer declares the input size; the original also passed
  # input_dim = 28 here, which does not match this layer's 30 inputs.
  model1.add(Dense(25, kernel_initializer='uniform',activation='relu'))
  model1.add(Dropout(dropout_rate))
  model1.add(Dense(1,activation='sigmoid'))

  # `learning_rate` is the current keyword; `lr` is a deprecated alias.
  adam = adam_v2.Adam(learning_rate = learning_rate)

  model1.compile(loss = 'binary_crossentropy',
                optimizer = adam,
                metrics = ['accuracy'])
  return model1

In [None]:
# Create the model
# Wrap the model builder with a fixed batch size and epoch count.
model1 = KerasClassifier(build_fn = create_model,
                        verbose = 0,
                        batch_size = 20,
                        epochs = 150)


# Define the grid search parameter
learning_rate = [0.001,0.01,0.1]
dropout_rate= [0.0,0.1,0.2]

#Make a dictionary of the grid search parameters
param_grid = dict(learning_rate = learning_rate,
                  dropout_rate = dropout_rate)

# Build and fit the GridSearchCV.
# The folds are shuffled with a fixed seed: X_standardized/Y come straight from
# the SMOTE resampling and nothing shuffles them, so contiguous folds could be
# dominated by one class (presumably synthetic rows sit together — confirm).
grid = GridSearchCV(estimator = model1,
                    param_grid = param_grid,
                    cv = KFold(n_splits = 5, shuffle = True, random_state = 42),
                    verbose = 10)

grid_result = grid.fit(X_standardized, Y)

In [None]:
# Summarize the results
# Report the best configuration, then mean/std test score of every candidate.
print('Best : {}, using {}' .format(grid_result.best_score_,grid_result.best_params_))

cv_results = grid_result.cv_results_
means = cv_results['mean_test_score']
stds = cv_results['std_test_score']
params = cv_results['params']

for idx in range(min(len(means), len(stds), len(params))):
  print('{},{} with: {}'.format(means[idx], stds[idx], params[idx]))

# **Tuning of Hyperparameters:- Activation Function and Kernel Initializer**

In [None]:
# Defining the model

def create_model(activation_function,init):
    """Build and compile the classifier for the activation/initializer search.

    Two hidden layers (30 and 25 units), each followed by Dropout(0.2), then
    one sigmoid output unit. Compiled with binary cross-entropy loss, Adam at
    a learning rate of 0.001, and accuracy as the reported metric.

    Args:
        activation_function: activation used by both hidden layers.
        init: kernel initializer used by both hidden layers.

    Returns:
        The compiled Sequential model.
    """
    model2 = Sequential()
    model2.add(Dense(30,input_dim = 28,kernel_initializer = init,activation = activation_function))
    model2.add(Dropout(0.2))
    # Only the first layer declares the input size; the original also passed
    # input_dim = 28 here, which does not match this layer's 30 inputs.
    model2.add(Dense(25,kernel_initializer = init,activation = activation_function))
    model2.add(Dropout(0.2))
    model2.add(Dense(1,activation = 'sigmoid'))

    # `learning_rate` is the current keyword; `lr` is a deprecated alias.
    adam = adam_v2.Adam(learning_rate = 0.001)
    model2.compile(loss = 'binary_crossentropy',optimizer = adam,metrics = ['accuracy'])
    return model2

# Create the model

model2 = KerasClassifier(build_fn = create_model,verbose = 0,batch_size = 20,epochs = 150)

# Define the grid search parameters
activation_function = ['softmax','relu','tanh','linear']
init = ['uniform','normal','zero']

# Make a dictionary of the grid search parameters
param_grids = dict(activation_function = activation_function,init = init)

# Build and fit the GridSearchCV.
# The folds are shuffled with a fixed seed: X_standardized/Y come straight from
# the SMOTE resampling and nothing shuffles them, so contiguous folds could be
# dominated by one class (presumably synthetic rows sit together — confirm).
grid = GridSearchCV(estimator = model2,
                    param_grid = param_grids,
                    cv = KFold(n_splits = 5, shuffle = True, random_state = 42),
                    verbose = 10)
grid_result = grid.fit(X_standardized,Y)

In [None]:
# Summarize the results
# Report the best configuration, then mean/std test score of every candidate.
print('Best : {}, using {}'.format(grid_result.best_score_,grid_result.best_params_))
cv_results = grid_result.cv_results_
means = cv_results['mean_test_score']
stds = cv_results['std_test_score']
params = cv_results['params']
for idx in range(min(len(means), len(stds), len(params))):
  print('{},{} with: {}'.format(means[idx], stds[idx], params[idx]))

# **Tuning of Hyperparameter :- Number of Neurons in activation layer**

In [None]:
# Defining the model

def create_model(neuron1,neuron2):
    """Build and compile the classifier for the hidden-layer-size search.

    Two tanh hidden layers (normal initialization), each followed by
    Dropout(0.2), then one sigmoid output unit. Compiled with binary
    cross-entropy loss, Adam at a learning rate of 0.001, and accuracy as the
    reported metric.

    Args:
        neuron1: number of units in the first hidden layer.
        neuron2: number of units in the second hidden layer.

    Returns:
        The compiled Sequential model.
    """
    model3 = Sequential()
    model3.add(Dense(neuron1,input_dim = 28,kernel_initializer = 'normal',activation = 'tanh'))
    model3.add(Dropout(0.2))
    # Only the first layer needs an input size; later layers take it from the
    # layer before them, so the redundant input_dim is omitted.
    model3.add(Dense(neuron2,kernel_initializer = 'normal',activation = 'tanh'))
    model3.add(Dropout(0.2))
    model3.add(Dense(1,activation = 'sigmoid'))

    # `learning_rate` is the current keyword; `lr` is a deprecated alias.
    adam = adam_v2.Adam(learning_rate = 0.001)
    model3.compile(loss = 'binary_crossentropy',optimizer = adam,metrics = ['accuracy'])
    return model3

# Create the model

model3 = KerasClassifier(build_fn = create_model,verbose = 0,batch_size = 20,epochs = 150)

# Define the grid search parameters

neuron1 = [4,8,16,18,25,30,35]
neuron2 = [2,4,8,16,18,25,30]

# Make a dictionary of the grid search parameters

param_grids = dict(neuron1 = neuron1,neuron2 = neuron2)

# Build and fit the GridSearchCV.
# The folds are shuffled with a fixed seed: X_standardized/Y come straight from
# the SMOTE resampling and nothing shuffles them, so contiguous folds could be
# dominated by one class (presumably synthetic rows sit together — confirm).
grid = GridSearchCV(estimator = model3,
                    param_grid = param_grids,
                    cv = KFold(n_splits = 5, shuffle = True, random_state = 42),
                    verbose = 10)
grid_result = grid.fit(X_standardized,Y)

In [None]:
# Summarize the results
# Report the best configuration, then mean/std test score of every candidate.
print('Best : {}, using {}'.format(grid_result.best_score_,grid_result.best_params_))
cv_results = grid_result.cv_results_
means = cv_results['mean_test_score']
stds = cv_results['std_test_score']
params = cv_results['params']
for idx in range(min(len(means), len(stds), len(params))):
  print('{},{} with: {}'.format(means[idx], stds[idx], params[idx]))

# **Training model with optimum values of Hyperparameters**

In [None]:
from sklearn.metrics import classification_report, accuracy_score

# Defining the model

def create_model():
    """Build and compile the classifier with the selected hyperparameters.

    Two tanh hidden layers (8 and 18 units, normal initialization), each
    followed by Dropout(0.2), then one sigmoid output unit. Compiled with
    binary cross-entropy loss, Adam at a learning rate of 0.001, and accuracy
    as the reported metric.

    Returns:
        The compiled Sequential model.
    """
    model4 = Sequential()
    model4.add(Dense(8,input_dim = 28,kernel_initializer = 'normal',activation = 'tanh'))
    model4.add(Dropout(0.2))
    # Only the first layer needs an input size; later layers take it from the
    # layer before them, so the redundant input_dim is omitted.
    model4.add(Dense(18,kernel_initializer = 'normal',activation = 'tanh'))
    model4.add(Dropout(0.2))
    model4.add(Dense(1,activation = 'sigmoid'))

    # `learning_rate` is the current keyword; `lr` is a deprecated alias.
    adam = adam_v2.Adam(learning_rate = 0.001)
    model4.compile(loss = 'binary_crossentropy',optimizer = adam,metrics = ['accuracy'])
    return model4

# Create the model

model4 = KerasClassifier(build_fn = create_model,verbose = 0,batch_size = 20,epochs = 150)

# Hold out 30% of the standardized rows so the accuracy below is measured on
# data the model has not seen; the original fitted and scored on the same rows,
# which reports training accuracy only.
X_fit, X_holdout, Y_fit, Y_holdout = train_test_split(X_standardized, Y,
                                                      test_size = 0.3,
                                                      random_state = 42)

# Fitting the model

model4.fit(X_fit,Y_fit)

# Predicting using trained model

Y_predict = model4.predict(X_holdout)

# Printing the metrics
print(accuracy_score(Y_holdout,Y_predict))

# **Building Neural Networks Model using Optimal Values**

In [None]:
 # create ANN model
# Final network, declared as a list of layers: two tanh hidden layers (8 and
# 18 units), each followed by Dropout(0.2), and a single sigmoid output node
# because one number is predicted per row.
model = Sequential([
    Dense(units=8, input_dim=X_train.shape[1], kernel_initializer='normal', activation='tanh'),
    Dropout(0.2),
    Dense(units=18, kernel_initializer='normal', activation='tanh'),
    Dropout(0.2),
    Dense(1, kernel_initializer='normal', activation='sigmoid'),
])

model.compile(loss='binary_crossentropy', optimizer='Adam', metrics=['accuracy'])

# Train on the training split, setting aside 33% of it for validation.
fit_options = dict(validation_split=0.33, batch_size=20, epochs=150)
history = model.fit(X_train, Y_train, **fit_options)

In [None]:
# Evaluate the model
# Score the model on the held-out test split. The original evaluated on all of
# X and Y, which includes the rows the model was trained on.
scores = model.evaluate(X_test, Y_test)
print("%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))

In [None]:
# Names of the values reported by the model's evaluation.
model.metrics_names

In [None]:
# Raw values returned by model.evaluate.
scores

In [None]:
#visualize training history

#list all data in history
# Names of the series recorded in the training history.
history.history.keys()

In [None]:
#Summarize history for accuracy
import matplotlib.pyplot as plt
# Training vs. validation accuracy per epoch.
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
# The second curve comes from validation_split during fit, not from the test
# set, so it is labelled 'validation'.
plt.legend(['train','validation'], loc='upper left')
plt.show()

In [None]:
#Summarize history for loss
import matplotlib.pyplot as plt
# Training vs. validation loss per epoch.
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
# The second curve comes from validation_split during fit, not from the test
# set, so it is labelled 'validation'.
plt.legend(['train','validation'], loc='upper left')
plt.show()