In [2]:
!pip install keras

Collecting keras
  Downloading keras-2.10.0-py2.py3-none-any.whl (1.7 MB)
Installing collected packages: keras
Successfully installed keras-2.10.0


In [None]:
!pip install tensorflow

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from keras.models import Sequential
from keras.layers import Dense, Activation,Layer,Lambda
import seaborn as sns
from sklearn.model_selection import train_test_split
from matplotlib import pyplot as plt
from sklearn.decomposition import PCA
from mlxtend.plotting import plot_decision_regions

## __1 - Business Problem__  
___PREDICT THE BURNED AREA OF FOREST FIRES WITH NEURAL NETWORKS___  

## __2 - Data collection and description__ 

In [None]:
# Raw forest-fires table; kept untouched so later cells can refer back to it.
DATA_PATH = "../DATASCIENCE/forestfires.csv"
df = pd.read_csv(DATA_PATH)

In [None]:
# Work on an independent (deep) copy so the raw frame `df` stays unmodified.
df1 = df.copy(deep=True)

In [None]:
# Preview the first rows only instead of rendering the whole frame.
df1.head()

In [None]:
# Derive df1 from the raw frame without the textual month/day columns.
# Building a new frame (instead of dropping in place) keeps this cell
# idempotent: re-running it no longer raises KeyError on the missing columns.
df1 = df.drop(columns=["month", "day"])

In [None]:
# How many rows fall into each size category.
df1.loc[:, "size_category"].value_counts()

In [None]:
# Missing values per column (isna is the same check as isnull).
df1.isna().sum()

In [None]:
# Summary statistics, transposed so each column of df1 becomes one row.
df1.describe().transpose()

### Outlier Check

In [None]:
# Box plot of the burned area. The series is passed as `x=` explicitly:
# a bare positional argument is interpreted differently across seaborn
# releases (deprecated as `x` in some, treated as `data` in others).
ax = sns.boxplot(x=df1['area'])

### There are 3 Outlier instances in our data

In [None]:
# Default figure size (width, height in inches) for the plots that follow.
plt.rcParams["figure.figsize"] = (9, 5)

In [None]:
# Distribution of the burned area: skewness / kurtosis plus a filled KDE curve.
plt.figure(figsize=(16,5))
print("Skew: {}".format(df1['area'].skew()))
print("Kurtosis: {}".format(df1['area'].kurtosis()))
# `shade=` is deprecated in seaborn in favour of `fill=`; newer releases warn
# about it and plan to reject it, so use the supported spelling.
ax = sns.kdeplot(df1['area'], fill=True, color='g')
plt.xticks(list(range(0, 1200, 50)))
plt.show()

### The Data is highly skewed and has large kurtosis value  
### Majority of the forest fires do not cover a large area, most of the damaged area is under 100 hectares of land

In [None]:
# First ten columns of df1, and the names of those among them with object dtype.
# NOTE(review): month/day are dropped from df1 before this cell, so this list
# is probably empty — confirm whether the raw frame `df` was intended here.
first_ten = df1.columns[:10]
dfa = df1[first_ten]
month_colum = list(dfa.select_dtypes(include='object').columns)

In [None]:
# Count plot (top row) and percentage bar chart (bottom row) per categorical column.
# `month` and `day` are dropped from df1 earlier in the notebook, so a column
# list derived from df1 can come out empty and nothing would be drawn. In that
# case fall back to the raw frame `df`, which still carries both columns.
cat_columns = list(month_colum) or [c for c in ("month", "day") if c in df.columns]
fig, axes = plt.subplots(2, max(len(cat_columns), 1), figsize=(16, 10), squeeze=False)
for j, col in enumerate(cat_columns):
    # Top row: raw number of records per category.
    sns.countplot(data=df, y=col, ax=axes[0, j])
    # Bottom row: share of records per category, scaled to percent so the
    # axis label matches the plotted values.
    df[col].value_counts(normalize=True).mul(100).plot.bar(ax=axes[1, j])
    # Categories sit on the x axis of a vertical bar chart, shares on the y axis.
    axes[1, j].set_xlabel(col)
    axes[1, j].set_ylabel('% distribution per category')
plt.tight_layout()
plt.show()

### Most of the fires occur in the months of August and September  
### Among the days, Sunday and Friday have recorded the most cases

In [None]:
# Names of the columns in dfa whose dtype is anything other than object.
num_columns = list(dfa.select_dtypes(exclude='object').columns)

In [None]:
# KDE and box plot for every numeric column, followed by a skewness/kurtosis table.
# The grid is sized from the number of columns: all KDE panels come first,
# then the box plots start right after them, so the two groups can never
# overwrite each other whatever len(num_columns) is.
n_num = len(num_columns)
n_grid_cols = 4
n_grid_rows = max(1, -(-2 * n_num // n_grid_cols))  # ceil(2 * n_num / n_grid_cols)
plt.figure(figsize=(18, 5 * n_grid_rows))
for i, col in enumerate(num_columns, 1):
    plt.subplot(n_grid_rows, n_grid_cols, i)
    # `shade=` is deprecated in seaborn in favour of `fill=`.
    sns.kdeplot(df[col], color='g', fill=True)
    plt.subplot(n_grid_rows, n_grid_cols, i + n_num)
    df[col].plot.box()
plt.tight_layout()
plt.show()
num_data = df[num_columns]
pd.DataFrame(data=[num_data.skew(),num_data.kurtosis()],index=['skewness','kurtosis'])

### Finding Correlation

In [None]:
# Pairwise correlation between the first eleven columns of df1.
corr = df1.loc[:, df1.columns[:11]].corr()

In [None]:
# Annotated heatmap of the correlation matrix `corr` on a 10x10 inch canvas.
fig, ax = plt.subplots(figsize=(10, 10))
sns.heatmap(data=corr, annot=True, ax=ax)

## __3 - Neural Network Model__ 

In [None]:
# Numeric codes for the target classes. The network ends in a single sigmoid
# unit trained with binary_crossentropy, which expects targets in {0, 1};
# the previous codes 1/2 are outside that range and make the loss meaningless.
mapping = {'small': 0, 'large': 1}

In [None]:
# Swap the textual labels for their numeric codes; DataFrame.replace with a
# dict substitutes every cell in the frame that equals one of the keys.
df1 = df1.replace(to_replace=mapping)

In [None]:
# Positional split: the first 28 columns are the model inputs, column 28 is the target.
feature_frame = df1.iloc[:, :28]
target_series = df1.iloc[:, 28]
X = np.array(feature_frame)
y = np.array(target_series)

In [None]:
def norm_func(i):
    """Min-max scale ``i`` to the range [0, 1], feature by feature.

    Parameters
    ----------
    i : numpy.ndarray, pandas.Series or pandas.DataFrame
        Values to scale. For 2-D input each column is scaled independently;
        1-D input is scaled as a single feature.

    Returns
    -------
    Same container type as ``i`` with every feature mapped onto [0, 1].
    A constant feature (max == min) is mapped to 0 rather than NaN/inf.
    """
    # axis=0 matters for ndarrays: without it .min()/.max() reduce over the
    # whole array, so every feature would be scaled by the global extremes
    # and small-valued columns would be squashed towards zero.
    lo = i.min(axis=0)
    span = i.max(axis=0) - lo
    # Avoid 0/0 for constant features by dividing them by 1 instead.
    span = np.where(span == 0, 1, span)
    x = (i - lo) / span
    return (x)

In [None]:
# Scaled copy of the feature matrix, produced by norm_func.
X_norm = norm_func(i=X)

In [None]:
# 80/20 split, stratified on the target so both parts keep the class balance.
# A fixed random_state makes the split (and every number derived from it)
# reproducible across kernel restarts.
x_train, x_test, y_train, y_test = train_test_split(
    X_norm, y, test_size=0.2, stratify=y, random_state=42
)

In [None]:
# Feed-forward binary classifier: 28 inputs -> 12 (relu) -> 8 (relu) -> 1 (sigmoid).
model = Sequential([
    Dense(12, input_dim=28, activation='relu'),
    Dense(8, activation='relu'),
    Dense(1, activation='sigmoid'),
])

In [None]:
# Adam optimiser on binary cross-entropy, tracking accuracy during training.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train for 150 epochs in mini-batches of 10 rows.
model.fit(x=x_train, y=y_train, batch_size=10, epochs=150)

In [None]:
# Report accuracy on the held-out split. Accuracy on the training rows only
# shows how well the network fits data it has already seen, so it is printed
# separately for comparison rather than as the headline number.
_, train_accuracy = model.evaluate(x_train, y_train)
_, accuracy = model.evaluate(x_test, y_test)
print('Train accuracy: %.2f' % (train_accuracy*100))
print('Test accuracy: %.2f' % (accuracy*100))

## __4 - Conclusion__ 

In [None]:
# `Sequential.predict_classes` has been removed from Keras and is not available
# in the keras 2.10.0 build installed at the top of this notebook. For a
# single sigmoid output the equivalent is to threshold the predicted
# probability at 0.5.
predictions = (model.predict(x_train) > 0.5).astype("int32")

In [None]:
# `predictions` is computed on x_train, whose rows were shuffled by
# train_test_split, so row i of `predictions` does not correspond to row i of
# X / y. Compare against x_train / y_train to keep features, prediction and
# expected label on the same sample.
for i in range(min(5, len(predictions))):
    # Works whether the prediction for a row is a scalar or a length-1 array.
    predicted = int(np.ravel(predictions[i])[0])
    print('%s => %d (expected %d)' % (x_train[i].tolist(), predicted, y_train[i]))