In [None]:
# For now, clear outputs (Cell > All Output > Clear) before committing to Git
# There might be a better way

import sqlite3
import pandas as pd
# Open the database read-only through a URI. A plain sqlite3.connect(path) silently
# creates an empty database when the path is wrong, and the mistake only surfaces later
# as a missing-table error; with mode=ro a missing file fails right here instead.
DB_PATH = 'FPA_FOD_20170508.sqlite'
cnx = sqlite3.connect('file:' + DB_PATH + '?mode=ro', uri=True)

import matplotlib.pyplot as plt
import numpy as np
from sklearn import tree, preprocessing
import sklearn.ensemble as ske
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

In [None]:
# Columns pulled from the Fires table; everything below works on this frame.
FIRE_COLUMNS = [
    'FIRE_YEAR', 'DISCOVERY_TIME', 'STAT_CAUSE_DESCR', 'CONT_DATE', 'CONT_TIME',
    'LATITUDE', 'LONGITUDE', 'STATE', 'DISCOVERY_DATE', 'FIRE_SIZE', 'FIRE_SIZE_CLASS',
]
# The table name is written as a plain identifier: a single-quoted 'Fires' is a string
# literal that SQLite only tolerates in identifier position for backwards compatibility.
df = pd.read_sql_query("SELECT " + ",".join(FIRE_COLUMNS) + " FROM Fires", cnx)
# Bare last expression -> rich table rendering instead of print()'s plain text.
df.head()

In [None]:
# DISCOVERY_DATE / CONT_DATE are treated as Julian day numbers: subtracting the Julian
# date of the Unix epoch leaves "days since 1970-01-01", which to_datetime(unit='D')
# turns into timestamps.
UNIX_EPOCH_JULIAN = pd.Timestamp(0).to_julian_date()
for date_col in ('DISCOVERY_DATE', 'CONT_DATE'):
    # Skip columns that are already datetimes so re-running this cell is harmless
    # (subtracting a float from a datetime column would raise).
    if pd.api.types.is_datetime64_any_dtype(df[date_col]):
        continue
    df[date_col] = pd.to_datetime(df[date_col] - UNIX_EPOCH_JULIAN, unit='D')
df.head()

In [None]:
# Derive month, day-of-month and weekday name for both the discovery and the
# containment date. Rows with a missing date get NaN in the derived columns.
for prefix in ('DISCOVERY', 'CONT'):
    dates = df[prefix + '_DATE'].dt
    df[prefix + '_MONTH'] = dates.month
    df[prefix + '_DAY'] = dates.day
    # .dt.weekday_name was removed in pandas 1.0; .dt.day_name() is its replacement.
    df[prefix + '_DAY_OF_WEEK'] = dates.day_name()
df.head()

In [None]:
# Replace the categorical text columns with integer codes, in place.
# Each column gets its own fitted encoder: a single shared encoder only remembers the
# classes of the last column it was fit on, so the codes of the other columns could
# never be mapped back to their labels (label_encoders[col].inverse_transform).
label_encoders = {}
for column in ('STAT_CAUSE_DESCR', 'STATE', 'DISCOVERY_DAY_OF_WEEK'):
    encoder = preprocessing.LabelEncoder()
    df[column] = encoder.fit_transform(df[column])
    label_encoders[column] = encoder

# Later cells still call `le.fit_transform`, so keep a separate general-purpose
# encoder under that name (separate, so refitting it does not clobber the dict above).
le = preprocessing.LabelEncoder()
df.head()

In [None]:
# Fires without a containment date have no derived containment parts; give them
# explicit placeholder values so the columns can be encoded / used as features.
df['CONT_DAY_OF_WEEK'] = df['CONT_DAY_OF_WEEK'].fillna("Unknown")

# CONT_DAY and CONT_MONTH are numeric (derived from the datetime column), so fill them
# with the number 0. Filling with the string "0" would turn them into object columns
# that mix floats and text.
df['CONT_DAY'] = df['CONT_DAY'].fillna(0)
df['CONT_MONTH'] = df['CONT_MONTH'].fillna(0)

# The time columns keep their original "0" text placeholder.
# NOTE(review): presumably these are stored as text in the database - confirm.
df['CONT_TIME'] = df['CONT_TIME'].fillna("0")
df['DISCOVERY_TIME'] = df['DISCOVERY_TIME'].fillna("0")

# Integer-encode the remaining categorical columns.
df['CONT_DAY_OF_WEEK'] = le.fit_transform(df['CONT_DAY_OF_WEEK'])
df['FIRE_SIZE_CLASS'] = le.fit_transform(df['FIRE_SIZE_CLASS'])
df.head()

In [None]:
# Cast the containment month/day to NumPy float64. The lower-case spelling matters:
# on current pandas the capitalised 'Float64' selects the nullable extension dtype
# (and on old NumPy it was a since-removed alias), which the estimators below are not
# guaranteed to accept as plain numeric input.
df['CONT_MONTH'] = df['CONT_MONTH'].astype('float64')
df['CONT_DAY'] = df['CONT_DAY'].astype('float64')
# CONT_TIME and DISCOVERY_DATE are left as they are; both are dropped from the feature
# frame further down.

In [None]:
# Sanity check: list every column of df followed by its dtype, one value per line.
for column_name, column_dtype in df.dtypes.items():
    print(column_name)
    print(column_dtype)

In [None]:
# Prediction target: the (label-encoded) fire size class.
labels = df.loc[:, 'FIRE_SIZE_CLASS']
labels.head(n=5)

In [None]:
# Feature frame: everything in df except the columns listed here.
# NOTE: despite the name, `logits` holds the model *inputs* (it is passed as X to
# train_test_split below), not model outputs.
excluded_columns = [
    'FIRE_SIZE',
    'FIRE_SIZE_CLASS',
    'DISCOVERY_DATE',
    'CONT_DATE',
    'STATE',
    'CONT_TIME',
    'DISCOVERY_TIME',
]
logits = df.drop(columns=excluded_columns)
logits.head()

In [None]:
# Hold out 20% of the rows for evaluation. The seed is fixed so the split - and every
# score computed from it - is identical on each run of the notebook.
SPLIT_SEED = 42
x_train, x_test, y_train, y_test = train_test_split(
    logits, labels, test_size=0.2, random_state=SPLIT_SEED
)

In [None]:
# Confirm the dtype of every feature column before it is fed to the models.
for feature_name, feature_dtype in logits.dtypes.items():
    print(feature_name)
    print(feature_dtype)

In [None]:
# Baseline: ordinary least-squares fit of the encoded size class on the features.
reg = LinearRegression()
reg.fit(x_train, y_train)

In [None]:
# Score the baseline on the held-out split (for a regressor, score() is R^2).
reg.score(X=x_test, y=y_test)

In [None]:
import tensorflow
import keras
from keras.models import Sequential
from keras.layers import Dense, Activation

In [None]:
from keras.activations import relu

In [None]:
# Small fully-connected network: one hidden ReLU layer of 50 units, one linear output.
# The input width is taken from the training frame rather than hard-coded: the feature
# frame has 10 columns, so a fixed input_shape=[8] does not match what model.fit() is
# given.
n_features = x_train.shape[1]
model = Sequential()
model.add(Dense(50, input_shape=[n_features]))
model.add(Activation('relu'))
model.add(Dense(1))

In [None]:
# Print the layer-by-layer overview of the network defined above.
model.summary()

In [None]:
# The network has a single linear output trained with mean squared error, i.e. it is a
# regressor. Track mean absolute error alongside the loss: 'accuracy' is a
# classification metric and does not give a meaningful number for this setup.
model.compile(optimizer='sgd',
              loss='mean_squared_error',
              metrics=['mean_absolute_error'])

In [None]:
# Train for 10 epochs. The returned History object is kept so the per-epoch values can
# be inspected later (history.history); assigning it also stops the cell from echoing
# the object's repr.
history = model.fit(x_train, y_train, epochs=10)

In [None]:
# Loss and compiled metric(s) on the held-out split.
model.evaluate(x=x_test, y=y_test)

In [None]:
# Network predictions for every held-out row; `a` is the name the next cell reads.
predictions = model.predict(x_test)
a = predictions

In [None]:
# Spot-check a single prediction (the entry at position 100 of the predictions array).
a[100]

In [None]:
# df['STAT_CAUSE_DESCR'] was replaced by integer label codes earlier in the notebook,
# so plotting it directly labels the bars 0, 1, 2, ... Read the original cause names
# back from the database to get a readable chart.
cause_counts = pd.read_sql_query(
    "SELECT STAT_CAUSE_DESCR FROM Fires", cnx
)['STAT_CAUSE_DESCR'].value_counts()

fig, ax = plt.subplots()
cause_counts.plot(kind='barh', color='coral', ax=ax)
ax.set(title='Fires by cause', xlabel='Number of fires', ylabel='Cause')
plt.show()

In [None]:
# By this point df['STAT_CAUSE_DESCR'] holds integer label codes (so comparing it with
# 'Arson' matches nothing) and df has no 'DAY_OF_WEEK' column. Pull the discovery dates
# of arson fires straight from the database and derive the weekday from them.
# Note: df_arson therefore only carries DISCOVERY_DATE and DAY_OF_WEEK.
df_arson = pd.read_sql_query(
    "SELECT DISCOVERY_DATE FROM Fires WHERE STAT_CAUSE_DESCR = ?",
    cnx,
    params=('Arson',),
)
# Same Julian-day -> timestamp conversion as used for df above.
df_arson['DAY_OF_WEEK'] = pd.to_datetime(
    df_arson['DISCOVERY_DATE'] - pd.Timestamp(0).to_julian_date(), unit='D'
).dt.day_name()

fig, ax = plt.subplots()
df_arson['DAY_OF_WEEK'].value_counts().plot(kind='barh', color='coral', ax=ax)
ax.set(title='Arson fires by day of week', xlabel='Number of fires', ylabel='Day of week')
plt.show()

In [None]:
def plot_corr(df,size=10):
    """Draw the Pearson correlation matrix of a frame's numeric columns as a heat map.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to analyse. Only numeric and boolean columns enter the correlation.
    size : int or float, default 10
        Width and height of the (square) figure, in inches.

    Returns
    -------
    None
        The figure is shown with ``plt.show()``.
    """
    # Select the numeric columns explicitly instead of relying on corr() to drop the
    # others: older pandas did that silently, pandas 2.x (numeric_only=False by
    # default) tries to convert every column and fails on non-numeric data.
    numeric = df.select_dtypes(include=['number', 'bool'])
    corr = numeric.corr()  # default method is pearson

    fig, ax = plt.subplots(figsize=(size, size))
    ax.matshow(corr, cmap=plt.cm.Oranges)

    tick_positions = range(len(corr.columns))
    ax.set_xticks(tick_positions)
    ax.set_xticklabels(corr.columns, rotation=45)  # tilt so long names do not overlap
    ax.set_yticks(tick_positions)
    ax.set_yticklabels(corr.columns)
    plt.show()
    

    
# Correlation heat map for the working frame, at the default figure size.
plot_corr(df)

In [None]:
# One point per fire at its coordinates; the low alpha makes dense areas appear darker.
df.plot.scatter(
    x='LONGITUDE',
    y='LATITUDE',
    color='coral',
    alpha=0.3,
)
plt.show()

In [None]:
# By this point df['STAT_CAUSE_DESCR'] holds integer label codes (so comparing it with
# 'Lightning' matches nothing) and df has no 'DAY_OF_WEEK' column. Pull the discovery
# dates of lightning fires straight from the database and derive the weekday from them.
# Note: df_lightning therefore only carries DISCOVERY_DATE and DAY_OF_WEEK.
df_lightning = pd.read_sql_query(
    "SELECT DISCOVERY_DATE FROM Fires WHERE STAT_CAUSE_DESCR = ?",
    cnx,
    params=('Lightning',),
)
# Same Julian-day -> timestamp conversion as used for df above.
df_lightning['DAY_OF_WEEK'] = pd.to_datetime(
    df_lightning['DISCOVERY_DATE'] - pd.Timestamp(0).to_julian_date(), unit='D'
).dt.day_name()

fig, ax = plt.subplots()
df_lightning['DAY_OF_WEEK'].value_counts().plot(kind='barh', color='coral', ax=ax)
ax.set(title='Lightning fires by day of week', xlabel='Number of fires', ylabel='Day of week')
plt.show()

In [None]:
# df has no 'DAY_OF_WEEK' column: the derived column is DISCOVERY_DAY_OF_WEEK, and it
# has been label-encoded by now. Recompute the weekday names from DISCOVERY_DATE, which
# is still a datetime column.
day_counts = df['DISCOVERY_DATE'].dt.day_name().value_counts()

fig, ax = plt.subplots()
day_counts.plot(kind='barh', color='coral', ax=ax)
ax.set(title='Fires by day of week', xlabel='Number of fires', ylabel='Day of week')
plt.show()

In [None]:
# df['STATE'] was replaced by integer label codes earlier in the notebook, so plotting
# it directly labels the bars with numbers. Read the state abbreviations back from the
# database and chart the ten states with the most fires.
state_counts = pd.read_sql_query(
    "SELECT STATE FROM Fires", cnx
)['STATE'].value_counts().head(n=10)

fig, ax = plt.subplots()
state_counts.plot(kind='barh', color='coral', ax=ax)
ax.set(title='Top 10 states by number of fires', xlabel='Number of fires', ylabel='State')
plt.show()

In [None]:
# Both STATE and STAT_CAUSE_DESCR in df hold integer label codes by now, so filtering
# df on 'CA' matches nothing. Query the causes of California fires directly instead.
# Note: df_CA therefore only carries the STAT_CAUSE_DESCR column.
df_CA = pd.read_sql_query(
    "SELECT STAT_CAUSE_DESCR FROM Fires WHERE STATE = ?",
    cnx,
    params=('CA',),
)

fig, ax = plt.subplots()
df_CA['STAT_CAUSE_DESCR'].value_counts().plot(
    kind='barh', color='coral', title='causes of fires for CA', ax=ax
)
ax.set(xlabel='Number of fires', ylabel='Cause')
plt.show()