In [None]:
import psycopg2
from dotenv import load_dotenv
import os

load_dotenv()

# Database connection settings are read from the environment, which
# load_dotenv() above populates from a local .env file.
# NOTE(review): USER is usually already exported by the OS shell, and
# load_dotenv() presumably does not override existing variables by default —
# confirm that the intended database user is the one picked up here.
db_settings = {
    "user": os.getenv("USER"),
    "password": os.getenv("PASSWORD"),
    "host": os.getenv("HOST"),
    "dbname": os.getenv("DBNAME"),
}

conn = psycopg2.connect(**db_settings)

In [None]:
import pandas as pd
 
# Load each source table in full into its own DataFrame over the open connection.
# NOTE(review): pandas documents SQLAlchemy connectables as the supported
# connection type; a raw psycopg2 connection may emit a UserWarning — confirm.
temp_readings = pd.read_sql_query("SELECT * FROM public.temp_readings", conn)
fire_alerts = pd.read_sql_query("SELECT * FROM public.fire_alerts", conn)
tweets = pd.read_sql_query("SELECT * FROM public.tweets", conn)

In [None]:
# Preview the rows loaded from public.temp_readings.
temp_readings

In [None]:
# Preview the rows loaded from public.tweets.
tweets

In [None]:
# Preview the rows loaded from public.fire_alerts.
fire_alerts

# Experiment #1: Predicting fire alerts using temperature readings
Every sector has a temperature reading for every day, while only some sectors have messages. To start, we'll try to predict whether a fire alert occurs on a given day using only the temperature readings from the past 7 days.

In [None]:
from tqdm import tqdm

# Build sliding-window samples from the per-sector temperature series.
#   X: temperatures of one sector, where list index + 1 is the day (idx 0 = day 1)
#   y: the set of days on which that sector has a fire alert
# Each sample is (7 consecutive daily temperatures, label); the label is 1 when
# the 7th (last) day of the window has a fire alert and 0 otherwise.
# NOTE(review): rows are assumed to be ordered by day, without gaps, within
# each sector — confirm, or sort explicitly before windowing.
WINDOW_DAYS = 7
samples = []

# Index the alert days by sector once, instead of re-filtering fire_alerts
# for every sector; a set makes the per-day membership test O(1).
alert_days_by_coord = {
    coord: set(days)
    for coord, days in fire_alerts.groupby('xy', sort=False)['event_day']
}

# sort=False keeps the sectors in order of first appearance.
sector_temperatures = temp_readings.groupby('xy', sort=False)['temperature']

for coord, temperatures in tqdm(sector_temperatures, total=sector_temperatures.ngroups):
    X = temperatures.tolist()
    y = alert_days_by_coord.get(coord, set())
    # i is the 1-based day number of the window's last day. The upper bound is
    # len(X) + 1 so that the window ending on the final day is included too
    # (a sector with exactly 7 readings yields one sample).
    for i in range(WINDOW_DAYS, len(X) + 1):
        samples.append((X[i - WINDOW_DAYS:i], 1 if i in y else 0))


In [None]:
import numpy as np

# Split the (window, label) pairs into a feature matrix and a label vector.
windows, labels = zip(*samples)
samples_x = np.array(windows)
samples_y = np.array(labels)

# Number of samples that were built.
len(samples_x)

In [None]:
# plot distribution of samples
import matplotlib.pyplot as plt

# Label distribution of the raw samples, drawn with a title and axis labels so
# the figure can be read on its own.
fig, ax = plt.subplots()
ax.hist(samples_y)
ax.set(
    title="Label distribution before SMOTE",
    xlabel="Label (0 = no fire alert, 1 = fire alert)",
    ylabel="Number of samples",
);

In [None]:
from imblearn.over_sampling import SMOTE

# Oversample with SMOTE so that both labels are equally represented before training.
# random_state is fixed so that re-running the notebook produces the same
# synthetic samples.
# NOTE(review): the resampling happens before the train/test split further
# down, so synthetic points derived from the whole dataset also land in the
# test set; consider resampling the training portion only.
smote = SMOTE(random_state=42)
samples_x, samples_y = smote.fit_resample(samples_x, samples_y)

In [None]:
# Label distribution after resampling, drawn with a title and axis labels so
# the figure can be read on its own.
fig, ax = plt.subplots()
ax.hist(samples_y)
ax.set(
    title="Label distribution after SMOTE",
    xlabel="Label (0 = no fire alert, 1 = fire alert)",
    ylabel="Number of samples",
);

In [None]:
# scale the x values
from sklearn.preprocessing import StandardScaler

def preprocess(x, y):
    """Standardize the features and one-hot encode the labels.

    Args:
        x: 2-D array-like of features, passed to ``StandardScaler.fit_transform``.
        y: 1-D array-like of class labels.

    Returns:
        A tuple ``(x_scaled, y_onehot, scaler)``: the transformed features, the
        labels as a float32 one-hot array with one column per distinct label,
        and the fitted ``StandardScaler`` so the same transform can be reused.
    """
    scaler = StandardScaler()
    x = scaler.fit_transform(x)
    # Pin the dtype explicitly: the default dtype of get_dummies depends on the
    # pandas version (uint8 before 2.0, bool from 2.0 on), whereas the loss and
    # the metrics downstream treat the labels as numeric 0/1 values.
    y = pd.get_dummies(y, dtype=np.float32).to_numpy()
    return x, y, scaler

In [None]:
# Standardize the features, one-hot encode the labels and keep the fitted scaler.
# NOTE(review): the scaler is fitted on all samples, i.e. before the train/test
# split further down — consider fitting it on the training portion only.
# NOTE(review): this cell overwrites its own inputs, so running it a second time
# without re-running the cells above will not reproduce the same result.
samples_x, samples_y, scaler = preprocess(samples_x, samples_y)

In [None]:
# save the scaler for future use
import pickle

# Write the fitted scaler to scaler.pkl so it can be loaded again later.
with open('scaler.pkl', 'wb') as scaler_file:
    pickle.dump(scaler, scaler_file)

In [None]:
# Inspect the standardized feature matrix returned by preprocess().
samples_x

In [None]:
# Inspect the one-hot encoded labels returned by preprocess().
samples_y

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for testing. random_state is fixed so that the
# split, and every metric computed from it, is reproducible across runs.
train_x, test_x, train_y, test_y = train_test_split(
    samples_x, samples_y, test_size=0.2, random_state=42
)

In [None]:
from keras import Sequential
from keras.layers import Dense

# implement a simple neural network
def build_model(length_x):
    """Build and compile a small fully connected two-class classifier.

    Args:
        length_x: Number of input features per sample.

    Returns:
        A compiled ``Sequential`` model: one hidden ``Dense`` layer of 32 swish
        units followed by a 2-unit softmax output, trained with Adam on
        categorical cross-entropy and reporting accuracy.
    """
    network = Sequential()
    network.add(Dense(32, activation='swish', input_shape=(length_x,)))
    network.add(Dense(2, activation='softmax'))

    network.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return network

In [None]:
from keras.callbacks import EarlyStopping
# Stop training early once the validation loss has stopped improving.
early_stopping = EarlyStopping(
    # Quantity that is watched for improvement.
    monitor="val_loss",
    # A decrease smaller than 1e-2 does not count as an improvement.
    min_delta=1e-2,
    # Training stops after 5 epochs without such an improvement.
    patience=5,
    verbose=1,
)
callbacks = [early_stopping]

# One input unit per feature column of the sample matrix.
dnn_model = build_model(samples_x.shape[1])

# 20% of the training data is held out to compute the monitored validation loss.
dnn_fit_options = dict(
    epochs=100,
    callbacks=callbacks,
    batch_size=2048,
    validation_split=0.2,
    verbose=1,
)
dnn_model.fit(train_x, train_y, **dnn_fit_options)

In [None]:
# Evaluate the dense model on the held-out test split; the metrics come back as a dict.
# NOTE(review): the test split was drawn after SMOTE resampling, so it contains
# synthetic samples — the numbers here may be optimistic.
dnn_model.evaluate(test_x, test_y, batch_size=512, return_dict=True)

In [None]:
# calculate the accuracy if we always predict [1, 0] to be the output
from sklearn.metrics import accuracy_score

# Baseline for comparison: score a constant prediction of the one-hot
# label [1, 0] for every test example.
constant_prediction = np.array([[1, 0]] * len(test_y))
accuracy_score(test_y, constant_prediction)

In [None]:
# Find the index of the first [0, 1] example in the test labels and show the
# model's prediction for it. The index is looked up rather than hard-coded
# because the position of such an example depends on the split.
# np.argmax returns 0 when no row qualifies, so this assumes the test set
# contains at least one [0, 1] example.
positive_idx = int(np.argmax(test_y[:, 1]))
dnn_model.predict(test_x[positive_idx:positive_idx + 1])

# Results
The model achieves high accuracy, even though it relies purely on temperature data. Before we move on to NLP and text analysis, let's try implementing an LSTM for this problem.

# Experiment #2: Predicting fire alerts using temperature readings with an LSTM
The LSTM will take in the temperature readings from the past 7 days and output a prediction for the 7th day.

In [None]:
# Append a trailing axis of size 1 so each sample has shape (timesteps, 1),
# matching the input_shape=(length_x, 1) that the LSTM model is built with.
lstm_train_x = train_x[:, :, None]
lstm_test_x = test_x[:, :, None]

In [None]:
from keras.layers import LSTM

def build_lstm_model(length_x):
    """Build and compile a single-layer LSTM two-class classifier.

    Args:
        length_x: Number of timesteps per sample; each timestep carries one feature.

    Returns:
        A compiled ``Sequential`` model: an ``LSTM`` layer of 50 ReLU units
        followed by a 2-unit softmax output, trained with Adam on categorical
        cross-entropy and reporting accuracy.
    """
    network = Sequential()
    network.add(LSTM(50, activation='relu', input_shape=(length_x, 1)))
    network.add(Dense(2, activation='softmax'))

    network.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return network

In [None]:
# One timestep per feature column of the sample matrix.
lstm_model = build_lstm_model(samples_x.shape[1])

# Train on the reshaped inputs, reusing the early-stopping `callbacks` list
# created for the dense model; 20% of the training data is held out for validation.
lstm_fit_options = dict(
    epochs=100,
    callbacks=callbacks,
    batch_size=4096,
    validation_split=0.2,
    verbose=1,
)
lstm_model.fit(lstm_train_x, train_y, **lstm_fit_options)

In [None]:
# Evaluate the LSTM on the reshaped held-out test split; the metrics come back as a dict.
# NOTE(review): the test split was drawn after SMOTE resampling, so it contains
# synthetic samples — the numbers here may be optimistic.
lstm_model.evaluate(lstm_test_x, test_y, batch_size=4096, return_dict=True)

In [None]:
# Persist both trained models to .h5 files in the working directory.
# NOTE(review): newer Keras releases reportedly treat HDF5 as a legacy format
# and recommend the native `.keras` format — confirm before changing, since
# downstream loaders may expect these file names.
dnn_model.save('dnn_model.h5')
lstm_model.save('lstm_model.h5')