# COMP2002 Main Assignment
Machine Learning and Evolutionary Computation

In [47]:
import pandas
from matplotlib import pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.svm import SVR
from sklearn.metrics import mean_absolute_error

## Part 1 - Machine Learning

### 1.1 Data Preparation
The forest fire datasets are CSVs with the following fields:
<table><tr>
    <td>Day</td>
    <td>Month</td>
    <td>Year</td>
    <td>Temperature (degrees C)</td>
    <td>RH (Relative Humidity, %)</td>
    <td>WS (Wind speed, kmph)</td>
    <td>Rain (Total for day, mm)</td>
    <td>FFMC (Fine Fuel Moisture Code index)</td>
    <td>DMC (Duff Moisture Code index)</td>
    <td>DC (Drought Code index)</td>
    <td>ISI (Initial Spread Index)</td>
    <td>BUI (Buildup Index)</td>
    <td>FWI (Fire Weather Index)</td></tr></table>

In [48]:
# Read in the forest fire datasets for the two regions and stack them into one frame
ff_region1 = pandas.read_csv("datasets/AlgerianFF_Region1.csv")
ff_region2 = pandas.read_csv("datasets/AlgerianFF_Region2.csv")
ff_combined = pandas.concat([ff_region1, ff_region2])

# Input features. The year is removed (every value is the same so it is not useful).
# The names are kept exactly as the columns are indexed here, including the
# leading/trailing spaces in " RH", " Ws" and "Rain ".
ff_feature_columns = ["day", "month", "Temperature", " RH", " Ws", "Rain ", "FFMC", "DMC", "DC", "ISI", "BUI"]

# Fixed seed so the split (and every result derived from it) is the same after
# a kernel restart; change the value to draw a different split.
FF_SPLIT_SEED = 42

# Randomly split into training and testing data BEFORE scaling. Fitting the scaler
# on the combined data would let the test rows influence the min/max used to
# normalise the training rows (data leakage), giving optimistic test scores.
ff_train_raw, ff_test_raw, ff_train_targets, ff_test_targets = train_test_split(
    ff_combined[ff_feature_columns],
    ff_combined["FWI"],
    shuffle=True,
    random_state=FF_SPLIT_SEED,
)

# Normalise the features: the scaler learns each feature's min/max from the
# training rows only, so training inputs land in range 0-1 (the default range).
# Test inputs are mapped with the same min/max and may fall slightly outside 0-1.
scaler = MinMaxScaler()  # Range 0-1 is default
ff_train_inputs = scaler.fit_transform(ff_train_raw)
ff_test_inputs = scaler.transform(ff_test_raw)

# Scaled copy of every row, produced with the training-fitted scaler. Kept under
# its original name in case other cells use it.
ff_combined_inputs = scaler.transform(ff_combined[ff_feature_columns])

- The datasets for the two regions were combined and then randomly split into training and testing data, rather than training on one region and testing on the other. The two regions may differ in ways that are not captured by the features in the dataset; if so, a model trained on a single region could overfit to that region and generalise poorly to the other.

### 1.2 Regression

In [49]:
# Random Forest Regressor
# TODO: implement normally, then experiment with different ways of splitting the data (including one that repeatedly splits it into N training parts and 1 testing part)
def RF_Regression():
    """Fit a default random forest on the training split and predict the test split.

    Reads the notebook-level `ff_train_inputs`, `ff_train_targets` and
    `ff_test_inputs`. A new `RandomForestRegressor` is created on every call.

    Returns:
        The model's predictions for `ff_test_inputs`.
    """
    # fit() returns the fitted estimator, so it can be bound directly
    forest = RandomForestRegressor().fit(ff_train_inputs, ff_train_targets)
    return forest.predict(ff_test_inputs)

In [50]:
# Neural Network
def NN_Regression():
    """Fit a default multi-layer perceptron on the training split and predict the test split.

    Reads the notebook-level `ff_train_inputs`, `ff_train_targets` and
    `ff_test_inputs`. A new `MLPRegressor` is created on every call.

    Returns:
        The model's predictions for `ff_test_inputs`.
    """
    # fit() returns the fitted estimator, so it can be bound directly
    network = MLPRegressor().fit(ff_train_inputs, ff_train_targets)
    return network.predict(ff_test_inputs)

In [51]:
# Support Vector Machine
def SVM_Regression():
    """Fit a default support vector regressor on the training split and predict the test split.

    Reads the notebook-level `ff_train_inputs`, `ff_train_targets` and
    `ff_test_inputs`. A new `SVR` is created on every call.

    Returns:
        The model's predictions for `ff_test_inputs`.
    """
    # fit() returns the fitted estimator, so it can be bound directly
    support_vector_model = SVR().fit(ff_train_inputs, ff_train_targets)
    return support_vector_model.predict(ff_test_inputs)

### 1.3 Assessment of Regression

In [None]:
# Take the average of the MAE over several runs for more reliable results.
# The train/test split is not redrawn inside the loop, so each run refits the
# three models on the same training data and scores them on the same test data.
# The number of runs lives in one place so the loop bound and the divisors
# used for the averages cannot drift apart.
N_RUNS = 5

RF_total = 0
NN_total = 0
SVM_total = 0
for i in range(N_RUNS):
    # Each *_Regression() call trains a fresh model and returns its test predictions
    RF_total += mean_absolute_error(y_true=ff_test_targets, y_pred=RF_Regression())
    NN_total += mean_absolute_error(y_true=ff_test_targets, y_pred=NN_Regression())
    SVM_total += mean_absolute_error(y_true=ff_test_targets, y_pred=SVM_Regression())

# Mean MAE per model across the runs
print("Random Forest MAE: " + str(RF_total / N_RUNS))
print("NN MAE: " + str(NN_total / N_RUNS))
print("SVM MAE: " + str(SVM_total / N_RUNS))

