# Course: Deep Learning
# Author: Sandro Camargo <sandrocamargo@unipampa.edu.br>
# Linear Regression Example
# Dataset: https://archive.ics.uci.edu/ml/datasets/Algerian+Forest+Fires+Dataset++

In [None]:
# importing libraries
import numpy as np
from sklearn import linear_model
from sklearn import metrics
import matplotlib.pyplot as plt
import pandas as pd

In [None]:
# loading data
# Parameters used in this call:
#   header=1: the column names (day, month, year, ...) are on line 1 of this CSV file.
#   skiprows=[124, 125, 126, 170]: these lines do not contain valid data, so they are not imported.
#     If this parameter is missing, all lines are imported.
# Optional parameters that are NOT used in this call:
#   usecols=list(range(0, 13)): would leave out the last column, which is named Classes.
#     If this parameter is missing, all columns are imported.
#   delimiter: only needed when the columns are not separated by a comma (the pandas default).
data = pd.read_csv(
    'https://archive.ics.uci.edu/ml/machine-learning-databases/00547/Algerian_forest_fires_dataset_UPDATE.csv',
    skiprows=[124, 125, 126, 170],
    header=1,
)

# inspecting data
data.info()

In [None]:
# Generating descriptive statistics for the loaded dataset.
# As the last expression of the cell, the resulting table is shown as the cell output.
data.describe()

Inputs ($x$) and outputs ($y$) must be split into different variables.

In [None]:
# Inputs and output are separated and converted to numpy.ndarray format
col = data.columns[:12]  # names of the 12 input columns (positions 0 to 11)
x = data.iloc[:, :12].to_numpy()  # the first 12 columns are the inputs (x)
y = data['FWI'].to_numpy()  # column FWI is the output (y)

Creating and fitting the linear regression model

In [None]:
# Create the linear regression model and fit it on all samples (fit returns the estimator itself)
regr = linear_model.LinearRegression().fit(x, y)

# Predictions for the same samples that were used to fit the model
pred = regr.predict(x)

In [None]:
# Fitted coefficients, one per input column
print("Coefficients: \n", regr.coef_)

# Quality measures comparing the real outputs (y) with the model predictions (pred)
mse = metrics.mean_squared_error(y, pred)  # mean squared error
corr = np.corrcoef(y, pred)[0, 1]  # off-diagonal entry of the 2x2 correlation matrix
r2 = metrics.r2_score(y, pred)  # coefficient of determination: 1 is perfect prediction

print("Mean squared error: %.4f" % mse)
print("Correlation Coefficient: %.4f " % corr)
print("Coefficient of determination: %.4f" % r2)

In [None]:
# Predicted FWI against real FWI
plt.scatter(y, pred, color="blue")
# Dashed diagonal from (-5, -5) to (35, 35): points on it are perfect predictions
plt.plot([-5, 35], [-5, 35], linestyle='--', color='black')
plt.title("Correlation Coefficient: %.4f" % np.corrcoef(y, pred)[0, 1])
plt.ylabel("Predicted FWI")
plt.xlabel("Real FWI")
plt.show()

In [None]:
# Regression coefficient fitted for each input, shown as a horizontal bar chart
plt.barh(y=col, width=regr.coef_, align='center')
plt.ylabel("Inputs")
plt.xlabel("Regression Coefficients")
plt.title("FWI Linear Regression Model")

In [None]:
# FWI against the input stored at position 10 of x (plotted as ISI)
isi = x[:, 10]
plt.scatter(isi, y)
plt.ylabel("FWI")
plt.xlabel("ISI")
plt.title("Correlation Coefficient: %.4f" % np.corrcoef(isi, y)[0, 1])

In [None]:
# FWI against the input stored at position 11 of x (plotted as BUI)
bui = x[:, 11]
plt.scatter(bui, y)
plt.ylabel("FWI")
plt.xlabel("BUI")
plt.title("Correlation Coefficient: %.4f" % np.corrcoef(bui, y)[0, 1])

In [None]:
# FWI against relative humidity (RH).
# The column is looked up by name instead of by a hard-coded position, so the
# plotted data and the correlation in the title always match the "RH" label
# (in this dataset RH is expected at position 4 of x; position 5 holds another input).
# Header names may carry surrounding blanks, hence the strip() before the lookup.
rh = x[:, col.str.strip().get_loc("RH")]
plt.scatter(rh, y)
plt.xlabel("RH")
plt.ylabel("FWI")
plt.title("Correlation Coefficient: %.4f" % np.corrcoef(rh, y)[0, 1])