DSCI 100 008 Project
Group 8

In [1]:
# Loading packages
# NOTE: the attach order below determines which package's functions mask
# which, so it is left as written.
library(repr)        # plot-size options (repr.plot.*) set further down
library(tidyverse)   # attaches ggplot2, dplyr, tidyr, readr, ...
library(tidymodels)  # attaches rsample, workflows, ... used for modelling
library(dplyr)       # already attached by tidyverse; repeated here explicitly
library(cowplot)     # plot_grid() for combining figures

# Fix the random seed so the random train/test split is reproducible.
set.seed(999)
# Default width and height used when plots are rendered in the notebook.
options(repr.plot.width = 8, repr.plot.height = 8)

── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
✔ ggplot2 3.4.2     ✔ purrr   1.0.1
✔ tibble  3.2.1     ✔ dplyr   1.1.1
✔ tidyr   1.3.0     ✔ stringr 1.5.0
✔ readr   2.1.3     ✔ forcats 0.5.2
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
── Attaching packages ────────────────────────────────────── tidymodels 1.0.0 ──

✔ broom        1.0.2     ✔ rsample      1.1.1
✔ dials        1.1.0     ✔ tune         1.0.1
✔ infer        1.0.4     ✔ workflows    1.1.2
[32m✔[39

In [None]:
# Download the raw forest-fire data set from the project repository and
# preview its first six rows.
ff_raw <- read_csv(
  file = "https://raw.githubusercontent.com/MingTxm/DSCI100_GroupProject/main/forestfires.csv"
)
head(ff_raw, n = 6L)

Rows: 517 Columns: 13
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr  (2): month, day
dbl (11): X, Y, FFMC, DMC, DC, ISI, temp, RH, wind, rain, area

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.


In [None]:
# Data wrangling ----
# Keep only the columns used in the analysis (month, ISI, temp, wind), discard
# rows with a missing value in any of them, and order the rows by month in
# descending order.
ff <- ff_raw |>
  select(month, ISI, temp, wind) |>
  drop_na() |>
  arrange(desc(month))
head(ff)
nrow(ff)

# Train/test split ----
# 75% of the rows go to training and the rest to testing; the split is
# stratified on the response (ISI).
ff_split <- initial_split(ff, prop = 0.75, strata = ISI)
ff_train <- training(ff_split)
ff_test <- testing(ff_split)

In [None]:
# Fig. 1: scatter plot of ISI against temperature for the cleaned data (ff).
# Points are semi-transparent so overlapping observations remain visible.
ff_temp_plot <- ggplot(ff, aes(x = temp, y = ISI)) +
  geom_point(alpha = 0.4) +
  labs(
    title = "Fig. 1 - ISI Index vs Temperature",
    x = "Temperature (Celsius)",
    y = "ISI Index"
  ) +
  theme(text = element_text(size = 12))
ff_temp_plot

In [None]:
# Temperature regression model ----
# Simple linear regression of ISI on temp, fitted on the training set only.

# Training columns needed by this model.
ff_train_temp <- select(ff_train, ISI, temp)
head(ff_train_temp)

# Linear regression specification using the "lm" engine.
lm_spec <- linear_reg() |>
  set_mode("regression") |>
  set_engine("lm")

# ISI is the response, temp the single predictor.
temp_recipe <- recipe(ISI ~ temp, data = ff_train_temp)

# Bundle the model specification and recipe, then fit on the training data.
temp_fit <- workflow() |>
  add_model(lm_spec) |>
  add_recipe(temp_recipe) |>
  fit(data = ff_train_temp)

In [None]:
# Temperature prediction ----
# Predict ISI for the test set with the temperature model and attach the
# predictions (.pred) to the corresponding test rows.
temp_prediction <- predict(temp_fit, new_data = ff_test) |>
  bind_cols(ff_test) |>
  select(.pred, month, ISI, temp)
head(temp_prediction)

# Fig. 2: observed test-set ISI (points) with the model's predictions drawn
# as a blue line over temperature. The line inherits x = temp from the plot.
temp_prediction_plot <- ggplot(temp_prediction, aes(x = temp, y = ISI)) +
  geom_point(alpha = 0.4) +
  geom_line(aes(y = .pred), colour = "blue") +
  labs(
    title = "Fig.2 - ISI (prediction) vs Temperature",
    x = "Temperature (Celsius)",
    y = "ISI Index"
  ) +
  theme(text = element_text(size = 12))
temp_prediction_plot

In [None]:
# Combined temperature figures ----
# Arrange the raw scatter plot and the prediction plot in a single column
# (ncol = 1), i.e. stacked one above the other.
temp_plots <- plot_grid(
  plotlist = list(ff_temp_plot, temp_prediction_plot),
  ncol = 1
)
temp_plots

In [None]:
# Temperature model accuracy ----
# Evaluate the temperature model on the test set: compare predicted ISI
# (.pred) with the observed ISI.
temp_prediction_results <- predict(temp_fit, new_data = ff_test) |>
  bind_cols(ff_test) |>
  metrics(truth = ISI, estimate = .pred)

# RMSPE: the RMSE row of the test-set metrics, extracted as a single number.
temp_rmspe <- temp_prediction_results |>
  filter(.metric == "rmse") |>
  pull(.estimate)
temp_rmspe


In [None]:
# Fig. 3: scatter plot of ISI against wind speed for the cleaned data (ff).
# Points are semi-transparent so overlapping observations remain visible.
ff_wind_plot <- ggplot(ff, aes(x = wind, y = ISI)) +
  geom_point(alpha = 0.4) +
  labs(
    title = "Fig.3 - ISI Index vs Wind Speed",
    x = "Wind Speed (km/h)",
    y = "ISI Index"
  ) +
  theme(text = element_text(size = 12))
ff_wind_plot

In [None]:
# Wind regression model ----
# Simple linear regression of ISI on wind, fitted on the training set only.

# Training columns needed by this model.
ff_train_wind <- select(ff_train, ISI, wind)
head(ff_train_wind)

# Linear regression specification using the "lm" engine (defined again here so
# this cell does not rely on the temperature-model cell having been run).
lm_spec <- linear_reg() |>
  set_mode("regression") |>
  set_engine("lm")

# ISI is the response, wind the single predictor.
wind_recipe <- recipe(ISI ~ wind, data = ff_train_wind)

# Bundle the model specification and recipe, then fit on the training data.
wind_fit <- workflow() |>
  add_model(lm_spec) |>
  add_recipe(wind_recipe) |>
  fit(data = ff_train_wind)

In [None]:
# Wind prediction ----
# Predict ISI for the test set with the wind model and attach the predictions
# (.pred) to the corresponding test rows.
wind_prediction <- predict(wind_fit, new_data = ff_test) |>
  bind_cols(ff_test) |>
  select(.pred, month, ISI, wind)
head(wind_prediction)

# Plotting wind prediction ----
# Fig. 4: observed test-set ISI (points) with the model's predictions drawn
# as a blue line over wind speed. The line inherits x = wind from the plot.
wind_prediction_plot <- ggplot(wind_prediction, aes(x = wind, y = ISI)) +
  geom_point(alpha = 0.4) +
  geom_line(aes(y = .pred), colour = "blue") +
  labs(
    title = "Fig. 4 - ISI (prediction) vs Wind Speed",
    x = "Wind Speed (km/h)",
    y = "ISI Index"
  ) +
  theme(text = element_text(size = 12))
wind_prediction_plot

In [None]:
# Combined wind figures ----
# Arrange the raw scatter plot and the prediction plot in a single column
# (ncol = 1), i.e. stacked one above the other.
wind_plots <- plot_grid(
  plotlist = list(ff_wind_plot, wind_prediction_plot),
  ncol = 1
)
wind_plots

In [None]:
# Wind model accuracy ----
# Evaluate the wind model on the test set: compare predicted ISI (.pred) with
# the observed ISI.
wind_prediction_results <- predict(wind_fit, new_data = ff_test) |>
  bind_cols(ff_test) |>
  metrics(truth = ISI, estimate = .pred)

# RMSPE: the RMSE row of the test-set metrics, extracted as a single number.
wind_rmspe <- wind_prediction_results |>
  filter(.metric == "rmse") |>
  pull(.estimate)
wind_rmspe