In [None]:
# Importing required libraries 
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
from statsmodels.tsa.seasonal import seasonal_decompose 
from pmdarima import auto_arima 

In [177]:
# Load the raw readings from the CSV file
data = pd.read_csv("C:/Users/Newone.csv")

# Drop the four unnamed columns that are not needed for the analysis
unused_columns = ['Unnamed: 6', 'Unnamed: 7', 'Unnamed: 8', 'Unnamed: 9']
data = data.drop(columns=unused_columns)

# Display dataset information: row count, (rows, columns) and the first rows
print("Dataset length: ", len(data))
print("Dataset shape: ", data.shape)
print("Dataset: ", data.head())

data

Dataset length:  149
Dataset shape:  (149, 6)
Dataset:         Date  Time (24h)  Humidity(%) Weather  Temperature (C)   \
0  08-02-23          17        78.25   Sunny              27.0   
1  08-02-23          17        79.34   Sunny              28.6   
2  08-02-23           9        71.81   Sunny              29.1   
3  08-02-23          11        68.31   Windy              29.8   
4  08-05-23          10        68.72  Cloudy              29.3   

              Location  
0  6.867831, 79.967030  
1  6.867818, 79.967296  
2  6.867831, 79.967030  
3  6.867831, 79.967030  
4  6.867451, 79.967261  


Unnamed: 0,Date,Time (24h),Humidity(%),Weather,Temperature (C),Location
0,08-02-23,17,78.25,Sunny,27.0,"6.867831, 79.967030"
1,08-02-23,17,79.34,Sunny,28.6,"6.867818, 79.967296"
2,08-02-23,9,71.81,Sunny,29.1,"6.867831, 79.967030"
3,08-02-23,11,68.31,Windy,29.8,"6.867831, 79.967030"
4,08-05-23,10,68.72,Cloudy,29.3,"6.867451, 79.967261"
...,...,...,...,...,...,...
144,28-08-23,11,70.24,Rainy,31.1,"6.867818, 79.967296"
145,08-02-23,11,59.38,Sunny,29.0,"6.867818, 79.967501"
146,08-11-23,18,81.41,Cloudy,27.8,"6.867818, 79.967501"
147,20-08-23,10,62.17,Sunny,30.9,"6.867818, 79.967296"


In [178]:
# Encode each "latitude, longitude" string as an integer location ID.
# IDs are assigned in list order, starting at 1.
location_coordinates = [
    "6.867831, 79.967030",
    "6.867818, 79.967296",
    "6.867818, 79.967501",
    "6.867690, 79.967585",
    "6.867536, 79.967425",
    "6.867451, 79.967261",
    "6.867603, 79.967033",
]
location_mapping = {
    coordinates: location_id
    for location_id, coordinates in enumerate(location_coordinates, start=1)
}

# Coordinates that are not listed above become NaN after the mapping
data['Location'] = data['Location'].map(location_mapping)

data

Unnamed: 0,Date,Time (24h),Humidity(%),Weather,Temperature (C),Location
0,08-02-23,17,78.25,Sunny,27.0,1
1,08-02-23,17,79.34,Sunny,28.6,2
2,08-02-23,9,71.81,Sunny,29.1,1
3,08-02-23,11,68.31,Windy,29.8,1
4,08-05-23,10,68.72,Cloudy,29.3,6
...,...,...,...,...,...,...
144,28-08-23,11,70.24,Rainy,31.1,2
145,08-02-23,11,59.38,Sunny,29.0,3
146,08-11-23,18,81.41,Cloudy,27.8,3
147,20-08-23,10,62.17,Sunny,30.9,2


In [179]:
# Integer codes for the weather categories
weather_mapping = dict(Sunny=0, Cloudy=1, Rainy=2, Windy=3)

# Substitute the codes for the labels in the 'Weather' column
data['Weather'] = data['Weather'].replace(weather_mapping)

# Parse 'Date' as day-month-year (two-digit year) into datetime values.
# NOTE(review): a value such as "08-02-23" is therefore read as 8 Feb 2023 --
# confirm the file really is day-first throughout.
data['Date'] = pd.to_datetime(data['Date'], format='%d-%m-%y')

In [180]:
# Build a single 'Datetime' column from the calendar date and the hour of day.
# 'Time (24h)' holds the hour as a number, so it is added to 'Date' directly
# as an hour offset instead of round-tripping through
# datetime -> time -> string -> timedelta.
hours = data['Time (24h)']

# The previous '%H' parse rejected hours outside 0-23; keep that guarantee.
if not hours.dropna().between(0, 23).all():
    raise ValueError("'Time (24h)' must contain hours in the range 0-23")

merged_datetime = data['Date'] + pd.to_timedelta(hours, unit='h')

# Assign the merged datetime to a new column
data['Datetime'] = merged_datetime

# The separate 'Date' and 'Time (24h)' columns are now redundant
data = data.drop(columns=['Date', 'Time (24h)'])

# NOTE(review): 'Datetime' is deliberately left as a regular column (not the
# index), and the rows are kept in file order, which is not chronological.

data

Unnamed: 0,Humidity(%),Weather,Temperature (C),Location,Datetime
0,78.25,0,27.0,1,2023-02-08 17:00:00
1,79.34,0,28.6,2,2023-02-08 17:00:00
2,71.81,0,29.1,1,2023-02-08 09:00:00
3,68.31,3,29.8,1,2023-02-08 11:00:00
4,68.72,1,29.3,6,2023-05-08 10:00:00
...,...,...,...,...,...
144,70.24,2,31.1,2,2023-08-28 11:00:00
145,59.38,0,29.0,3,2023-02-08 11:00:00
146,81.41,1,27.8,3,2023-11-08 18:00:00
147,62.17,0,30.9,2,2023-08-20 10:00:00


In [181]:
# List the column labels that remain after preprocessing
remaining_columns = data.columns
print(remaining_columns)

Index(['Humidity(%)', 'Weather', 'Temperature (C) ', 'Location', 'Datetime'], dtype='object')


### ARIMA Model for Time Series Forecasting

In [182]:
# Stepwise auto_arima search over seasonal ARIMA orders for the 'Location' series
search_options = dict(
    # Non-seasonal AR / MA orders: start at 1, search up to 3
    start_p=1, start_q=1,
    max_p=3, max_q=3,
    # Seasonal part with period m=7 (the original note calls this a 7-day
    # season -- NOTE(review): confirm that one row corresponds to one day)
    m=7, start_P=0, seasonal=True,
    # d is left for auto_arima to choose; seasonal differencing is fixed at 1
    d=None, D=1,
    trace=True,               # print every candidate model that is tried
    error_action='ignore',    # don't want to know if an order does not work
    suppress_warnings=True,   # don't want convergence warnings
    stepwise=True,            # stepwise search instead of a full grid
)
stepwise_fit = auto_arima(data['Location'], **search_options)

# Show the summary of the selected model
stepwise_fit.summary()


Performing stepwise search to minimize aic
 ARIMA(1,0,1)(0,1,1)[7] intercept   : AIC=inf, Time=0.18 sec
 ARIMA(0,0,0)(0,1,0)[7] intercept   : AIC=631.026, Time=0.00 sec
 ARIMA(1,0,0)(1,1,0)[7] intercept   : AIC=594.164, Time=0.07 sec
 ARIMA(0,0,1)(0,1,1)[7] intercept   : AIC=inf, Time=0.23 sec
 ARIMA(0,0,0)(0,1,0)[7]             : AIC=629.049, Time=0.01 sec
 ARIMA(1,0,0)(0,1,0)[7] intercept   : AIC=632.868, Time=0.05 sec
 ARIMA(1,0,0)(2,1,0)[7] intercept   : AIC=586.033, Time=0.13 sec
 ARIMA(1,0,0)(2,1,1)[7] intercept   : AIC=inf, Time=0.44 sec
 ARIMA(1,0,0)(1,1,1)[7] intercept   : AIC=inf, Time=0.33 sec
 ARIMA(0,0,0)(2,1,0)[7] intercept   : AIC=584.475, Time=0.10 sec
 ARIMA(0,0,0)(1,1,0)[7] intercept   : AIC=592.474, Time=0.05 sec
 ARIMA(0,0,0)(2,1,1)[7] intercept   : AIC=inf, Time=0.40 sec
 ARIMA(0,0,0)(1,1,1)[7] intercept   : AIC=inf, Time=0.25 sec
 ARIMA(0,0,1)(2,1,0)[7] intercept   : AIC=585.995, Time=0.10 sec
 ARIMA(1,0,1)(2,1,0)[7] intercept   : AIC=582.322, Time=0.33 sec
 ARIMA

0,1,2,3
Dep. Variable:,y,No. Observations:,149.0
Model:,"SARIMAX(1, 0, 1)x(2, 1, [], 7)",Log Likelihood,-285.168
Date:,"Thu, 07 Mar 2024",AIC,580.336
Time:,03:02:45,BIC,595.115
Sample:,0,HQIC,586.341
,- 149,,
Covariance Type:,opg,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
ar.L1,-0.8256,0.099,-8.350,0.000,-1.019,-0.632
ma.L1,0.9453,0.066,14.280,0.000,0.816,1.075
ar.S.L7,-0.6424,0.084,-7.667,0.000,-0.807,-0.478
ar.S.L14,-0.2751,0.069,-3.994,0.000,-0.410,-0.140
sigma2,3.1798,0.352,9.025,0.000,2.489,3.870

0,1,2,3
Ljung-Box (L1) (Q):,0.11,Jarque-Bera (JB):,2.98
Prob(Q):,0.74,Prob(JB):,0.23
Heteroskedasticity (H):,0.39,Skew:,0.29
Prob(H) (two-sided):,0.0,Kurtosis:,3.4


### Fit ARIMA Model to the dataset

In [183]:
# Hold out the last 7 rows as the test set; everything before them is training data
split_at = len(data) - 7
train, test = data.iloc[:split_at], data.iloc[split_at:]

# Fit a SARIMAX(1, 0, 1)x(2, 1, [], 7) on the training set
from statsmodels.tsa.statespace.sarimax import SARIMAX

# The empty list in the seasonal order means no seasonal MA lags are included
sarimax_orders = dict(order=(1, 0, 1), seasonal_order=(2, 1, [], 7))
model = SARIMAX(train['Location'], **sarimax_orders)

result = model.fit()
result.summary()


  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'


0,1,2,3
Dep. Variable:,Location,No. Observations:,142.0
Model:,"SARIMAX(1, 0, 1)x(2, 1, [], 7)",Log Likelihood,-273.112
Date:,"Thu, 07 Mar 2024",AIC,556.225
Time:,03:02:48,BIC,570.751
Sample:,0,HQIC,562.128
,- 142,,
Covariance Type:,opg,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
ar.L1,-0.8235,0.103,-8.014,0.000,-1.025,-0.622
ma.L1,0.9443,0.069,13.702,0.000,0.809,1.079
ar.S.L7,-0.6256,0.088,-7.125,0.000,-0.798,-0.453
ar.S.L14,-0.2611,0.071,-3.661,0.000,-0.401,-0.121
sigma2,3.2762,0.376,8.710,0.000,2.539,4.013

0,1,2,3
Ljung-Box (L1) (Q):,0.18,Jarque-Bera (JB):,2.29
Prob(Q):,0.67,Prob(JB):,0.32
Heteroskedasticity (H):,0.4,Skew:,0.26
Prob(H) (two-sided):,0.0,Kurtosis:,3.36


### Predictions of ARIMA Model against the test set

In [184]:
# Positions of the hold-out rows, continuing on from the end of the training sample
start = len(train)
end = len(train) + len(test) - 1

# Predict one value per test row.
# The former `typ='levels'` argument was removed: it is not a parameter of the
# SARIMAX results' predict(), which already returns values on the scale of the
# original series, and statsmodels warns that unknown keyword arguments will
# raise a TypeError in a future release.
predictions = result.predict(start, end).rename("Predictions")

print(predictions)

print("     ")

# Round each prediction to the nearest integer so it reads as a location ID
rounde_predictions = [round(value) for value in predictions]
print("Predicted Location: ", rounde_predictions)

# plot predictions and actual values
#predictions.plot(legend = True)
#test['Location'].plot(legend = True)

142    2.071237
143    2.895247
144    3.348980
145    4.126063
146    2.809365
147    3.185231
148    3.576347
Name: Predictions, dtype: float64
     
Predicted Location:  [2, 3, 3, 4, 3, 3, 4]




### Evaluate the model using MSE and RMSE

In [185]:
# Evaluation helpers
from sklearn.metrics import mean_squared_error
from statsmodels.tools.eval_measures import rmse

actual_locations = test["Location"]

# Root mean squared error of the hold-out predictions
rmse_value = rmse(actual_locations, predictions)
print("RMSE:", rmse_value)

# Mean squared error of the same predictions
mse_value = mean_squared_error(actual_locations, predictions)
print("MSE:", mse_value)

RMSE: 1.238942656983279
MSE: 1.534978907292787


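- The RMSE of approximately 1.239 means that the ARIMA model's predictions typically differ from the actual values by about 1.24 location codes (RMSE weights large errors more heavily than a simple average of absolute errors). This is a fairly small error in absolute terms, but because `Location` only takes the integer codes 1–7, it should be judged against that narrow range before concluding that the model's performance is good.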

- The MSE of approximately 1.535 is the average of the squared differences between predicted and actual values, expressed in squared units. It is simply the square of the RMSE (1.239² ≈ 1.535), so it conveys the same information about the model's performance rather than providing independent evidence.

- In summary, based on these metrics, the ARIMA model appears to perform well in terms of accurately predicting the values. However, it's essential to consider other factors and evaluate the model's performance comprehensively, such as examining residuals, analyzing forecast plots, and comparing against alternative models or benchmarks.

### Forecast using ARIMA Model

In [186]:
# Refit the selected model, ARIMA(1,0,1)(2,1,0)[7], on the full dataset
# (the duplicated `model = model = ...` assignment was collapsed into one)
model = SARIMAX(data['Location'],
                order=(1, 0, 1),
                seasonal_order=(2, 1, 0, 7))
result = model.fit()

# Forecast the 7 steps that follow the end of the sample (the notebook treats
# one step as one day -- NOTE(review): confirm the rows are daily observations).
# `typ='levels'` was removed: it is not a parameter of the SARIMAX results'
# predict(), and statsmodels warns that unknown keyword arguments will raise
# a TypeError in a future release.
forecast_steps = 7
forecast = result.predict(start=len(data),
                          end=len(data) + forecast_steps - 1).rename('Forecast')

print(forecast)

print("     ")

# Round each forecast to the nearest integer so it reads as a location ID
rounded_forecast = [round(value) for value in forecast]
print("Location: ",rounded_forecast)

# Plot the forecast values
#data['Location'].plot(figsize = (12, 5), legend = True)
#forecast.plot(legend = True)

  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'


149    2.863753
150    2.877397
151    3.385972
152    3.743158
153    2.793878
154    2.952704
155    3.056666
Name: Forecast, dtype: float64
     
Location:  [3, 3, 3, 4, 3, 3, 3]


