In [None]:
# Colab-specific step: this import only resolves inside a Google Colab runtime.
from google.colab import files
# Ask the user for file(s) and keep whatever `files.upload()` returns.
# NOTE(review): `uploaded` is not referenced again in the cells shown here;
# presumably the upload exists so that 'LSTM-Multivariate_pollution.csv' is
# present in the working directory for the next cell - confirm.
uploaded = files.upload()


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
# ---------------------------------------------------------------------------
# Load the pollution/weather table, then explore it:
#   1. average snow level per month, one line per year;
#   2. 365-day rolling sums of snow, temp and dew.
# After this cell `df` is indexed by calendar date (time of day removed) and
# carries the helper columns hour, year, month and rolling_*_sum.
# ---------------------------------------------------------------------------
df = pd.read_csv('LSTM-Multivariate_pollution.csv')
# A bare `df.head()` in the middle of a cell is evaluated but never displayed,
# so print it explicitly.
print(df.head())
df.info()

# Parse the timestamps once. Keep the hour of day in its own column and
# truncate 'date' to midnight so that all readings of one day share a key.
# `.dt.normalize()` keeps the datetime64 dtype, so nothing has to be re-parsed
# further down.
timestamps = pd.to_datetime(df['date'])
df['hour'] = timestamps.dt.hour
df['date'] = timestamps.dt.normalize()

# Display the updated DataFrame
print(df)

# Aggregate snow data by day
df_daily = df.groupby('date').agg({'snow': 'mean'}).reset_index()

# Calendar parts used to compare the same month across years
df['year'] = df['date'].dt.year
df['month'] = df['date'].dt.month

# Average snow level for each month of each year
df_monthly_avg = df.groupby(['year', 'month'])['snow'].mean().reset_index()

# Months on the rows, one column per year -> one plotted line per year
df_pivot = df_monthly_avg.pivot(index='month', columns='year', values='snow')

plt.figure(figsize=(12, 6))
for year in df_pivot.columns:
    plt.plot(df_pivot.index, df_pivot[year], label=f'Year {year}')

plt.title("Monthly Snow Levels Over Years")
plt.xlabel("Month")
plt.ylabel("Average Snow Level")
plt.xticks(range(1, 13), labels=['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'])
plt.legend(title="Year")
plt.grid(True)
plt.show()


# Index by date. Offset-based rolling windows need a monotonic DatetimeIndex,
# so sort with a stable algorithm (same-day rows keep their relative order).
df = df.set_index('date').sort_index(kind='mergesort')

# Rolling sums over the trailing 365 calendar days.
# The frame still has several rows per day (one per 'hour' value), so an
# integer window of 365 would cover 365 *rows* - roughly two weeks of hourly
# readings - rather than the 365 days the plot labels promise. A '365D' offset
# window is measured in time instead of rows.
# Offset windows default to min_periods=1, so rows closer than 365 days to the
# start of the data hold partial sums instead of NaN.
for column in ('snow', 'temp', 'dew'):
    df[f'rolling_{column}_sum'] = df[column].rolling(window='365D').sum()

# Plot the three rolling sums on a shared axis
plt.figure(figsize=(14, 6))

plt.plot(df['rolling_snow_sum'], label='365-Day Rolling Snow Sum', color='orange')
plt.plot(df['rolling_temp_sum'], label='365-Day Rolling Temp Sum', color='blue')
plt.plot(df['rolling_dew_sum'], label='365-Day Rolling Dew Sum', color='green')
plt.title("Yearly snow,temp,dew Levels and 365-Day Rolling Sum")
plt.xlabel("Date")
plt.ylabel("Snow,temp,dew Level")
# Only the span from 2011-01-01 to 2011-12-01 is shown
plt.xlim(pd.to_datetime('2011-01'), pd.to_datetime('2011-12'))

plt.legend()
plt.show()

In [None]:

from statsmodels.tsa.seasonal import seasonal_decompose





# Two regularly spaced views of the snow column: month-end means and
# calendar-day means.
df_monthly = df['snow'].resample('ME').mean()
df_daily = df['snow'].resample('D').mean()

# Additive decomposition of each view. `period` is the number of observations
# in one seasonal cycle: 12 for the monthly series, 365 for the daily one.
decomposition = seasonal_decompose(df_monthly, model='additive', period=12)
decomposition1 = seasonal_decompose(df_daily, model='additive', period=365)

# Draw the monthly decomposition as four stacked panels.
# (The daily decomposition is computed above but not plotted in this cell.)
panels = (
    (411, decomposition.observed, 'Observed', 'blue'),
    (412, decomposition.trend, 'Trend', 'orange'),
    (413, decomposition.seasonal, 'Seasonality', 'green'),
    (414, decomposition.resid, 'Residuals', 'red'),
)

plt.figure(figsize=(14, 10))
for position, component, caption, colour in panels:
    plt.subplot(position)
    plt.plot(component, label=caption, color=colour)
    plt.legend(loc='upper left')
plt.tight_layout()
plt.show()


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.ar_model import AutoReg
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.stattools import acf, pacf

def _last_significant_lag(correlations, n_obs):
    """Return the largest lag before the correlations first become insignificant.

    A value counts as insignificant when its magnitude is below the
    approximate 95% band ``1.96 / sqrt(n_obs)``.

    Parameters
    ----------
    correlations : sequence of float
        ACF or PACF values, where position ``k`` holds the value at lag ``k``.
    n_obs : int
        Number of observations in the series the correlations came from.

    Returns
    -------
    int
        ``k - 1`` where ``k`` is the first lag >= 1 inside the band, i.e. the
        last lag that is still significant. If every computed lag is
        significant, the highest computed lag is returned (0 for empty input).
    """
    threshold = 1.96 / np.sqrt(n_obs)
    # Lag 0 is the correlation of the series with itself, so start at lag 1.
    for lag in range(1, len(correlations)):
        if abs(correlations[lag]) < threshold:
            return lag - 1
    # No cut-off inside the computed lags: the order is at least the last lag.
    return max(len(correlations) - 1, 0)


# 1. Aggregate to one row per calendar day (sum).
# Every column of df is summed; only 'rain' is used in the cells shown here.
df_daily_rain = df.resample('D').sum()  # Daily total rainfall
print(df_daily_rain.head())

# 2. Plot ACF and PACF to help determine 'p' and 'q'
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))
plot_acf(df_daily_rain['rain'], ax=ax1)
plot_pacf(df_daily_rain['rain'], ax=ax2)
plt.show()

# 3. Derive 'p' and 'q' numerically from the same correlations
acf_vals = acf(df_daily_rain['rain'], fft=False)
pacf_vals = pacf(df_daily_rain['rain'])
n_obs = len(df_daily_rain['rain'])

# 'p': last lag at which the PACF is still significant
p = _last_significant_lag(pacf_vals, n_obs)

# 'q': last lag at which the ACF is still significant
q = _last_significant_lag(acf_vals, n_obs)
print(f"p: {p}, q: {q}")



In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.ar_model import AutoReg
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error

# Working orders for the two fits below. These fixed values replace whatever
# p and q an earlier cell computed.
p = 3 # Replace with your observed p from PACF plot
q = 2  # Replace with your observed q from ACF plot

# Daily rain series with missing values removed
rain_series = df_daily_rain['rain'].dropna()


def _plot_model_fit(actual, predicted, prediction_label, title):
    """Overlay model predictions (dashed red line) on the observed series."""
    plt.figure(figsize=(12, 6))
    plt.plot(actual, label="Actual Rain Data")
    plt.plot(predicted, color="red", linestyle="--", label=prediction_label)
    plt.title(title)
    plt.xlabel("Date")
    plt.ylabel("Rain")
    plt.legend()
    plt.show()


# 1. Applying the AR Model (Auto Regressive)
print("\n--- Auto Regressive (AR) Model ---")
ar_model = AutoReg(rain_series, lags=p).fit()
# Predictions are requested from observation p onwards, so the error is
# measured against the matching tail of the series.
ar_predictions = ar_model.predict(start=p, end=len(rain_series) - 1)
ar_mse = mean_squared_error(rain_series.iloc[p:], ar_predictions)
print(f"AR Model Mean Squared Error: {ar_mse}")

_plot_model_fit(
    rain_series,
    ar_predictions,
    "AR Model Predictions",
    f"Auto Regressive (AR) Model with p={p}",
)

# 2. Applying the MA Model (Moving Average)
print("\n--- Moving Average (MA) Model ---")
# A pure MA(q) model is ARIMA with order (0, 0, q). The middle term must be 0:
# a 1 there differences the series first, which fits a different model from
# the one this section reports.
ma_model = ARIMA(rain_series, order=(0, 0, q)).fit()
ma_predictions = ma_model.predict(start=q, end=len(rain_series) - 1)
ma_mse = mean_squared_error(rain_series.iloc[q:], ma_predictions)
print(f"MA Model Mean Squared Error: {ma_mse}")

_plot_model_fit(
    rain_series,
    ma_predictions,
    "MA Model Predictions",
    f"Moving Average (MA) Model with q={q}",
)


In [None]:
from statsmodels.tsa.stattools import adfuller
# Augmented Dickey-Fuller test on the daily rain series.
# The first two entries of the result are reported as the test statistic and
# the p-value.
adf_test = adfuller(df_daily_rain['rain'])
adf_statistic, adf_pvalue = adf_test[0], adf_test[1]
print(f'ADF Statistic: {adf_statistic}')
print(f'p-value: {adf_pvalue}')

# p-value < 0.05 suggests stationarity -> no differencing; otherwise difference once.
d = 0 if adf_pvalue < 0.05 else 1
print(f'd: {d}')

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from sklearn.metrics import mean_squared_error
import warnings

# Silence every warning from here on (no category filter is given), which
# includes any warnings raised while fitting the models below.
warnings.filterwarnings("ignore")

# ARIMA orders: p from the PACF plot, d from the ADF test, q from the ACF plot.
# These fixed example values replace anything computed in earlier cells.
p, d, q = 3, 0, 2

# Fit ARIMA(p, d, q) on the daily rain series
observed_rain = df_daily_rain['rain']
arima_model = ARIMA(observed_rain, order=(p, d, q))
arima_result = arima_model.fit()

# Print model summary
print(arima_result.summary())

# Observed series against the model's in-sample fitted values
arima_fitted = arima_result.fittedvalues
plt.figure(figsize=(10, 6))
plt.plot(df_daily_rain.index, observed_rain, label='Actual Rain Data')
plt.plot(df_daily_rain.index, arima_fitted, color='red', label='ARIMA Model Predictions')
plt.legend()
plt.title(f'ARIMA Model (p={p}, d={d}, q={q})')
plt.show()

# In-sample mean squared error of the fitted values
arima_mse = mean_squared_error(observed_rain, arima_fitted)
print(f'ARIMA Model Mean Squared Error: {arima_mse}')


In [None]:
# Seasonal orders (P, D, Q) and season length s; s=365 treats the daily data
# as having a yearly cycle. The non-seasonal p, d, q come from an earlier cell.
# NOTE(review): a season length of 365 presumably makes this fit slow and
# memory-hungry - confirm that it completes on the target runtime.
P, D, Q, s = 1, 0, 1, 365

# Fit SARIMA(p, d, q) x (P, D, Q, s) on the daily rain series
observed_rain = df_daily_rain['rain']
sarima_model = SARIMAX(
    observed_rain,
    order=(p, d, q),
    seasonal_order=(P, D, Q, s),
)
sarima_result = sarima_model.fit()

# Print model summary
print(sarima_result.summary())

# Observed series against the model's in-sample fitted values
sarima_fitted = sarima_result.fittedvalues
plt.figure(figsize=(10, 6))
plt.plot(df_daily_rain.index, observed_rain, label='Actual Rain Data')
plt.plot(df_daily_rain.index, sarima_fitted, color='orange', label='SARIMA Model Predictions')
plt.legend()
plt.title(f'SARIMA Model (p={p}, d={d}, q={q}) x (P={P}, D={D}, Q={Q}, s={s})')
plt.show()

# In-sample mean squared error of the fitted values
sarima_mse = mean_squared_error(observed_rain, sarima_fitted)
print(f'SARIMA Model Mean Squared Error: {sarima_mse}')
