In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error
from statsmodels.tsa.arima.model import ARIMA
from fbprophet import Prophet
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

# Load the dataset
# NOTE(review): absolute, machine-specific path; consider a configurable data directory.
data = pd.read_csv("/workspace/Jupyter-Notebooks/MA/PRSA_Data_Guanyuan_20130301-20170228.csv")

# Artificially degrade the data by adding zero-mean Gaussian noise (std 0.5).
# The generator is seeded so that Restart & Run All reproduces the same noise,
# and therefore the same RMSE figures, on every run.
NOISE_SEED = 42
rng = np.random.default_rng(NOISE_SEED)
noise = rng.normal(0, 0.5, len(data))
data["PM2.5"] = data["PM2.5"] + noise
# NOTE(review): if PM2.5 contains missing values they stay missing after the
# noise is added and will reach the models and the RMSE computation below;
# confirm and impute/drop here if so.

# Split the data into training and testing sets by row position (no shuffling):
# the first 80% of rows train the models, the remainder is held out.
# Assumes the rows are already in time order -- TODO confirm.
train_size = int(len(data) * 0.8)
train, test = data.iloc[:train_size], data.iloc[train_size:]

# Define a function to evaluate the root mean squared error (RMSE) of a model
def evaluate_model(y, yhat):
    """Return the root mean squared error (RMSE) between two sequences.

    Parameters
    ----------
    y : array-like
        Observed values.
    yhat : array-like
        Predicted values, compared element by element with ``y``.

    Returns
    -------
    float
        Square root of ``mean_squared_error(y, yhat)``.
    """
    mse = mean_squared_error(y, yhat)
    rmse = np.sqrt(mse)
    return rmse

# Time series modeling with ARIMA
# Fit ARIMA(1, 1, 1) on the training series, then forecast one value for every
# row of the test set (positions len(train) .. len(data) - 1).
arima_model = ARIMA(train["PM2.5"], order=(1, 1, 1))
model_fit = arima_model.fit()
# `typ="levels"` was an option of the older statsmodels.tsa.arima_model API.
# It is not a parameter of the statsmodels.tsa.arima.model results used here,
# so the keyword is dropped rather than passed through as an unknown argument.
yhat_arima = model_fit.predict(start=len(train), end=len(data) - 1)
rmse_arima = evaluate_model(test["PM2.5"], yhat_arima)

# Time series modeling with Prophet
# NOTE(review): the `from fbprophet import Prophet` import at the top of this
# cell is what raised the recorded ModuleNotFoundError. The library has been
# published as `prophet` since its 1.0 release, so the import needs updating
# (or the legacy package installing) before this section can run.

# Prophet expects a frame with a timestamp column `ds` and a target column `y`.
# Use an existing `ds` column when there is one; otherwise assemble it from
# separate date-part columns.
# Assumes the CSV provides year/month/day/hour columns -- TODO confirm.
if "ds" in train.columns:
    prophet_ds = train["ds"]
else:
    prophet_ds = pd.to_datetime(train[["year", "month", "day", "hour"]])
prophet_train = pd.DataFrame({"ds": prophet_ds, "y": train["PM2.5"]})

prophet_model = Prophet()
prophet_model.fit(prophet_train)
# Extend the training timeline by one hourly step per test row; the last
# len(test) predictions are the out-of-sample forecasts.
future = prophet_model.make_future_dataframe(periods=len(test), freq="H")
yhat_prophet = prophet_model.predict(future)["yhat"].tail(len(test))
rmse_prophet = evaluate_model(test["PM2.5"], yhat_prophet)

# Time series modeling with LSTM
window_size = 24


def make_supervised_windows(values, width):
    """Turn a 1-D series into (samples, width, 1) inputs and next-step targets.

    Sample ``k`` holds ``values[k:k + width]`` and its target is
    ``values[k + width]``, so a series of length ``n`` yields ``n - width``
    samples (none when ``n <= width``).

    Parameters
    ----------
    values : array-like
        One-dimensional sequence of observations.
    width : int
        Number of consecutive observations in each input window.

    Returns
    -------
    tuple of numpy.ndarray
        ``(X, y)`` with ``X.shape == (n - width, width, 1)`` and
        ``y.shape == (n - width,)``.
    """
    values = np.asarray(values, dtype=float)
    n_samples = len(values) - width
    if n_samples <= 0:
        return np.empty((0, width, 1)), np.empty((0,))
    windows = np.stack([values[k:k + width] for k in range(n_samples)])
    # Trailing axis of size 1 = one feature per time step, as the LSTM expects.
    return windows[..., np.newaxis], values[width:]


# Positional NumPy indexing avoids the label-vs-position ambiguity of
# indexing a pandas Series with integers.
train_X, train_y = make_supervised_windows(train["PM2.5"].to_numpy(), window_size)

model = Sequential()
model.add(LSTM(50, input_shape=(window_size, 1)))
model.add(Dense(1))
model.compile(loss="mse", optimizer="adam")
model.fit(train_X, train_y, epochs=50, batch_size=72, verbose=0)

test_X, test_y = make_supervised_windows(test["PM2.5"].to_numpy(), window_size)
yhat_lstm = np.squeeze(model.predict(test_X, verbose=0), axis=-1)
# The first `window_size` test rows have no full input window and therefore no
# prediction, so score against the targets that do (test_y), not the whole
# test column -- comparing arrays of different lengths raises an error.
# Note these are one-step-ahead predictions fed with observed test values,
# and they cover window_size fewer rows than the ARIMA/Prophet scores.
rmse_lstm = evaluate_model(test_y, yhat_lstm)

# Report the RMSE obtained by each model, one line per model
for template, score in (
    ("ARIMA RMSE: %.3f", rmse_arima),
    ("Prophet RMSE: %.3f", rmse_prophet),
    ("LSTM RMSE: %.3f", rmse_lstm),
):
    print(template % score)


ModuleNotFoundError: No module named 'fbprophet'

In [7]:
# List installed packages with the shell's `pip` and keep only lines that
# mention `fbprophet`, to check whether the failing import's package is installed.
# NOTE(review): `!pip` runs whichever pip is on the shell PATH, which may not be
# the kernel's environment; `%pip` is the kernel-aware form -- confirm which is intended.
!pip list | grep fbprophet