In [None]:
# First cell
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from statsmodels.tsa.arima.model import ARIMA
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Second cell
# Load the data
# Read the raw revenue CSV, parsing the 'Date' column as datetimes, and use
# that column as the frame's index.
raw_revenue = pd.read_csv(
    '../data/raw/revenue_data.csv',
    parse_dates=['Date'],
)
revenue_data = raw_revenue.set_index('Date')
# Preview the first rows (rendered only when this is the last expression of a notebook cell).
revenue_data.head()

# Third cell
# Linear Regression
# Single feature: whole days elapsed since the first date in the index,
# reshaped to the (n_samples, 1) column layout scikit-learn expects.
X = (revenue_data.index - revenue_data.index[0]).days.values.reshape(-1, 1)
y = revenue_data['Revenue'].values

# Hold out the final 20% of rows WITHOUT shuffling. A shuffled split on a
# time-indexed series interleaves test points with training points, so the
# score would reflect interpolation rather than forecasting ability.
# NOTE(review): assumes the rows are already in chronological order - confirm.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

model = LinearRegression()
model.fit(X_train, y_train)

# Predictions for the held-out rows, aligned element-wise with X_test / y_test.
y_pred = model.predict(X_test)

mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Linear Regression - MSE: {mse:.2f}, R2: {r2:.2f}")

# Fourth cell
# Visualize Linear Regression results
# X_test holds day offsets from the first date, not row positions, so the
# dates are rebuilt as origin + offset instead of indexing the DatetimeIndex
# positionally (which is only correct for a gap-free daily series).
test_days = X_test.flatten()
# Sort by time so the 'lines' trace is drawn left to right regardless of the
# order in which the test rows were selected.
order = np.argsort(test_days)
test_dates = revenue_data.index[0] + pd.to_timedelta(test_days[order], unit='D')

fig = go.Figure()
fig.add_trace(go.Scatter(x=test_dates, y=y_test[order], mode='markers', name='Actual'))
fig.add_trace(go.Scatter(x=test_dates, y=y_pred[order], mode='lines', name='Predicted'))
fig.update_layout(title='Revenue Prediction - Linear Regression', xaxis_title='Date', yaxis_title='Revenue')
fig.show()

# Fifth cell
# ARIMA
# Fit an ARIMA model with order (1, 1, 1) to the 'Revenue' column and
# forecast 30 steps past the end of the series.
arima_order = (1, 1, 1)
forecast_horizon = 30

# NOTE: this rebinds `model`, replacing the object it referred to before.
model = ARIMA(revenue_data['Revenue'], order=arima_order)
results = model.fit()

forecast = results.forecast(steps=forecast_horizon)
print("ARIMA model fitted and forecast generated.")

# Sixth cell
# Visualize ARIMA results
# Build one trace for the observed revenue and one for the forecast, then
# show both on a single figure.
historical_trace = go.Scatter(
    x=revenue_data.index,
    y=revenue_data['Revenue'],
    mode='lines',
    name='Historical',
)
forecast_trace = go.Scatter(
    x=forecast.index,
    y=forecast,
    mode='lines',
    name='Forecast',
)

fig = go.Figure(data=[historical_trace, forecast_trace])
fig.update_layout(
    title='Revenue Prediction - ARIMA',
    xaxis_title='Date',
    yaxis_title='Revenue',
)
fig.show()