# Take 1

In [None]:
import pandas as pd
import numpy as np
from darts import TimeSeries
from darts.models import RandomForest
from darts.metrics import mape

# --- Synthetic data: daily observations from 2020-01-01 through 2021-12-31 ---
np.random.seed(42)
date_range = pd.date_range(start='2020-01-01', end='2021-12-31', freq='D')
# Uniform noise scaled to [0, 100) with a 0.1-per-day upward drift on top.
values = np.random.rand(len(date_range)) * 100 + np.arange(len(date_range)) * 0.1
df = pd.DataFrame({'date': date_range, 'value': values}).set_index('date')

# --- Wrap the frame as a Darts TimeSeries (time axis comes from the index) ---
series = TimeSeries.from_dataframe(df, value_cols=['value'])

# Number of past observations the model looks at (last 14 days -> next day).
lags = 14

# --- Hold out the final 30 points for evaluation ---
test = series[-30:]
train = series[:-30]

# --- Fit the random forest on the training portion ---
model = RandomForest(lags=lags, n_estimators=100, random_state=42)
model.fit(train)

# --- Forecast as many steps as there are held-out points ---
predictions = model.predict(len(test))

# --- Score the forecast against the held-out data ---
mape_score = mape(test, predictions)
print(f"MAPE: {mape_score:.2f}%")

# --- Persist the fitted model ---
model.save("random_forest_model.pkl")

print("Model saved successfully.")

# Take 2

## Plotly functions

### plot_time_series

In [2]:
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
from typing import List
import pandas as pd


pio.templates.default = "plotly_dark"
# pio.templates._available_templates_str()
# --> Available templates:
# [   'ggplot2', 'seaborn', 'simple_white', 'plotly',
#      'plotly_white', 'plotly_dark', 'presentation', 'xgridoff',
#      'ygridoff', 'gridon', 'none'   ]


def plot_time_series(plots: List[pd.DataFrame], title: str = 'Shampoo Sales', x: str = 'date', y: str = 'value') -> None:
    """Draw one or more dataframes as line traces on a single Plotly figure.

    Args:
        plots: Dataframes to draw; each must contain the columns named by
            ``x`` and ``y``. A single dataframe (anything that is not a
            ``list``) is accepted too and is wrapped in a one-element list.
        title: Figure title.
        x: Name of the column plotted on the horizontal axis.
        y: Name of the column plotted on the vertical axis.

    Returns:
        None. The figure is displayed via ``fig.show()``.

    Raises:
        KeyError: If a dataframe lacks the ``x`` or ``y`` column.

    The first trace is labelled 'Sales' and the second 'Predictions'; any
    further traces get a generic 'Series N' label instead of failing.
    """
    if not isinstance(plots, list):
        plots = [plots]

    # Legend labels for the usual "actual vs. forecast" pair.
    trace_names = ('Sales', 'Predictions')

    fig = go.Figure()
    for i, df in enumerate(plots):
        name = trace_names[i] if i < len(trace_names) else f'Series {i + 1}'
        fig.add_trace(
            # go.Scatter with mode='lines' replaces the deprecated go.Line.
            go.Scatter(
                x=df[x],
                y=df[y],
                mode='lines',
                name=name
            )
        )

    fig.update_layout(
        title=title,
        xaxis_title="Date",
        yaxis_title="Number of Sales",
        hovermode="x unified"
    )

    # Show only the y value in the hover box; <extra></extra> hides the
    # secondary box carrying the trace name.
    fig.update_traces(
        hovertemplate="%{y}<extra></extra>"
    )

    fig.update_xaxes(
        tickformat="%B %d, %Y",
        hoverformat="%B %d, %Y"
    )

    # Add a subtle watermark just above the plotting area.
    fig.add_annotation(
        text="Brasse Vannie Kaap",
        x=0.5,
        y=1.05,
        xref="paper",
        yref="paper",
        showarrow=False,
        font=dict(size=10, color="lightgrey"),
        opacity=0.7
    )

    # Render once, after every layout element (including the watermark) is set.
    fig.show()

### plot_time_series_with_error_bands

In [3]:
import plotly.graph_objects as go
from typing import List
import pandas as pd
from darts import TimeSeries

def _fading_rgba(red, green, blue, base_alpha, step, index, floor):
    """Return an rgba() colour string whose opacity fades with ``index``.

    The alpha is clamped to ``floor`` so that later series neither become
    fully transparent nor yield a negative (invalid) alpha value.
    """
    alpha = max(floor, base_alpha - index * step)
    # Two decimals keep float noise (e.g. 0.6000000000000001) out of the string.
    return f'rgba({red}, {green}, {blue}, {alpha:.2f})'


def _series_to_frame(series, x):
    """Convert a Darts series to a dataframe with the time axis in column ``x``."""
    # NOTE(review): newer Darts releases appear to rename pd_dataframe() to
    # to_dataframe(); prefer the new name when it exists - confirm against
    # the installed Darts version.
    to_frame = getattr(series, 'to_dataframe', None) or series.pd_dataframe
    df = to_frame().reset_index()
    if x not in df.columns:
        # After reset_index() the former index is the first column; expose it
        # under the requested name when the series' time axis is named differently.
        df = df.rename(columns={df.columns[0]: x})
    return df


def plot_time_series_with_error_bands(
    actual_series: List[TimeSeries],
    forecast_series: List[TimeSeries],
    title: str = 'Time Series Forecast',
    x: str = 'date',
    y: str = 'value'
) -> None:
    """Plot actual and forecast series together, with optional error bands.

    Args:
        actual_series: Series drawn as blue lines, labelled 'Actual Series N'.
        forecast_series: Series drawn as red lines, labelled 'Forecast N'.
        title: Figure title.
        x: Name of the time column after the series is converted to a
            dataframe. If no such column exists, the series' time index is
            used regardless of its name.
        y: Name of the value column (component) to plot.

    Returns:
        None. The figure is displayed via ``fig.show()``.

    Raises:
        KeyError: If a converted series has no ``y`` column.

    An error band is drawn for a forecast only when its dataframe contains
    both ``f'{y}_lower'`` and ``f'{y}_upper'`` columns.
    NOTE(review): whether Darts forecasts ever carry such columns is not
    visible from this code - confirm with the producer of the forecasts.
    """
    fig = go.Figure()

    # Plot actual series; each additional series is drawn slightly fainter.
    for i, series in enumerate(actual_series):
        df = _series_to_frame(series, x)
        fig.add_trace(
            go.Scatter(
                x=df[x],
                y=df[y],
                mode='lines',
                name=f'Actual Series {i+1}',
                line=dict(color=_fading_rgba(0, 0, 255, 0.8, 0.2, i, floor=0.2))
            )
        )

    # Plot forecast series with error bands if available
    lower_col, upper_col = f'{y}_lower', f'{y}_upper'
    for i, forecast in enumerate(forecast_series):
        df = _series_to_frame(forecast, x)

        if lower_col in df.columns and upper_col in df.columns:
            time_points = df[x].tolist()
            # Closed polygon: upper bound left-to-right, then lower bound
            # right-to-left, filled with 'toself'.
            fig.add_trace(
                go.Scatter(
                    x=time_points + time_points[::-1],
                    y=df[upper_col].tolist() + df[lower_col][::-1].tolist(),
                    fill='toself',
                    fillcolor=_fading_rgba(255, 0, 0, 0.2, 0.05, i, floor=0.05),
                    # Fully transparent outline so only the fill is visible.
                    line=dict(color='rgba(255, 255, 255, 0)'),
                    name=f'Confidence Interval {i+1}',
                    showlegend=True
                )
            )

        # Add forecast line
        fig.add_trace(
            go.Scatter(
                x=df[x],
                y=df[y],
                mode='lines',
                name=f'Forecast {i+1}',
                line=dict(color=_fading_rgba(255, 0, 0, 0.8, 0.2, i, floor=0.2))
            )
        )

    fig.update_layout(
        title=title,
        xaxis_title=x,
        yaxis_title=y,
        hovermode="x unified",
        # Horizontal legend anchored above the top-right corner of the plot.
        legend=dict(
            orientation="h",
            yanchor="bottom",
            y=1.02,
            xanchor="right",
            x=1
        )
    )

    # Show only the y value in the hover box; <extra></extra> hides the
    # secondary box carrying the trace name.
    fig.update_traces(
        hovertemplate="%{y}<extra></extra>"
    )

    fig.update_xaxes(
        tickformat="%B %d, %Y",
        hoverformat="%B %d, %Y"
    )

    fig.show()

## Train and Save models

In [None]:
import pandas as pd
import numpy as np
from darts import TimeSeries
from darts.models import RandomForest
from darts.metrics import mape


# Load the dataset.
# 'Month' is read as plain text on purpose: the prefixing step below only works
# on strings, so letting read_csv try to parse dates here would either be a
# no-op or break that step.
# NOTE(review): the code below implies values shaped like '1-01'
# (year offset, month) - confirm against the actual shampoo.csv.
df = pd.read_csv('shampoo.csv', dtype={'Month': str})

print(df.head())
print(df.describe())

# Normalise the column names (the file is expected to have exactly two columns).
df.columns = ['month', 'value']

print(df.head())

print(df.dtypes)
print(df['month'].unique())

import sys

# ----------------- Create `date` column from `month` column ----------------- #
# Prefix '200' so that e.g. '1-01' becomes '2001-01', which matches date_format.
df['month'] = '200' + df['month']

date_format = "%Y-%m"
df['date'] = pd.to_datetime(df['month'], format=date_format)

df = df.drop(columns=['month'])

print(df.head())
# ---------------------------------------------------------------------------- #

# Convert to Darts TimeSeries
print("----------------      Convert to Darts TimeSeries      ----------------")
series = TimeSeries.from_dataframe(df, time_col='date', value_cols=['value'])

# Print series details
print(len(series))
print(series.mean())


# Define lags: use the last 14 observations to predict the next one.
# The series is built from one timestamp per '%Y-%m' value, so these are
# months here, not days.
lags = 14

# Split into train and test sets: the final 12 observations are held out.
train, test = series[:-12], series[-12:]

# Create and train the models
from darts.models import VARIMA, XGBModel, RandomForest
from darts.models.forecasting.forecasting_model import ForecastingModel

# Candidate models, keyed by the name used in plot titles and file names.
# VARIMA(p=1, d=0, q=0) is left out on purpose:
#   ValueError: Train series only contains 24 elements but VARIMA() model
#   requires at least 30 entries
models = {
    'xgb': XGBModel(lags=lags, n_estimators=100, random_state=42),
    'random_forest': RandomForest(lags=lags, n_estimators=100, random_state=42),
}

for name, model in models.items():
    model: ForecastingModel
    model.fit(train)

    # Forecast as many steps as there are held-out observations
    predictions = model.predict(len(test))

    # Calculate MAPE against the held-out data
    mape_score = mape(test, predictions)
    print(f"MAPE: {mape_score:.2f}%")

    # Plot the predictions on top of the full actual series
    plot_time_series_with_error_bands(
        actual_series=[series],
        forecast_series=[predictions],
        title=f'Shampoo Sales Forecast Vs Actual ({name})'
    )

    # Save the model
    model.save(f"{name}_model.pkl")

    print(f"Model saved successfully ({name}).")