# In [5]:
# Importing libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import SGDRegressor
from sklearn.metrics import mean_squared_error
import pickle
import dash
from dash import dcc, html
from dash.dependencies import Input, Output
import plotly.graph_objs as go

# Load and prepare data
def load_and_prepare_data(file_path):
    """Read a pickled DataFrame and aggregate it per product and day.

    Rows are grouped on ``product_id`` and ``day``. Within each group
    ``purchase`` is summed and the columns ``price``, ``week``, ``month``,
    ``year``, ``cart``, ``view`` and ``category_num`` are averaged. Columns
    not named here are dropped from the result.

    Args:
        file_path: Location of the pickle file, handed to ``pd.read_pickle``.

    Returns:
        A DataFrame with ``product_id`` and ``day`` as ordinary columns,
        followed by the aggregated columns.
    """
    raw = pd.read_pickle(file_path)

    averaged_columns = ['price', 'week', 'month', 'year', 'cart', 'view', 'category_num']
    # `purchase` goes in first so the output column order stays
    # purchase, price, week, ... exactly as consumers expect.
    aggregations = {'purchase': 'sum'}
    aggregations.update(dict.fromkeys(averaged_columns, 'mean'))

    per_product_day = raw.groupby(['product_id', 'day']).agg(aggregations)
    return per_product_day.reset_index()

# Function to train the model incrementally
def train_incremental_model(data_grouped, model=None, scaler=None):
    """Fit a new purchase regressor or incrementally update an existing one.

    Args:
        data_grouped: DataFrame holding the feature columns ``price``,
            ``day``, ``week``, ``month``, ``year``, ``cart``, ``view`` and
            ``category_num`` plus the target column ``purchase``.
        model: Previously trained regressor exposing ``partial_fit``, or
            ``None`` to create and fully fit a new ``SGDRegressor``.
        scaler: Previously fitted scaler exposing ``transform``, or ``None``
            to fit a new ``StandardScaler`` on ``data_grouped``.

    Returns:
        A ``(model, scaler)`` tuple: the trained/updated regressor and the
        scaler that was applied to the features.
    """
    feature_columns = ['price', 'day', 'week', 'month', 'year', 'cart', 'view', 'category_num']
    X = data_grouped[feature_columns]
    y = data_grouped['purchase']

    if scaler is None:
        scaler = StandardScaler()
        X = scaler.fit_transform(X)
    else:
        # Reuse the existing scaling so new features are on the same scale
        # as the data the model was already trained on.
        X = scaler.transform(X)

    if model is None:
        # First training: `partial_fit` performs only a single epoch and
        # ignores `max_iter`/`tol`, so use `fit` to let the configured
        # stopping criteria actually drive convergence.
        model = SGDRegressor(max_iter=1000, tol=1e-3)
        model.fit(X, y)
    else:
        # Incremental update: one more pass over the supplied samples,
        # starting from the coefficients the model already has.
        model.partial_fit(X, y)

    return model, scaler

# Bootstrap: build the training table from the initial dataset and fit a fresh model
file_path = 'forecasting_dataset.pkl'
data_grouped = load_and_prepare_data(file_path)
model, scaler = train_incremental_model(data_grouped)

# Persist both fitted objects so the prediction callback can reload them from disk
with open('model.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)
with open('scaler.pkl', 'wb') as scaler_file:
    pickle.dump(scaler, scaler_file)

# Distinct product ids used to populate the dropdown
product_ids = pd.unique(data_grouped['product_id'])

# Create the Dash application instance
app = dash.Dash(__name__)

# Page layout: heading, product selector, predict trigger, forecast chart,
# and the button that pulls in new data and updates the model
app.layout = html.Div(children=[
    html.H1(children="Sales Prediction"),
    html.Label(children="Product ID:"),
    dcc.Dropdown(
        id='product_id',
        # One entry per known product; the first product is preselected.
        options=[dict(label=str(pid), value=pid) for pid in product_ids],
        value=product_ids[0],
    ),
    html.Button(children='Predict', id='predict-button', n_clicks=0),
    dcc.Graph(id='prediction-graph'),
    html.Button(children='Load New Data', id='load-data-button', n_clicks=0),
])

# Callback that renders the per-day sales forecast for the selected product
@app.callback(
    Output('prediction-graph', 'figure'),
    Input('predict-button', 'n_clicks'),
    Input('product_id', 'value')
)
def update_prediction(n_clicks, product_id):
    """Build the bar-chart figure of predicted sales for days 1 through 7.

    Args:
        n_clicks: Click count of the "Predict" button; nothing is predicted
            until it is greater than zero.
        product_id: Value currently selected in the product dropdown.

    Returns:
        A figure dict with ``data`` and ``layout`` keys. It is a placeholder
        with an explanatory title when the button has not been pressed or
        when the product has no rows in ``data_grouped``.
    """
    # Guard: nothing to do until the button has been pressed at least once.
    if not (n_clicks > 0):
        return {
            'data': [],
            'layout': go.Layout(
                title='Select a product ID and press "Predict"'
            )
        }

    # Reload the persisted artifacts on every call so that a model updated
    # by the data-loading callback is picked up.
    with open('model.pkl', 'rb') as model_file:
        model = pickle.load(model_file)
    with open('scaler.pkl', 'rb') as scaler_file:
        scaler = pickle.load(scaler_file)

    rows_for_product = data_grouped[data_grouped['product_id'] == product_id]
    if rows_for_product.empty:
        return {
            'data': [],
            'layout': go.Layout(
                title='No data available for this product'
            )
        }

    feature_columns = ['price', 'day', 'week', 'month', 'year', 'cart', 'view', 'category_num']
    week_days = list(range(1, 8))

    def predict_for(day):
        # A day without any rows for this product is reported as 0;
        # otherwise the prediction for the first matching row is used.
        day_rows = rows_for_product[rows_for_product['day'] == day][feature_columns]
        if day_rows.empty:
            return 0
        return model.predict(scaler.transform(day_rows))[0]

    predictions = [predict_for(day) for day in week_days]

    return {
        'data': [
            go.Bar(x=week_days, y=predictions, name='Predicted Sales')
        ],
        'layout': go.Layout(
            title=f'Sales Prediction for Product ID: {product_id}',
            xaxis={'title': 'Day of the Week'},
            yaxis={'title': 'Predicted Sales'}
        )
    }

# Callback to load new data and update the model with it
@app.callback(
    Output('load-data-button', 'children'),
    Input('load-data-button', 'n_clicks')
)
def load_new_data(n_clicks):
    """Merge the new dataset into the training table and update the model.

    Args:
        n_clicks: Click count of the "Load New Data" button; the data is
            only loaded when it is greater than zero.

    Returns:
        The text to show on the button: a confirmation message after a
        successful update, otherwise the default label.

    Side effects:
        Rebinds the module-level ``data_grouped``, ``model`` and ``scaler``
        and overwrites ``model.pkl`` and ``scaler.pkl``.
    """
    # `model` and `scaler` are assigned below, which would make them local
    # names; without this declaration, passing them to
    # `train_incremental_model` raises UnboundLocalError.
    global data_grouped, model, scaler

    if n_clicks > 0:
        # Load the new data and combine it with what is already known
        new_data_path = 'forecasting_dataset_new.pkl'
        new_data_grouped = load_and_prepare_data(new_data_path)

        # Only rows that are identical in every column are dropped here.
        data_grouped = pd.concat([data_grouped, new_data_grouped]).drop_duplicates().reset_index(drop=True)

        # Update the existing model with the combined data
        model, scaler = train_incremental_model(data_grouped, model, scaler)

        # Save the updated model and the scaler
        with open('model.pkl', 'wb') as f:
            pickle.dump(model, f)
        with open('scaler.pkl', 'wb') as f:
            pickle.dump(scaler, f)

        return 'New Data Loaded and Model Updated'

    return 'Load New Data'

# Run the app
if __name__ == '__main__':
    # Newer Dash releases start the dev server with `app.run` and Dash 3
    # rejects `run_server`; older 2.x releases only provide `run_server`.
    # Prefer `run` and fall back only when it is missing.
    start_server = getattr(app, 'run', None) or app.run_server
    start_server(debug=True, port=8065)
