<a href="https://colab.research.google.com/github/GeorgeShmelin/Masterschool_time_series_Project_supermarket_favorita/blob/main/Project_App_Entry_Point.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

### Step 1: Prepare Environment in Google Colab

In [2]:
# !pip install xgboost
# !pip install streamlit

In [3]:
import os # Save the trained model
import joblib
import sys # Add the project's root directory (parent of the notebook's directory) to sys.path

In [4]:
import gc # gc.collect() are for rubbish collection

In [5]:
# !pip install xgboost
from xgboost import XGBRegressor
from sklearn.model_selection import GridSearchCV

In [6]:
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from hyperopt import STATUS_OK

In [7]:
# pip install hyperopt
from hyperopt import hp

## app/main.py

In [8]:
# !pip install streamlit -q
# !pip install cloudflared

In [9]:
# !pip install streamlit

In [10]:
# !pip install -q pyngrok

In [11]:
import streamlit as st

In [12]:
from pyngrok import ngrok

# Create Web App Script (app.py)

## Data

In [13]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [14]:
df_download = pd.read_csv('/content/drive/MyDrive/MasterSchool/MasterSchool_Python/Time_Series/Time_series_project/df_train_3.csv')
df_download.shape

(26307, 24)

In [15]:
df = df_download.copy()
df.sample(2)

Unnamed: 0,unit_sales,store_nbr,item_nbr,onpromotion,dcoilwtico,transferred,item_class,transactions,weighted_unit_sales,date,...,year_scaled,city_Daule,city_Guayaquil,city_Libertad,city_Playas,city_nan,items_family_BEVERAGES,items_family_CLEANING,items_family_GROCERY I,items_family_nan
12117,2.0,51,1099990,0,90.74,0,1030,1748,2.0,2014-10-01,...,-3,0,1,0,0,0,0,0,1,0
5144,8.0,30,651326,0,107.04,0,1036,693,8.0,2014-06-25,...,-3,0,1,0,0,0,0,0,1,0


Split dataset based on the date column (df['date']):

	•	Data before 01.01.2015 → this will be used for training.
	•	Data on or after 01.01.2015 → this will be used for forecast/prediction.

In [16]:
df['date'] = pd.to_datetime(df['date'], errors='coerce') # Make Sure date Column is in datetime Format
df['date'] = pd.to_datetime(df['date'], dayfirst=True)

In [17]:
cutoff = pd.to_datetime('2015-01-01') # Split the Data

# Training data (before Dec 10, 2014)
train_df = df[df['date'] < '2014-12-10']

# Forecast data (on or after Jan 1, 2015)
forecast_df = df[df['date'] >= cutoff]

In [18]:
'''
(Optional) Save Split Files for Later Use

train_df.to_csv('train_data.csv', index=False)
forecast_df.to_csv('forecast_data.csv', index=False)
'''

"\n(Optional) Save Split Files for Later Use\n\ntrain_df.to_csv('train_data.csv', index=False)\nforecast_df.to_csv('forecast_data.csv', index=False)\n"

## Load my trained model

In [19]:
model_path = '/content/drive/MyDrive/MasterSchool/MasterSchool_Python/Time_Series/Time_series_project/xgboost_models/xgb_final_model.pkl'
model = joblib.load(model_path)

In [20]:
features_used_for_training = ['store_nbr', 'item_nbr', 'day_of_week', 'month']

# Select only the columns that the model was trained on
X_forecast = forecast_df[features_used_for_training]

# Predict
predictions = model.predict(X_forecast)

# Add predictions to the dataframe
forecast_df['prediction'] = predictions

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  forecast_df['prediction'] = predictions


In [21]:
forecast_df.sample(5)

Unnamed: 0,unit_sales,store_nbr,item_nbr,onpromotion,dcoilwtico,transferred,item_class,transactions,weighted_unit_sales,date,...,city_Daule,city_Guayaquil,city_Libertad,city_Playas,city_nan,items_family_BEVERAGES,items_family_CLEANING,items_family_GROCERY I,items_family_nan,prediction
21866,21.0,35,573832,0,49.25,0,1016,691,21.0,2015-02-02,...,0,0,0,1,0,0,0,1,0,0.0
22867,4.0,26,657869,0,48.8,0,1004,478,4.0,2015-02-15,...,0,1,0,0,0,0,0,1,0,0.0
19769,3.0,28,577745,0,52.72,0,1032,1245,3.0,2015-01-03,...,0,1,0,0,0,0,0,1,0,0.0
20033,9.0,36,864508,0,48.69,0,1048,1073,9.0,2015-01-07,...,0,0,1,0,0,0,0,1,0,0.0
24019,2.0,34,848765,0,49.59,0,1058,2468,2.0,2015-03-02,...,0,1,0,0,0,0,0,1,0,0.0


	•	🟢 Loaded the model
	•	🟢 Selected the correct features: ['store_nbr', 'item_nbr', 'day_of_week', 'month']
	•	🟢 Generated predictions and appended them to forecast_df


Build the Streamlit App (UI) to Use Your Model

Now I'm going to create a simple Streamlit app that:

	•	Lets the user enter values for:
	•	store_nbr
	•	item_nbr
	•	day_of_week
	•	month
	•	Runs your XGBoost model
	•	Displays the predicted result

In [22]:
%%writefile app.py
# import streamlit as st
# import pandas as pd
# import joblib

# Load model
model = joblib.load('xgboost_model.pkl')

st.title("Sales Forecasting App")

st.markdown("Enter the input features to get a sales forecast:")

# Input fields
store_nbr = st.number_input("Store Number", min_value=1, value=1)
item_nbr = st.number_input("Item Number", min_value=1, value=1)
day_of_week = st.selectbox("Day of Week (1=Mon, 7=Sun)", list(range(1, 8)))
month = st.selectbox("Month (1=Jan, 12=Dec)", list(range(1, 13)))

# Predict
if st.button("Predict Sales"):
    input_df = pd.DataFrame({
        'store_nbr': [store_nbr],
        'item_nbr': [item_nbr],
        'day_of_week': [day_of_week],
        'month': [month]
    })

    prediction = model.predict(input_df)
    st.success(f"Predicted sales: {prediction[0]:.2f}")

Overwriting app.py


In [23]:
# ✅ Step 1: Install required packages (only needs to be done once per session)
!pip install -q streamlit pyngrok

# ✅ Step 2: Authenticate ngrok (run this once per session — REPLACE with your token!)
!ngrok config add-authtoken 2xGjtPlTL3RBlHDjQQrlFlLsbgI_wcFJ6YAwQMRuPmerCySR

Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml


In [24]:
# ✅ Step 3: Kill any previously running Streamlit apps (optional but safe)
!pkill streamlit

In [25]:
# ✅ Step 4: Start Streamlit app in the background
# Replace 'app.py' with your actual Streamlit app filename if it's different
get_ipython().system_raw('streamlit run app.py &')

In [26]:
public_url = ngrok.connect(addr='localhost:8501')

In [27]:
# Print the public URL so you can access your app
print(f"🚀 Your Streamlit app is live at: {public_url}")

🚀 Your Streamlit app is live at: NgrokTunnel: "https://b7ee-130-211-240-209.ngrok-free.app" -> "http://localhost:8501"


now I am just missing one small piece: I need to import joblib to be able to load your model

### Fix: Add the import at the top of your app.py

In [28]:
# import joblib

In [29]:
train_df.to_csv('train_data.csv', index=False)
forecast_df.to_csv('forecast_data.csv', index=False)

# Features used in training
features_used_for_training = ['store_nbr', 'item_nbr', 'day_of_week', 'month']
X_forecast = df[features_used_for_training]

# Predict and add to DataFrame
df['prediction'] = model.predict(X_forecast)

# Streamlit app layout
st.title("🛍️ Forecasted Sales")
st.write("Showing predictions from your XGBoost model:")

# Display predictions
st.dataframe(df[['store_nbr', 'item_nbr', 'prediction']].head(20))

2025-05-18 13:05:27.467 
  command:

    streamlit run /usr/local/lib/python3.11/dist-packages/colab_kernel_launcher.py [ARGUMENTS]


DeltaGenerator()

After I fix it:

	1.	Save the file (app.py).
	2.	Restart the Streamlit server in your notebook:

In [30]:
# !pkill streamlit
get_ipython().system_raw('streamlit run app.py &')

In [31]:
df = pd.read_csv('forecast_data.csv')

# Load trained model using the correct path
model_path = '/content/drive/MyDrive/MasterSchool/MasterSchool_Python/Time_Series/Time_series_project/xgboost_models/xgb_final_model.pkl'
model = joblib.load(model_path)

# Predict
features_used_for_training = ['store_nbr', 'item_nbr', 'day_of_week', 'month']
X_forecast = df[features_used_for_training]
df['prediction'] = model.predict(X_forecast)

# Streamlit UI
st.title("📈 Forecast App")
st.write("Here are the predictions:")

st.dataframe(df[['store_nbr', 'item_nbr', 'prediction']].head(20))



DeltaGenerator()

In [32]:
from pyngrok import ngrok # reconnect ngrok

# Close any existing ngrok tunnels to avoid conflicts
# ngrok.disconnect_all() # Incorrect function
ngrok.kill() # Correct function to terminate all ngrok processes

# Connect to ngrok specifying the address using 'addr'
# public_url = ngrok.connect(port=8501) # Incorrect parameter name
public_url = ngrok.connect(addr=8501, proto='http') # Correct way to specify address and protocol
print(f"🚀 Your Streamlit app is live at: {public_url}")

🚀 Your Streamlit app is live at: NgrokTunnel: "https://082c-130-211-240-209.ngrok-free.app" -> "http://localhost:8501"


In [33]:
gc.collect() # rubbish collection

90