<a href="https://colab.research.google.com/github/Naomie25/DI-Bootcamp/blob/main/Week10_Day3_ExerciceXPipynb.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
!pip install streamlit scikit-learn pandas numpy pyngrok

Collecting streamlit
  Downloading streamlit-1.47.1-py3-none-any.whl.metadata (9.0 kB)
Collecting pyngrok
  Downloading pyngrok-7.2.13-py3-none-any.whl.metadata (8.1 kB)
Collecting watchdog<7,>=2.1.5 (from streamlit)
  Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m1.5 MB/s[0m eta [36m0:00:00[0m
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.47.1-py3-none-any.whl (9.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.9/9.9 MB[0m [31m54.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pyngrok-7.2.13-py3-none-any.whl (25 kB)
Downloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m73.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl (

In [3]:
import pandas as pd
import numpy as np

# Minimal synthetic dataset that mimics the store-sales schema used by the app.
SEED = 42      # fixed seed so "Restart & Run All" reproduces the same data and model
N_ROWS = 100   # number of synthetic rows


def make_sample_data(n_rows: int = N_ROWS, seed: int = SEED) -> pd.DataFrame:
    """Generate a reproducible random sample of store-sales records.

    Parameters
    ----------
    n_rows : int
        Number of rows to generate.
    seed : int
        Seed for the NumPy random generator; the same seed yields the same frame.

    Returns
    -------
    pd.DataFrame
        Ten feature columns plus the ``sales`` target column.
    """
    rng = np.random.default_rng(seed)
    return pd.DataFrame({
        # Upper bounds of integers() are exclusive, exactly like np.random.randint.
        'store_nbr': rng.integers(0, 55, n_rows),
        'family': rng.choice(['AUTOMOTIVE', 'BEAUTY', 'BEVERAGES', 'FOODS', 'STATIONERY'], n_rows),
        'onpromotion': rng.integers(0, 10, n_rows),
        'state': rng.choice(['Pichincha', 'Cotopaxi', 'Guayas'], n_rows),
        'transactions': rng.integers(100, 1000, n_rows),
        'store_type': rng.choice(['A', 'B', 'C'], n_rows),
        'cluster': rng.integers(1, 5, n_rows),
        'month': rng.integers(1, 13, n_rows),
        'day': rng.integers(1, 29, n_rows),
        'dayofweek': rng.integers(1, 8, n_rows),
        'sales': rng.uniform(1000, 5000, n_rows),  # Target variable
    })


data = make_sample_data()
data.head()


Unnamed: 0,store_nbr,family,onpromotion,state,transactions,store_type,cluster,month,day,dayofweek,sales
0,13,STATIONERY,3,Cotopaxi,396,C,1,2,16,6,4161.07675
1,42,BEAUTY,0,Cotopaxi,837,A,2,4,17,6,1780.91665
2,10,BEVERAGES,2,Cotopaxi,926,B,3,5,6,7,1080.602685
3,3,STATIONERY,2,Cotopaxi,787,B,3,11,13,4,3986.104326
4,19,BEAUTY,4,Guayas,467,A,1,5,5,7,2733.560977


In [5]:
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer

import numpy as np

# Feature groups: categorical columns get one-hot encoded, numerical columns get scaled.
categorical_cols = ['family', 'state', 'store_type']
numerical_cols = ['store_nbr', 'onpromotion', 'transactions', 'cluster', 'month', 'day', 'dayofweek']

# Feature frame (categoricals first, then numericals) and the regression target.
X = data[categorical_cols + numerical_cols]
y = data['sales']

# Numerical branch: fill gaps with the column median, then standardize.
num_imputer = SimpleImputer(strategy='median')
scaler = StandardScaler()
X_num = num_imputer.fit_transform(X[numerical_cols])
X_num_scaled = scaler.fit_transform(X_num)

# Categorical branch: fill gaps with the most frequent value, then one-hot encode.
# handle_unknown='ignore' makes categories unseen during fitting encode as all zeros.
cat_imputer = SimpleImputer(strategy='most_frequent')
encoder = OneHotEncoder(handle_unknown='ignore', sparse_output=False)
X_cat = cat_imputer.fit_transform(X[categorical_cols])
X_cat_encoded = encoder.fit_transform(X_cat)

# Final design matrix: scaled numerical columns followed by the one-hot columns.
# Anything that feeds this model at inference time must stack features in the same order.
X_processed = np.concatenate([X_num_scaled, X_cat_encoded], axis=1)

# Fit the random forest on the full processed dataset.
model = RandomForestRegressor(n_estimators=50, random_state=42)
model.fit(X_processed, y)


In [6]:
import pickle

# Bundle every fitted object needed at inference time so they can be shipped as one file.
components = dict(
    num_imputer=num_imputer,
    cat_imputer=cat_imputer,
    encoder=encoder,
    model=model,
    scaler=scaler,
)

# Serialize the bundle next to the notebook; app.py reads this exact file name.
with open("rf_model.pkl", mode="wb") as model_file:
    pickle.dump(components, model_file)


In [7]:
%%writefile app.py
import streamlit as st
import pandas as pd
import numpy as np
import pickle

MODEL_PATH = "rf_model.pkl"

# Column groups. The model was trained on scaled numerical features stacked *before* the
# one-hot encoded categorical ones, so the same grouping and order must be used here.
CATEGORICAL_COLS = ['family', 'state', 'store_type']
NUMERICAL_COLS = ['store_nbr', 'onpromotion', 'transactions', 'cluster', 'month', 'day', 'dayofweek']


@st.cache_resource
def load_components(path: str = MODEL_PATH) -> dict:
    """Load the pickled preprocessing objects and model.

    Streamlit re-executes this script on every widget interaction; caching the
    result avoids re-reading and unpickling the file on each rerun.

    Parameters
    ----------
    path : str
        Location of the pickle file holding the components dictionary.

    Returns
    -------
    dict
        Mapping with the keys "num_imputer", "cat_imputer", "encoder", "model" and "scaler".

    Raises
    ------
    FileNotFoundError
        If the pickle file does not exist.
    """
    # SECURITY: pickle.load can execute arbitrary code. Only load files you produced yourself.
    with open(path, "rb") as f:
        return pickle.load(f)


def preprocess_input(input_data, num_imputer, cat_imputer, encoder, scaler):
    """Turn one dictionary of raw widget values into the model's feature row.

    Parameters
    ----------
    input_data : dict
        Raw values keyed by the names in CATEGORICAL_COLS and NUMERICAL_COLS.
    num_imputer, cat_imputer, encoder, scaler
        Fitted transformers loaded from the components dictionary.

    Returns
    -------
    numpy.ndarray
        A single-row 2-D array: scaled numerical features followed by one-hot features.
    """
    input_df = pd.DataFrame([input_data])

    cat_imputed = cat_imputer.transform(input_df[CATEGORICAL_COLS])
    num_imputed = num_imputer.transform(input_df[NUMERICAL_COLS])

    cat_encoded = encoder.transform(cat_imputed)
    num_scaled = scaler.transform(num_imputed)

    return np.hstack([num_scaled, cat_encoded])


try:
    components = load_components()
except FileNotFoundError:
    # Show a readable message instead of a raw traceback, then halt this script run.
    st.error(f"Model file '{MODEL_PATH}' not found. Train and export the model first.")
    st.stop()

num_imputer = components["num_imputer"]
cat_imputer = components["cat_imputer"]
encoder = components["encoder"]
model = components["model"]
scaler = components["scaler"]

st.title("Sales Prediction Web App")
st.caption("Predict store sales using an interactive ML model.")

# Raw feature values collected from the widgets, keyed by feature name.
input_data = {}

col1, col2, col3 = st.columns(3)
with col1:
    input_data['store_nbr'] = st.slider("Store Number", 0, 54, 10)
    input_data['family'] = st.selectbox("Product Family", ['AUTOMOTIVE', 'BEAUTY', 'BEVERAGES', 'FOODS', 'STATIONERY'])
    input_data['onpromotion'] = st.number_input("Number of Items on Promotion", 0, 10, 2)
    input_data['state'] = st.selectbox("State", ['Pichincha', 'Cotopaxi', 'Guayas'])
    input_data['transactions'] = st.number_input("Transactions", 100, 1000, 500)

with col2:
    input_data['store_type'] = st.selectbox("Store Type", ['A', 'B', 'C'])
    input_data['cluster'] = st.number_input("Cluster", 1, 5, 2)

with col3:
    input_data['month'] = st.slider("Month", 1, 12, 6)
    input_data['day'] = st.slider("Day", 1, 28, 15)
    input_data['dayofweek'] = st.slider("Day of Week", 1, 7, 3)

if st.button("Predict"):
    input_processed = preprocess_input(input_data, num_imputer, cat_imputer, encoder, scaler)

    prediction = model.predict(input_processed)

    st.success(f"Predicted sales: {prediction[0]:.2f}")


Writing app.py


In [None]:
!pip install streamlit scikit-learn pandas numpy pyngrok

from pyngrok import ngrok

# Configure ton authtoken ngrok
ngrok.set_auth_token("30Jn6l3th1QZJmfJTvckXck98GN_2kigNX3N7xixrapYoXsdW")

# Ouvre un tunnel public sur le port 8501 (port par défaut de Streamlit)
public_url = ngrok.connect(8501)
print(f"Streamlit public URL: {public_url}")

# Lance Streamlit en arrière-plan
!streamlit run app.py &



Streamlit public URL: NgrokTunnel: "https://55ac5f9abb56.ngrok-free.app" -> "http://localhost:8501"

Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://34.139.170.72:8501[0m
[0m
