In [2]:
# Data handling (pandas / numpy) and plotting (matplotlib / seaborn) libraries.
# NOTE(review): the plotting libraries are not used in the cells visible here.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
# Silence every warning for the rest of the kernel session.
# NOTE(review): this blanket filter also hides pandas / scikit-learn warnings
# that can point at real problems — consider narrowing it to specific categories.
warnings.filterwarnings('ignore')

In [3]:
# Coffee sales CSV hosted on Google Drive. Sharing page for the same file id:
#   https://drive.google.com/file/d/1cnzXzPrShReziNp7B1hQ9HURVu9drGGF/view?usp=sharing
# pandas is pointed at the "uc?id=" form of that link.
url = "https://drive.google.com/uc?id=1cnzXzPrShReziNp7B1hQ9HURVu9drGGF"
df = pd.read_csv(filepath_or_buffer=url)

In [4]:
# Preview the first five rows of the loaded frame.
df.head(n=5)

Unnamed: 0,hour_of_day,cash_type,money,coffee_name,Time_of_Day,Weekday,Month_name,Weekdaysort,Monthsort,Date,Time
0,10,card,38.7,Latte,Morning,Fri,Mar,5,3,2024-03-01,10:15:50.520000
1,12,card,38.7,Hot Chocolate,Afternoon,Fri,Mar,5,3,2024-03-01,12:19:22.539000
2,12,card,38.7,Hot Chocolate,Afternoon,Fri,Mar,5,3,2024-03-01,12:20:18.089000
3,13,card,28.9,Americano,Afternoon,Fri,Mar,5,3,2024-03-01,13:46:33.006000
4,13,card,38.7,Latte,Afternoon,Fri,Mar,5,3,2024-03-01,13:48:14.626000


In [5]:
   # Print the column names, non-null counts, dtypes and memory usage of `df`.
   # NOTE(review): the leading indentation is kept exactly as found; a plain-Python
   # export of this cell would likely need it removed — confirm before converting.
   df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3547 entries, 0 to 3546
Data columns (total 11 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   hour_of_day  3547 non-null   int64  
 1   cash_type    3547 non-null   object 
 2   money        3547 non-null   float64
 3   coffee_name  3547 non-null   object 
 4   Time_of_Day  3547 non-null   object 
 5   Weekday      3547 non-null   object 
 6   Month_name   3547 non-null   object 
 7   Weekdaysort  3547 non-null   int64  
 8   Monthsort    3547 non-null   int64  
 9   Date         3547 non-null   object 
 10  Time         3547 non-null   object 
dtypes: float64(1), int64(3), object(7)
memory usage: 304.9+ KB


## Preprocessing

In [6]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()

# Numeric feature columns to standardize.
# The target column 'money' is deliberately NOT listed: the training cell reads
# `y = df['money']` and reports MSE/RMSE/MAE in money units, so the target must
# stay on its original scale.
cols_to_scale = ['hour_of_day', 'Weekdaysort', 'Monthsort']

# Write the standardized values into a copy instead of overwriting `df`.
# The training cell splits `df` and fits its own scaler on the training rows
# only; mutating `df` here would (a) leak test-set statistics into the features,
# (b) make that later scaler fit on already-scaled data, and (c) make the result
# depend on how many times this cell has been executed.
df_scaled = df.copy()
df_scaled[cols_to_scale] = scaler.fit_transform(df[cols_to_scale])

# Training

In [13]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import joblib

# Features and target
features = ['hour_of_day', 'Weekdaysort', 'Monthsort']
X = df[features]
y = df['money']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardize the features: statistics are learned from the training rows only
# and then applied unchanged to the test rows.
# The original row index is carried over so the scaled frames stay aligned with
# y_train / y_test in any later index-based pandas operation (a fresh 0..n-1
# index would silently pair features with the wrong targets).
scaler = StandardScaler()
X_train_scaled = pd.DataFrame(
    scaler.fit_transform(X_train), columns=features, index=X_train.index
)
X_test_scaled = pd.DataFrame(
    scaler.transform(X_test), columns=features, index=X_test.index
)

# Model training
rf = RandomForestRegressor(n_estimators=200, random_state=42)
rf.fit(X_train_scaled, y_train)

# Evaluation on the held-out rows
y_pred = rf.predict(X_test_scaled)

mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("Hasil Evaluasi Model:")
print(f"MSE: {mse:.3f}")
print(f"RMSE: {rmse:.3f}")
print(f"MAE: {mae:.3f}")
print(f"R²: {r2:.3f}")

# Save the model and the scaler for the Streamlit app
joblib.dump(rf, "rf_model.joblib")
joblib.dump(scaler, "scaler_coffee.joblib")
print("Model & Scaler berhasil disimpan!")

Hasil Evaluasi Model:
MSE: 22.428
RMSE: 4.736
MAE: 3.773
R²: 0.020
Model & Scaler berhasil disimpan!


# Model Inference

In [14]:
# Interactive single-row prediction.
# Relies on objects created by the training cell: `rf` (fitted model),
# `scaler` (the StandardScaler fitted on the training features) and `X`
# (the feature frame, used only for its column order).

# Ask for the three inputs that correspond to the model's feature columns.
hour = int(input("Masukkan jam (mis: 10): "))
weekday_sort = int(input("Masukkan urutan hari (1-7): "))
month_sort = int(input("Masukkan urutan bulan (1-12): "))

# Reject values outside the ranges announced in the prompts (and 0-23 for the
# hour) instead of silently predicting for an impossible date/time.
if not 0 <= hour <= 23:
    raise ValueError(f"Jam harus di antara 0 dan 23, tetapi diterima {hour}.")
if not 1 <= weekday_sort <= 7:
    raise ValueError(f"Urutan hari harus di antara 1 dan 7, tetapi diterima {weekday_sort}.")
if not 1 <= month_sort <= 12:
    raise ValueError(f"Urutan bulan harus di antara 1 dan 12, tetapi diterima {month_sort}.")

# Collect the inputs under the same column names used for training.
data_baru = {
    'hour_of_day': hour,
    'Weekdaysort': weekday_sort,
    'Monthsort': month_sort
}

# One-row DataFrame holding the raw (unscaled) input.
new_df = pd.DataFrame([data_baru])
print(f"Data baru yang akan diprediksi:\n{new_df}\n")

# Feature columns, in the order the scaler and the model were fitted on.
cols_to_scale = ['hour_of_day', 'Weekdaysort', 'Monthsort']

# Reuse the scaler that was fitted together with the model rather than fitting
# a new one on `X_train`: `X_train` can be overwritten by other cells, and a
# refit on already-scaled data would feed the model mis-scaled inputs.
new_df = pd.DataFrame(
    scaler.transform(new_df[cols_to_scale]),
    columns=cols_to_scale,
    index=new_df.index,
)

# Make sure the column order matches the training frame `X`.
new_df = new_df[X.columns]


print(f"Data baru setelah preprocessing (scaling):\n{new_df}\n")

# Predict the 'money' value for the new row.
prediksi_money = rf.predict(new_df)

# Show the result.
print("-" * 30)
print(f"Hasil Prediksi Harga (Money): ${prediksi_money[0]:.2f}")
print("-" * 30)

Masukkan jam (mis: 10): 6
Masukkan urutan hari (1-7): 7
Masukkan urutan bulan (1-12): 12
Data baru yang akan diprediksi:
   hour_of_day  Weekdaysort  Monthsort
0            6            7         12

Data baru setelah preprocessing (scaling):
   hour_of_day  Weekdaysort  Monthsort
0    -1.926965     1.613669   1.584132

------------------------------
Hasil Prediksi Harga (Money): $31.50
------------------------------


In [10]:
import joblib

# Write the fitted random-forest model `rf` to disk.
filename = 'rf_model.joblib'
joblib.dump(value=rf, filename=filename)

print(f"Model exported successfully as {filename}")

Model exported successfully as rf_model.joblib


In [15]:
# ===============================================================
# ☕ Coffee Sales Model Training (Light Output)
# ===============================================================
# Self-contained cell: reloads the data, trains the random forest, saves the
# model and scaler, and (inside Google Colab only) downloads both files.

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler
import joblib

# `google.colab` is only importable inside a Colab runtime. Treat it as
# optional so the cell still trains and saves the artifacts elsewhere.
try:
    from google.colab import files
except ImportError:
    files = None

# Load dataset
url = "https://drive.google.com/uc?id=1cnzXzPrShReziNp7B1hQ9HURVu9drGGF"
df = pd.read_csv(url)

print("✅ Dataset berhasil dimuat!")

# Preprocessing: select feature columns and the target
num_features = ['hour_of_day', 'Weekdaysort', 'Monthsort']
X = df[num_features]
y = df['money']

# Split data (20% held out, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Scaling: fit on the training rows only, apply the same statistics to the test rows.
# The scaled values go into new frames (original index preserved) instead of being
# assigned back into X_train / X_test: those are slices of `df`, and other cells
# expect them to still hold the raw feature values.
scaler = StandardScaler()
X_train_scaled = pd.DataFrame(
    scaler.fit_transform(X_train[num_features]),
    columns=num_features,
    index=X_train.index,
)
X_test_scaled = pd.DataFrame(
    scaler.transform(X_test[num_features]),
    columns=num_features,
    index=X_test.index,
)

# Train Model
model = RandomForestRegressor(
    n_estimators=200,
    random_state=42,
    n_jobs=-1
)
model.fit(X_train_scaled, y_train)

print("✅ Model berhasil dilatih!")

# Save files
joblib.dump(model, "rf_model.joblib")
joblib.dump(scaler, "scaler_coffee.joblib")

print("✅ Model & Scaler berhasil disimpan!")

# Download files (faster & lighter) — only possible inside Google Colab
if files is not None:
    files.download("rf_model.joblib")
    files.download("scaler_coffee.joblib")
else:
    print("ℹ️ Bukan di Google Colab: unduhan dilewati, file tersimpan di direktori kerja.")

print("📥 Siap digunakan di Streamlit App!")


✅ Dataset berhasil dimuat!
✅ Model berhasil dilatih!
✅ Model & Scaler berhasil disimpan!


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

📥 Siap digunakan di Streamlit App!
