In [1]:
import sys

In [2]:
print(sys.executable)

d:\fuelwatch_project\fuelwatch-project\ml-services\member1-kumara\venv\Scripts\python.exe


In [3]:
#Step-1  :  Imports & Paths
import json
import numpy as np
import pandas as pd
from pathlib import Path

from sklearn.preprocessing import MinMaxScaler
from joblib import dump

import tensorflow as tf
from tensorflow.keras import layers, models, callbacks

# Notebook lives in: member1-kumara/notebooks/
# Project root is:   member1-kumara/
PROJECT_ROOT = Path.cwd().parents[0]

DATA_RAW = PROJECT_ROOT / "data" / "raw" / "fuel_dispenses.csv"
DATA_PROCESSED = PROJECT_ROOT / "data" / "processed" / "fuel_daily_pivot.csv"
MODELS_DIR = PROJECT_ROOT / "models"

MODELS_DIR.mkdir(parents=True, exist_ok=True)
DATA_PROCESSED.parent.mkdir(parents=True, exist_ok=True)

MODEL_PATH = MODELS_DIR / "fuel_lstm.keras"
SCALER_X_PATH = MODELS_DIR / "scaler_X.pkl"
SCALER_Y_PATH = MODELS_DIR / "scaler_y.pkl"
META_PATH = MODELS_DIR / "model_meta.json"

RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)
tf.random.set_seed(RANDOM_SEED)

LOOKBACK_DAYS = 14  # best for your current dataset size
EPOCHS = 150
BATCH_SIZE = 16

print("PROJECT_ROOT:", PROJECT_ROOT)
print("DATA_RAW exists:", DATA_RAW.exists())


PROJECT_ROOT: d:\fuelwatch_project\fuelwatch-project\ml-services\member1-kumara
DATA_RAW exists: True


In [4]:
#Step-2  :  Load Raw Data

df = pd.read_csv(DATA_RAW)
df.head()

Unnamed: 0,Site,Type,Date,Number,Class,Site.1,Item,Qty,Amount,Balance
0,Petrol Tank 01,Invoice,2025-10-01,CR/INV/2526/07310,Unclassified,Petrol Tank 01,Lanka Petrol 92 Octane,183.11,46398.31,46398.31
1,,Invoice,2025-10-01,CR/INV/2526/07311,Unclassified,Petrol Tank 01,Lanka Petrol 92 Octane,49.164,12457.63,58855.93
2,,Invoice,2025-10-01,CR/INV/2526/07312,Unclassified,Petrol Tank 01,Lanka Petrol 92 Octane,426.491,108068.64,166924.58
3,,Invoice,2025-10-01,CR/INV/2526/07313,Unclassified,Petrol Tank 01,Lanka Petrol 92 Octane,107.358,27203.39,194127.97
4,,Invoice,2025-10-01,CR/INV/2526/07314,Unclassified,Petrol Tank 01,Lanka Petrol 92 Octane,6.847,1734.89,195862.86


In [5]:
#Step-3  :  Clean & Standardize Columns

df["Date"] = pd.to_datetime(df["Date"])
df["Qty"] = pd.to_numeric(df["Qty"], errors="coerce").fillna(0.0)
df = df.dropna(subset=["Item"])

df = df[["Date", "Item", "Qty"]].copy()
df = df.sort_values("Date")

df.head()

Unnamed: 0,Date,Item,Qty
0,2025-10-01,Lanka Petrol 92 Octane,183.11
464,2025-10-01,Lanka Petrol 92 Octane,256.613
463,2025-10-01,Lanka Petrol 92 Octane,3.075
462,2025-10-01,Lanka Petrol 92 Octane,-10.0
461,2025-10-01,Lanka Petrol 92 Octane,-10.0


In [6]:
#Step-4  :  Aggregate Daily + Pivot (Fuel Types become Columns)

daily = df.groupby(["Date", "Item"], as_index=False)["Qty"].sum()

pivot = (
    daily.pivot_table(index="Date", columns="Item", values="Qty", aggfunc="sum")
    .fillna(0.0)
    .sort_index()
)

# Ensure continuous daily dates (important for time-series)
full_idx = pd.date_range(pivot.index.min(), pivot.index.max(), freq="D")
pivot = pivot.reindex(full_idx, fill_value=0.0)
pivot.index.name = "Date"

pivot.head()

Item,Lanka Auto Diesel,Lanka Petrol 92 Octane,Lanka Petrol 95 Octane,Lanka Super Diesel
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2025-10-01,2058.0,7522.43,557.98,275.0
2025-10-02,1701.0,7401.739,541.66,251.0
2025-10-03,0.0,8564.64,514.0,258.0
2025-10-04,1202.0,6723.587,327.9,363.0
2025-10-05,495.0,6716.559,592.49,239.0


In [7]:
#Step-5  :  Add Time Features & Save Processed Dataset

out = pivot.reset_index()
dt = pd.to_datetime(out["Date"])

out["dow"] = dt.dt.dayofweek
out["month"] = dt.dt.month
out["weekofyear"] = dt.dt.isocalendar().week.astype(int)
out["year"] = dt.dt.year
out["is_weekend"] = (out["dow"] >= 5).astype(int)

out.to_csv(DATA_PROCESSED, index=False)

time_cols = ["dow", "month", "weekofyear", "year", "is_weekend"]
fuel_cols = [c for c in out.columns if c not in ["Date"] + time_cols]
feature_cols = fuel_cols + time_cols

print("Saved processed file:", DATA_PROCESSED)
print("Rows:", len(out))
print("Fuel types:", fuel_cols)

Saved processed file: d:\fuelwatch_project\fuelwatch-project\ml-services\member1-kumara\data\processed\fuel_daily_pivot.csv
Rows: 92
Fuel types: ['Lanka Auto Diesel', 'Lanka Petrol 92 Octane', 'Lanka Petrol 95 Octane', 'Lanka Super Diesel']


In [8]:
#Step-6  :  Split (Time-Based) & Scale

X_raw = out[feature_cols].values.astype(np.float32)
y_raw = out[fuel_cols].values.astype(np.float32)

n = len(out)
train_end = int(n * 0.8)

X_train_raw, y_train_raw = X_raw[:train_end], y_raw[:train_end]
X_val_raw, y_val_raw = X_raw[train_end:], y_raw[train_end:]

scaler_X = MinMaxScaler()
scaler_y = MinMaxScaler()

X_train = scaler_X.fit_transform(X_train_raw)
y_train = scaler_y.fit_transform(y_train_raw)

X_val = scaler_X.transform(X_val_raw)
y_val = scaler_y.transform(y_val_raw)

print("Train rows:", len(X_train), "Val rows:", len(X_val))

Train rows: 73 Val rows: 19


In [9]:
#Step-7  :  Windowing (LSTM expects 3D data)

def make_windows(X, y, lookback):
    Xw, yw = [], []
    for i in range(lookback, len(X)):
        Xw.append(X[i-lookback:i])
        yw.append(y[i])  # next-day prediction
    return np.array(Xw, dtype=np.float32), np.array(yw, dtype=np.float32)

Xw_train, yw_train = make_windows(X_train, y_train, LOOKBACK_DAYS)

Xw_val, yw_val = (None, None)
if len(X_val) > LOOKBACK_DAYS:
    Xw_val, yw_val = make_windows(X_val, y_val, LOOKBACK_DAYS)

print("Xw_train:", Xw_train.shape, "yw_train:", yw_train.shape)
print("Has validation windows:", Xw_val is not None)

Xw_train: (59, 14, 9) yw_train: (59, 4)
Has validation windows: True


In [10]:
#Step-8  :  Build LSTM Model (Multivariate Output)

def build_lstm(lookback, n_features, n_targets):
    m = models.Sequential([
        layers.Input(shape=(lookback, n_features)),
        layers.LSTM(64, return_sequences=True),
        layers.Dropout(0.2),
        layers.LSTM(32),
        layers.Dropout(0.2),
        layers.Dense(n_targets)
    ])
    m.compile(optimizer=tf.keras.optimizers.Adam(1e-3), loss="mse")
    return m

model = build_lstm(LOOKBACK_DAYS, Xw_train.shape[-1], yw_train.shape[-1])
model.summary()

In [11]:
#Step-9  :  Train

monitor_metric = "val_loss" if Xw_val is not None else "loss"

cbs = [
    callbacks.EarlyStopping(monitor=monitor_metric, patience=10, restore_best_weights=True),
    callbacks.ReduceLROnPlateau(monitor=monitor_metric, patience=5, factor=0.5, min_lr=1e-6),
]

fit_kwargs = dict(
    x=Xw_train,
    y=yw_train,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    callbacks=cbs,
    verbose=1
)

if Xw_val is not None:
    fit_kwargs["validation_data"] = (Xw_val, yw_val)

history = model.fit(**fit_kwargs)

Epoch 1/150
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 164ms/step - loss: 0.1803 - val_loss: 0.0782 - learning_rate: 0.0010
Epoch 2/150
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step - loss: 0.0793 - val_loss: 0.0622 - learning_rate: 0.0010
Epoch 3/150
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step - loss: 0.0602 - val_loss: 0.0789 - learning_rate: 0.0010
Epoch 4/150
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step - loss: 0.0538 - val_loss: 0.0650 - learning_rate: 0.0010
Epoch 5/150
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step - loss: 0.0515 - val_loss: 0.0568 - learning_rate: 0.0010
Epoch 6/150
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step - loss: 0.0449 - val_loss: 0.0587 - learning_rate: 0.0010
Epoch 7/150
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step - loss: 0.0413 - val_loss: 0.0568 - learning_rate: 0.0010
Epoch

In [12]:
#step-10  :  Save Artifacts

model.save(MODEL_PATH)
dump(scaler_X, SCALER_X_PATH)
dump(scaler_y, SCALER_Y_PATH)

meta = {
    "lookback_days": LOOKBACK_DAYS,
    "feature_cols": feature_cols,
    "fuel_cols": fuel_cols,
    "time_cols": time_cols
}

with open(META_PATH, "w", encoding="utf-8") as f:
    json.dump(meta, f, indent=2)

print("✅ Saved model:", MODEL_PATH)
print("✅ Saved scaler_X:", SCALER_X_PATH)
print("✅ Saved scaler_y:", SCALER_Y_PATH)
print("✅ Saved meta:", META_PATH)


✅ Saved model: d:\fuelwatch_project\fuelwatch-project\ml-services\member1-kumara\models\fuel_lstm.keras
✅ Saved scaler_X: d:\fuelwatch_project\fuelwatch-project\ml-services\member1-kumara\models\scaler_X.pkl
✅ Saved scaler_y: d:\fuelwatch_project\fuelwatch-project\ml-services\member1-kumara\models\scaler_y.pkl
✅ Saved meta: d:\fuelwatch_project\fuelwatch-project\ml-services\member1-kumara\models\model_meta.json
