In [16]:
#XGBOOST V3.3
import pandas as pd
import numpy as np
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import mean_absolute_error, r2_score
from xgboost import XGBRegressor
import yfinance as yf
import re
import datetime
from bs4 import BeautifulSoup
import requests as rq
import warnings
warnings.filterwarnings('ignore')

FUEL_COL = 'Fuel price(AED)'
OIL_COL = 'Crude Oil Barrel Price (USD)'
GOLD_COL = 'Gold Prices (AED)'

def get_crude_price():
    try:
        crude = yf.Ticker("CL=F")
        data = crude.history(period="5d")
        if not data.empty:
            price = float(data['Close'].iloc[-1])
            if 20 < price < 200:
                return price
    except:
        pass
    return 75.0

def get_latest_gold_price():
    try:
        date = datetime.datetime.now()
        url = f"https://www.uaegoldprice.com/gold-price-history/{date.strftime('%B')}-{date.year}/"
        resp = rq.get(url, headers={"User-Agent": "Mozilla/5.0"}, timeout=10)
        soup = BeautifulSoup(resp.text, "html.parser")
        table = soup.find("table")

        if table:
            for row in table.find_all("tr"):
                cols = row.find_all("td")
                if len(cols) >= 3:
                    d_txt = cols[0].text.strip()
                    if d_txt.startswith(date.strftime("%d")) or "01" in d_txt:
                        price = float(re.sub(r"[^\d.]", "", cols[2].text.strip()))
                        if 200 < price < 400:
                            return price
    except:
        pass
    return None

coprice = get_crude_price()
gold_price = get_latest_gold_price()

df = pd.read_excel("FuelData.xlsx", index_col=0, parse_dates=True)
df = df[[FUEL_COL, OIL_COL, GOLD_COL]].sort_index()

if gold_price is None:
    gold_price = float(df[GOLD_COL].iloc[-1])

def add_time_series_features_safe(df, max_lag=3):
    df = df.copy()
    for lag in range(1, max_lag + 1):
        df[f'fuel_lag_{lag}'] = df[FUEL_COL].shift(lag)
        df[f'oil_lag_{lag}'] = df[OIL_COL].shift(lag)

    df['oil_momentum'] = df[OIL_COL].pct_change(periods=1)
    df['gold_oil_ratio'] = df[GOLD_COL] / df[OIL_COL]
    df['oil_volatility'] = df[OIL_COL].rolling(window=3).std()

    return df.dropna()

df = add_time_series_features_safe(df)

X = df.drop(columns=[FUEL_COL])
y = df[FUEL_COL]

weights = np.logspace(0.1, 1.6, num=len(y))

tscv = TimeSeriesSplit(n_splits=min(3, len(X)-1))
train_index, test_index = list(tscv.split(X))[-1]
X_train, X_test = X.iloc[train_index], X.iloc[test_index]
y_train, y_test = y.iloc[train_index], y.iloc[test_index]

model = XGBRegressor(
    n_estimators=700,
    max_depth=2,
    learning_rate=0.007,
    subsample=0.85,
    colsample_bytree=0.85,
    reg_lambda=110,
    random_state=8,
    base_score=y.iloc[-4:].mean()
)

model.fit(X_train, y_train, sample_weight=weights[train_index])

y_pred_test = model.predict(X_test)
mae = mean_absolute_error(y_test, y_pred_test)
r2 = r2_score(y_test, y_pred_test)

def forecast_next_month(df, model, current_fuel, current_oil, current_gold):
    last = df.iloc[-1]
    feat_dict = {
        OIL_COL: current_oil,
        GOLD_COL: current_gold,
        'fuel_lag_1': current_fuel,
        'fuel_lag_2': last['fuel_lag_1'],
        'fuel_lag_3': last['fuel_lag_2'],
        'oil_lag_1': last[OIL_COL],
        'oil_lag_2': last['oil_lag_1'],
        'oil_lag_3': last['oil_lag_2'],
        'oil_momentum': (current_oil - last[OIL_COL]) / last[OIL_COL],
        'gold_oil_ratio': current_gold / current_oil,
        'oil_volatility': np.std([current_oil, last[OIL_COL], last['oil_lag_1']])
    }

    next_input = pd.DataFrame([feat_dict])[model.get_booster().feature_names]
    raw_pred = model.predict(next_input)[0]

    volatility_factor = np.clip(feat_dict['oil_volatility'] / 12, 0.35, 0.65)
    smoothed_pred = ((1 - volatility_factor) * raw_pred) + (volatility_factor * current_fuel)

    return smoothed_pred

this_month_fuel = df[FUEL_COL].iloc[-1]
next_month_pred = forecast_next_month(df, model, this_month_fuel, coprice, gold_price)

print("\n=== Model Performance Stats ===")
print(f"MAE: {mae:.4f}")
print(f"R² Score: {r2:.4f}")

print("\n=== Next Month Fuel Forecast ===")
print(f"Current Oil: ${coprice:.2f}")
print(f"Last Fuel: {this_month_fuel:.2f} AED")
print(f"Predicted Fuel for Next Month: {next_month_pred:.2f} AED")
print(f"Predicted Change: {next_month_pred - this_month_fuel:.2f} AED")

def analyze_trend(current, predicted):
    change = predicted - current
    if abs(change) < 0.01:
        return "Price expected to remain stable"
    return f"Expected {'increase' if change > 0 else 'decrease'} of {abs(change):.2f} AED"

print("\n" + analyze_trend(this_month_fuel, next_month_pred))


=== Model Performance Stats ===
MAE: 0.1188
R² Score: 0.6849

=== Next Month Fuel Forecast ===
Current Oil: $57.84
Last Fuel: 2.58 AED
Predicted Fuel for Next Month: 2.49 AED
Predicted Change: -0.09 AED

Expected decrease of 0.09 AED
