In [1]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import TimeSeriesSplit, train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, mean_absolute_percentage_error
from skopt import BayesSearchCV
from skopt.space import Real, Integer

In [2]:
df = pd.read_csv("simulated_dataset.csv")  # Load your dataset

In [3]:
df.head(5)

Unnamed: 0,Alloy,Finish,Length_m,Weight_kg_m,Profile_Name,Tolerances,GD_T,Order_Quantity,LME_Price_EUR,Customer_Category,Lead_Time_weeks,Quote_Price_SEK,Quote_Date
0,Nickel,phosphated,19.4,1.266,Skruvkanal,0.175,high,155702,3.76,medium,8.0,1.98,2025-05-01
1,Titanium,plasma sprayed,34.7,1.729,Ändprofil,0.104,medium,107945,3.48,micro,7.0,2.83,2025-09-10
2,Copper,phosphated,35.0,1.415,Kantprofil,0.187,medium,133490,2.97,medium,11.0,2.37,2025-04-03
3,Titanium,phosphated,27.5,0.952,Z-profil,0.135,medium,132768,2.6,large,2.0,2.39,2025-09-13
4,Titanium,hot-dip galvanized,15.9,1.453,Ramlist,0.077,low,58805,2.74,small,2.0,3.69,2025-09-11


In [None]:
#df.columns = df.columns.str.replace(' ', '_')  # Replace spaces with underscores in column names

In [None]:
# Load and sort dataset by date
#df = pd.read_csv("final.csv", parse_dates=["Quote_Date"])
df_sorted = df.sort_values("Quote_Date").reset_index(drop=True)

# Split features and target
X = df_sorted.drop(columns=["Quote_Price_SEK", "Quote_Date"])
y = df_sorted["Quote_Price_SEK"]

# Define Bayesian search space
search_space = {
    'n_estimators': Integer(100, 300),
    'max_depth': Integer(5, 30),
    'min_samples_split': Integer(2, 10),
    'min_samples_leaf': Integer(1, 10),
    'max_features': Real(0.1, 1.0)
}

# Time Series CV
tscv = TimeSeriesSplit(n_splits=5)

# BayesSearchCV
bayes_cv = BayesSearchCV(
    estimator=RandomForestRegressor(random_state=42),
    search_spaces=search_space,
    cv=tscv,
    n_iter=20,
    scoring='neg_root_mean_squared_error',
    n_jobs=-1,
    random_state=42
)
bayes_cv.fit(X, y)

# Train/Test split after sorting
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# Train final model with best parameters
final_model = RandomForestRegressor(**bayes_cv.best_params_, random_state=42)
final_model.fit(X_train, y_train)

# Predict and evaluate
preds = final_model.predict(X_test)
metrics = {
    "Best Hyperparameters": bayes_cv.best_params_,
    "MAE": round(mean_absolute_error(y_test, preds), 4),
    "RMSE": round(mean_squared_error(y_test, preds), 4),
    "R2 Score": round(r2_score(y_test, preds), 4),
    "MAPE (%)": round(mean_absolute_percentage_error(y_test, preds) * 100, 2)
}
print(metrics)

{'Best Hyperparameters': OrderedDict([('max_depth', 30), ('max_features', 1.0), ('min_samples_leaf', 1), ('min_samples_split', 2), ('n_estimators', 100)]), 'MAE': 0.0022, 'RMSE': 0.0, 'R2 Score': 1.0, 'MAPE (%)': 0.08}


In [None]:
import os
import joblib
import json

In [None]:
# Directory setup for model storage and registry
MODEL_DIR = "models"
REGISTRY_FILE = os.path.join(MODEL_DIR, "model_registry.json")

os.makedirs(MODEL_DIR, exist_ok=True)

In [None]:
# ---------------------------
# 1. Save a model per user
# ---------------------------
def save_model(model, user_id, version):
    user_dir = os.path.join(MODEL_DIR, user_id)
    os.makedirs(user_dir, exist_ok=True)
    filepath = os.path.join(user_dir, f"model_{version}.pkl")
    joblib.dump(model, filepath)
    print(f"✅ Model saved for user '{user_id}' as version '{version}' at: {filepath}")
    return filepath

In [None]:
# ---------------------------
# 2. Load a model per user/version
# ---------------------------
def load_model(user_id, version):
    filepath = os.path.join(MODEL_DIR, user_id, f"model_{version}.pkl")
    if not os.path.exists(filepath):
        raise FileNotFoundError(f"❌ No model found for user '{user_id}' with version '{version}'")
    model = joblib.load(filepath)
    print(f"✅ Model loaded for user '{user_id}', version '{version}'")
    return model

In [None]:
# ---------------------------
# 3. Maintain model registry
# ---------------------------
def update_registry(user_id, version):
    if os.path.exists(REGISTRY_FILE):
        with open(REGISTRY_FILE, "r") as f:
            registry = json.load(f)
    else:
        registry = {}

    if user_id not in registry:
        registry[user_id] = {"latest": version, "history": [version]}
    else:
        if version not in registry[user_id]["history"]:
            registry[user_id]["history"].append(version)
        registry[user_id]["latest"] = version

    with open(REGISTRY_FILE, "w") as f:
        json.dump(registry, f, indent=4)

    print(f"📘 Registry updated for user '{user_id}' with version '{version}'")

In [None]:
# ---------------------------
# 4. Retrieve latest model version for a user
# ---------------------------
def get_latest_version(user_id):
    if os.path.exists(REGISTRY_FILE):
        with open(REGISTRY_FILE, "r") as f:
            registry = json.load(f)
        return registry.get(user_id, {}).get("latest")
    return None

# Simulate saving a model
dummy_model = RandomForestRegressor()
dummy_model.fit([[0, 1], [1, 0]], [0, 1])  # simple dummy fit for demo

user_id = "user_001"
version = "v1"

model_path = save_model(dummy_model, user_id, version)
update_registry(user_id, version)


In [None]:
# Demonstrate loading the latest model
latest_version = get_latest_version(user_id)
loaded_model = load_model(user_id, latest_version)