In [1]:
dataset_path = '/content/drive/MyDrive/Asah-Capstone Project!/Dataset Thingy/dataset fixxx'

In [2]:
saving_path = '/content/drive/MyDrive/Asah-Capstone Project!/Dataset Thingy/Modelling/Hasil Model'

In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [4]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [5]:
cuaca = pd.read_csv(dataset_path + '/weather_data.csv')
efcap = pd.read_csv(dataset_path + '/effective_capacity.csv')
production_plan = pd.read_csv(dataset_path + '/production_plan.csv')

# Forecast Weather

In [6]:
!pip install xgboost==2.0.3



In [7]:
import pandas as pd
import numpy as np
from xgboost import XGBRegressor
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_absolute_error

In [8]:
df_cuaca = cuaca.copy()

In [9]:
df_cuaca["date"] = pd.to_datetime(df_cuaca["date"])
df_cuaca = df_cuaca.sort_values(["mine_id", "date"]).reset_index(drop=True)

# encode categorical mine_id
le = LabelEncoder()
df_cuaca["mine_id_enc"] = le.fit_transform(df_cuaca["mine_id"])

In [10]:
def create_features(df, target, lags=[1,3,7], rolls=[3,7]):
    df = df.copy()

    # time-based
    df["month"] = df["date"].dt.month
    df["week"] = df["date"].dt.isocalendar().week.astype(int)
    df["dayofyear"] = df["date"].dt.dayofyear

    # lags per mine
    for lag in lags:
        df[f"{target}_lag{lag}"] = df.groupby("mine_id")[target].shift(lag)

    # rolling averages per mine
    for r in rolls:
        df[f"{target}_roll{r}"] = df.groupby("mine_id")[target].shift(1).rolling(r).mean()

    return df

In [11]:
def train_xgb(df, target):
    # buat fitur
    df_feat = create_features(df, target)

    # drop NA akibat lag/rolling
    df_feat = df_feat.dropna().reset_index(drop=True)

    # train-test split (time based)
    cutoff = int(len(df_feat) * 0.8)
    train = df_feat.iloc[:cutoff]
    test = df_feat.iloc[cutoff:]

    feature_cols = [col for col in df_feat.columns
                    if col not in ["date", target, "remark", "weather_id", "mine_id"]]

    X_train, y_train = train[feature_cols], train[target]
    X_test, y_test = test[feature_cols], test[target]

    model = XGBRegressor(
        n_estimators=500,
        learning_rate=0.05,
        max_depth=6,
        objective="reg:squarederror",
        subsample=0.8,
        colsample_bytree=0.8,
        eval_metric="rmse",
        tree_method="hist"
    )

    # versi aman untuk XGBoost 3.1.2
    model.fit(
        X_train, y_train,
        eval_set=[(X_test, y_test)],
        verbose=False,
        early_stopping_rounds=20
    )

    preds = model.predict(X_test)
    mae = mean_absolute_error(y_test, preds)
    print(f"{target} MAE: {mae:.3f}")

    return model, feature_cols, df_feat

In [12]:
models = {}
feature_sets = {}
df_feat_store = {}

targets = ["rainfall_mm", "temperature_c", "humidity_pct", "wind_speed_kmh"]

for t in targets:
    print("\nTraining model for:", t)
    model, feats, df_feat = train_xgb(df_cuaca, t)
    models[t] = model
    feature_sets[t] = feats
    df_feat_store[t] = df_feat


Training model for: rainfall_mm




rainfall_mm MAE: 0.271

Training model for: temperature_c




temperature_c MAE: 0.441

Training model for: humidity_pct




humidity_pct MAE: 2.005

Training model for: wind_speed_kmh




wind_speed_kmh MAE: 0.150


In [13]:
def forecast_future(df, models, feature_sets, days_ahead=7):
    df_future = df.copy()
    last_date = df["date"].max()

    future_rows = []

    for i in range(days_ahead):
        next_date = last_date + pd.Timedelta(days=i+1)

        for mine in df["mine_id"].unique():
            row = {
                "date": next_date,
                "mine_id": mine,
                "mine_id_enc": le.transform([mine])[0],
                "month": next_date.month,
                "week": next_date.isocalendar().week,
                "dayofyear": next_date.timetuple().tm_yday
            }

            temp_df = pd.concat([df_future, pd.DataFrame([row])], ignore_index=True)

            for target in targets:
                temp_df = create_features(temp_df, target)

                # ambil baris terakhir untuk predict
                pred_input = temp_df.iloc[-1:][feature_sets[target]]

                # predict
                row[target] = models[target].predict(pred_input)[0]

            #df_future = pd.concat([df_future, pd.DataFrame(row)], ignore_index=True, axis = 1)
            future_rows.append(row)

    return pd.DataFrame(future_rows)

In [14]:
df_pred_weather = forecast_future(df_cuaca, models, feature_sets, days_ahead=7)
df_pred_weather.head()

Unnamed: 0,date,mine_id,mine_id_enc,month,week,dayofyear,rainfall_mm,temperature_c,humidity_pct,wind_speed_kmh
0,2025-11-08,MINE_1,0,11,45,312,0.505132,25.413834,68.557098,0.214318
1,2025-11-08,MINE_2,1,11,45,312,0.411516,25.164579,68.466515,0.266288
2,2025-11-08,MINE_3,2,11,45,312,0.668217,25.932009,64.65519,0.239608
3,2025-11-08,MINE_4,3,11,45,312,0.604666,25.563963,68.584282,0.297618
4,2025-11-08,MINE_5,4,11,45,312,0.545919,25.719606,68.178108,0.309793


## Saving Model

In [None]:
import os
import pickle
import json # Ensure json is imported for saving model_paths

output_dir = f'{saving_path}/Hasil Model'
os.makedirs(output_dir, exist_ok=True)

model_paths = {}
for target, model in models.items():
    model_filename = f'{output_dir}/forecast_weather_xgboost_{target}.pkl'
    # Use pickle.dump to save the model in .pkl format
    with open(model_filename, 'wb') as f:
        pickle.dump(model, f)
    model_paths[target] = model_filename

# Save the dictionary of model paths to a JSON file
with open(f'{output_dir}/model_paths.json', 'w') as f:
    json.dump(model_paths, f, indent=4)

print(f"All models and their paths have been saved to '{output_dir}' directory.")
print(f"Model paths mapping saved to '{output_dir}/model_paths.json'")

All models and their paths have been saved to '/content/drive/MyDrive/Asah-Capstone Project!/Dataset Thingy/Modelling/Hasil Model' directory.
Model paths mapping saved to '/content/drive/MyDrive/Asah-Capstone Project!/Dataset Thingy/Modelling/Hasil Model/model_paths.json'


# Klasifikasi Cuaca

In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.preprocessing import LabelEncoder
from xgboost import XGBClassifier
from sklearn.model_selection import GridSearchCV
import pickle

In [6]:
df = pd.read_csv(f"{dataset_path}/weather_data.csv")

In [7]:
X = df.drop(columns=["remark", "weather_id", "mine_id"])
y = df["remark"]

le_remark = LabelEncoder()
y_encoded = le_remark.fit_transform(y)

X_encoded = X.copy()
encoders = {}

for col in X_encoded.select_dtypes(include=['object', 'category']).columns:
    le = LabelEncoder()
    X_encoded[col] = le.fit_transform(X_encoded[col])
    encoders[col] = le

In [8]:
X_train, X_test, y_train, y_test = train_test_split(
    X_encoded, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded
)

In [9]:
from xgboost import XGBClassifier
from sklearn.model_selection import GridSearchCV

xgb = XGBClassifier(
    objective='multi:softmax',
    num_classes = len(le_remark.classes_),
    eval_metric='mlogloss',
    random_state=42
)

param_grid = {
    'max_depth': [4, 6],
    'learning_rate': [0.05, 0.1],
    'n_estimators': [150, 250],
    'subsample': [0.8, 1],
    'colsample_bytree': [0.8, 1]
}

grid = GridSearchCV(
    xgb,
    param_grid,
    cv=5,
    scoring='accuracy',
    verbose=1,
    n_jobs=-1
)

grid.fit(X_train, y_train)

print("Best params:", grid.best_params_)
print("Best CV accuracy:", grid.best_score_)


Fitting 5 folds for each of 32 candidates, totalling 160 fits


Parameters: { "num_classes" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Best params: {'colsample_bytree': 0.8, 'learning_rate': 0.05, 'max_depth': 4, 'n_estimators': 150, 'subsample': 1}
Best CV accuracy: 0.8344849419176181


In [10]:
y_pred = grid.predict(X_test)

print("Accuracy:", accuracy_score(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred, target_names=le_remark.classes_))

Accuracy: 0.8382187147688839
[[  36    0    0    0    0  241]
 [   8    0    0    0    0   12]
 [   0    0   86    0    0    0]
 [   0    0    0  156    2    1]
 [   0    0    0    0   12    0]
 [  22    0    0    1    0 1197]]
              precision    recall  f1-score   support

     Berawan       0.55      0.13      0.21       277
       Cerah       0.00      0.00      0.00        20
 Hujan lebat       1.00      1.00      1.00        86
Hujan ringan       0.99      0.98      0.99       159
Hujan sedang       0.86      1.00      0.92        12
     Mendung       0.82      0.98      0.90      1220

    accuracy                           0.84      1774
   macro avg       0.70      0.68      0.67      1774
weighted avg       0.80      0.84      0.79      1774



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [11]:
pickle.dump(grid.best_estimator_, open("xgb_weather_classification.pkl", "wb"))
pickle.dump(le_remark, open("label_encoder_target.pkl", "wb"))
pickle.dump(encoders, open("feature_encoders.pkl", "wb"))

print("dah selesai")

dah selesai


# Model Effective Capacity

In [21]:
df_efcap = efcap.copy()

df_efcap

Unnamed: 0,effcap_id,plan_id,mine_id,equipment_id,equipment_type,week_start,road_condition,weather_condition,availability_pct,effective_capacity_ton_day,remark
0,EFC00000-1,PLAN0001-1,MINE_1,EQ037-1,Excavator,2019-01-01,Fair,Mendung,73,29.46,Optimal
1,EFC00001-1,PLAN0002-1,MINE_1,EQ037-1,Excavator,2019-01-08,Good,Mendung,96,44.30,Optimal
2,EFC00002-1,PLAN0003-1,MINE_1,EQ033-1,Loader,2019-01-15,Good,Mendung,93,35.15,Optimal
3,EFC00003-1,PLAN0004-1,MINE_1,EQ007-1,Dump Truck,2019-01-22,Fair,Mendung,93,1531.46,Optimal
4,EFC00004-1,PLAN0005-1,MINE_1,EQ017-1,Loader,2019-02-01,Good,Mendung,87,41.20,Optimal
...,...,...,...,...,...,...,...,...,...,...,...
1195,EFC01195-1,PLAN0336-1,MINE_1,EQ027-1,Excavator,2025-12-22,Good,Mendung,98,399.23,Optimal
1196,EFC01196-4,PLAN0192-4,MINE_4,EQ026-4,Excavator,2025-12-22,Good,Mendung,87,218.14,Optimal
1197,EFC01197-2,PLAN0288-2,MINE_2,EQ049-2,Loader,2025-12-22,Good,Mendung,71,26.07,Optimal
1198,EFC01198-3,PLAN0240-3,MINE_3,EQ039-3,Excavator,2025-12-22,Fair,Mendung,86,50.64,Optimal


In [22]:
df_efcap.drop(columns=['effcap_id','equipment_id', 'remark', 'plan_id', 'week_start'], inplace=True)
df_efcap

Unnamed: 0,mine_id,equipment_type,road_condition,weather_condition,availability_pct,effective_capacity_ton_day
0,MINE_1,Excavator,Fair,Mendung,73,29.46
1,MINE_1,Excavator,Good,Mendung,96,44.30
2,MINE_1,Loader,Good,Mendung,93,35.15
3,MINE_1,Dump Truck,Fair,Mendung,93,1531.46
4,MINE_1,Loader,Good,Mendung,87,41.20
...,...,...,...,...,...,...
1195,MINE_1,Excavator,Good,Mendung,98,399.23
1196,MINE_4,Excavator,Good,Mendung,87,218.14
1197,MINE_2,Loader,Good,Mendung,71,26.07
1198,MINE_3,Excavator,Fair,Mendung,86,50.64


In [23]:
from sklearn.preprocessing import LabelEncoder
import joblib

cat_columns = df_efcap.select_dtypes(include=['object']).columns

encoders = {}  # dictionary to store encoders per column

for col in cat_columns:
    le = LabelEncoder()
    df_efcap[col] = le.fit_transform(df_efcap[col])
    encoders[col] = le

# Save the encoders dictionary
joblib.dump(encoders, f"encoders_efcap.pkl")

['encoders_efcap.pkl']

In [24]:
encoders

{'mine_id': LabelEncoder(),
 'equipment_type': LabelEncoder(),
 'road_condition': LabelEncoder(),
 'weather_condition': LabelEncoder()}

In [25]:
from sklearn.model_selection import train_test_split

X = df_efcap.drop(columns=['effective_capacity_ton_day'])
y = df_efcap['effective_capacity_ton_day']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size= 0.2, random_state= 88)

In [26]:
from sklearn.ensemble import RandomForestRegressor

model = RandomForestRegressor()
model.fit(X_train, y_train)

In [27]:
y_pred = model.predict(X_test)

In [28]:
from sklearn.metrics import mean_squared_error, r2_score

mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f'MSE: {mse}')
print(f'R2: {r2}')

MSE: 33121.52665133028
R2: 0.8904416512747547


In [30]:
!pip install scikit-optimize

Collecting scikit-optimize
  Downloading scikit_optimize-0.10.2-py2.py3-none-any.whl.metadata (9.7 kB)
Collecting pyaml>=16.9 (from scikit-optimize)
  Downloading pyaml-25.7.0-py3-none-any.whl.metadata (12 kB)
Downloading scikit_optimize-0.10.2-py2.py3-none-any.whl (107 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m107.8/107.8 kB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pyaml-25.7.0-py3-none-any.whl (26 kB)
Installing collected packages: pyaml, scikit-optimize
Successfully installed pyaml-25.7.0 scikit-optimize-0.10.2


In [31]:
from skopt import BayesSearchCV
from skopt.space import Real, Categorical, Integer
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, make_scorer, r2_score, mean_absolute_percentage_error

# Define the hyperparameter search space for RandomForestRegressor
param_grid_rf = {
    'n_estimators': Integer(50, 200),
    'max_features': Categorical(['sqrt', 'log2', None]),
    'max_depth': Integer(5, 30),
    'min_samples_split': Integer(2, 10),
    'min_samples_leaf': Integer(1, 5),
    'bootstrap': Categorical([True, False])
}

# Define the RandomForestRegressor model
rf_model = RandomForestRegressor(random_state=88)

# Define the scorer (BayesSearchCV maximizes, so we use negative MSE)
scorer = make_scorer(mean_squared_error, greater_is_better=False)

# Initialize BayesSearchCV
bayes_search_rf = BayesSearchCV(
    estimator=rf_model,
    search_spaces=param_grid_rf,
    n_iter=50, # Number of optimization steps
    cv=5, # Cross-validation folds
    scoring=scorer,
    random_state=88,
    n_jobs=-1 # Use all available cores
)

# Perform the optimization
bayes_search_rf.fit(X_train, y_train)

# Get the best parameters and best score
best_params_rf = bayes_search_rf.best_params_
best_score_rf = -bayes_search_rf.best_score_ # Convert back to positive MSE

print(f"Best parameters for RandomForestRegressor: {best_params_rf}")
print(f"Best MSE for RandomForestRegressor: {best_score_rf}")

# Train a new RandomForestRegressor model with the best parameters
best_rf_model = RandomForestRegressor(**best_params_rf, random_state=88)
best_rf_model.fit(X_train, y_train)

# Evaluate the best RandomForestRegressor model on the test set
y_pred_rf_tuned = best_rf_model.predict(X_test)
mse_rf_tuned = mean_squared_error(y_test, y_pred_rf_tuned)
r2_rf_tuned = r2_score(y_test, y_pred_rf_tuned)
mape_rf_tuned = mean_absolute_percentage_error(y_test, y_pred_rf_tuned)

print(f"\nTunned RandomForestRegressor Model Performance on Test Set:")
print(f"MSE: {mse_rf_tuned}")
print(f"R2: {r2_rf_tuned}")
print(f"MAPE: {mape_rf_tuned}")



Best parameters for RandomForestRegressor: OrderedDict({'bootstrap': True, 'max_depth': 11, 'max_features': 'log2', 'min_samples_leaf': 4, 'min_samples_split': 10, 'n_estimators': 200})
Best MSE for RandomForestRegressor: 27096.056333443546

Tunned RandomForestRegressor Model Performance on Test Set:
MSE: 26163.868538547176
R2: 0.9134559749155615
MAPE: 1.27159181136574


In [None]:
import joblib
import json
import os

# Define the output directory for effective capacity models
efcap_output_dir = f'{saving_path}/Hasil Model'
os.makedirs(efcap_output_dir, exist_ok=True)

# Save the best effective capacity model
joblib.dump(best_rf_model, f"{efcap_output_dir}/model_effective_capacity.pkl")

# Save the best parameters found by BayesSearchCV for effective capacity
best_params_efcap_for_json = {k: str(v) for k, v in best_params_rf.items()}
with open(f"{efcap_output_dir}/best_params_effective_capacity.json", "w") as f:
    json.dump(best_params_efcap_for_json, f, indent=4)

# Save the encoders used for effective capacity (assuming 'encoders' variable holds them)
joblib.dump(encoders, f"{efcap_output_dir}/encoders_effective_capacity.pkl")

print(f"Effective capacity model saved to: {efcap_output_dir}/model_effective_capacity.pkl")
print(f"Best parameters saved to: {efcap_output_dir}/best_params_effective_capacity.json")
print(f"Effective capacity encoders saved to: {efcap_output_dir}/encoders_effective_capacity.pkl")

# Model Production Plan

In [32]:
# Contoh merge pakai plan_id
df = pd.merge(production_plan,
              efcap,
              on='plan_id',
              how='inner')

In [33]:
features = ['road_condition', 'weather_condition', 'availability_pct', 'effective_capacity_ton_day', 'planned_output_ton']
X = df[features]
y = df['actual_output_ton']

In [34]:
#import label encoder
from sklearn.preprocessing import LabelEncoder

In [35]:
le_road = LabelEncoder()
X['road_condition'] = le_road.fit_transform(X['road_condition'])

le_weather = LabelEncoder()
X['weather_condition'] = le_weather.fit_transform(X['weather_condition'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X['road_condition'] = le_road.fit_transform(X['road_condition'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X['weather_condition'] = le_weather.fit_transform(X['weather_condition'])


In [36]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

# Pisah train-test
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42)

# Kolom kategori & numerik
cat_cols = ['road_condition', 'weather_condition']
num_cols = ['availability_pct','effective_capacity_ton_day','planned_output_ton']

# Preprocessor
preprocessor = ColumnTransformer([
    ('cat', OneHotEncoder(handle_unknown='ignore'), cat_cols),
    ('num', StandardScaler(), num_cols)
])

In [37]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score, mean_squared_error
import numpy as np

model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

pipeline = Pipeline([
    ('prep', preprocessor),
    ('model', model)
])

pipeline.fit(X_train, y_train)

y_pred = pipeline.predict(X_test)

print("R2 Score:", r2_score(y_test, y_pred))
print("RMSE:", np.sqrt(mean_squared_error(y_test, y_pred)))

R2 Score: 0.8208232320105721
RMSE: 15204.044979428378


In [39]:
!pip install scikit-optimize

from skopt import BayesSearchCV
from skopt.space import Real, Integer
from sklearn.ensemble import RandomForestRegressor
from sklearn.pipeline import Pipeline
from sklearn.metrics import r2_score, mean_squared_error
import numpy as np

# Define the parameter space for RandomForestRegressor
param_space = {
    'model__n_estimators': Integer(50, 200),
    'model__max_features': Real(0.1, 1.0, prior='uniform'),
    'model__max_depth': Integer(5, 20),
    'model__min_samples_split': Integer(2, 10),
    'model__min_samples_leaf': Integer(1, 5)
}

# Create a pipeline with the preprocessor and the RandomForestRegressor model
pipeline_bayes = Pipeline([
    ('prep', preprocessor),
    ('model', RandomForestRegressor(random_state=42))
])

# Instantiate BayesSearchCV
bayes_search = BayesSearchCV(
    pipeline_bayes,
    param_space,
    n_iter=50,  # Number of optimization iterations
    cv=5,       # Number of cross-validation folds
    scoring='r2', # Metric to optimize
    random_state=42,
    n_jobs=-1   # Use all available cores
)

# Fit BayesSearchCV to the training data
bayes_search.fit(X_train, y_train)

# Print the best parameters and best score
print("Best parameters found:", bayes_search.best_params_)
print("Best R2 score found:", bayes_search.best_score_)

# Evaluate the best model on the test set
y_pred_bayes = bayes_search.predict(X_test)
print("R2 Score on test set (BayesCV):", r2_score(y_test, y_pred_bayes))
print("RMSE on test set (BayesCV):", np.sqrt(mean_squared_error(y_test, y_pred_bayes)))
print("MAPE on test set (BayesCV): ", np.mean(np.abs((y_test - y_pred_bayes) / y_test)) * 100)

Best parameters found: OrderedDict({'model__max_depth': 7, 'model__max_features': 0.7310131060293615, 'model__min_samples_leaf': 1, 'model__min_samples_split': 2, 'model__n_estimators': 200})
Best R2 score found: 0.8358324945757856
R2 Score on test set (BayesCV): 0.8341898356314899
RMSE on test set (BayesCV): 14625.94282660508
MAPE on test set (BayesCV):  11.74628058423745


In [None]:
import joblib
import json
import os

# Define the output directory for production plan models
prod_plan_output_dir = f'{saving_path}/Hasil Model'
os.makedirs(prod_plan_output_dir, exist_ok=True)

# Save the best production plan model (pipeline)
joblib.dump(bayes_search.best_estimator_, f"{prod_plan_output_dir}/model_production_plan.pkl")

# Save the best parameters found by BayesSearchCV
# Convert any non-string parameter values to string for JSON compatibility
best_params_for_json = {k: str(v) for k, v in bayes_search.best_params_.items()}
with open(f"{prod_plan_output_dir}/best_params_prod_plan.json", "w") as f:
    json.dump(best_params_for_json, f, indent=4)

# Save the LabelEncoders for road_condition and weather_condition
encoders_prod_plan = [le_road, le_weather]
joblib.dump(encoders_prod_plan, f"{prod_plan_output_dir}/encoders_prodplan.pkl")

print(f"Production plan model saved to: {prod_plan_output_dir}/model_production_plan.pkl")
print(f"Best parameters saved to: {prod_plan_output_dir}/best_params_prod_plan.json")
print(f"Production plan encoders saved to: {prod_plan_output_dir}/encoders_prodplan.pkl")

## Requirements

In [15]:
import subprocess

def generate_requirements_file(output_filename="requirements.txt"):
    """Generates a requirements.txt file with all installed packages."""
    try:
        # Run pip freeze command to get installed packages
        result = subprocess.run(['pip', 'freeze'], capture_output=True, text=True, check=True)
        requirements = result.stdout

        # Save the output to a file
        with open(output_filename, 'w') as f:
            f.write(requirements)
        print(f"Requirements successfully written to {output_filename}")
    except subprocess.CalledProcessError as e:
        print(f"Error generating requirements: {e}")
        print(f"Stderr: {e.stderr}")
    except Exception as e:
        print(f"An unexpected error occurred: {e}")

# Call the function to generate the file
generate_requirements_file()

Requirements successfully written to requirements.txt
