In [32]:
import pandas as pd
import numpy as np
import sys
import joblib
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split, KFold, cross_val_score, GridSearchCV, RandomizedSearchCV
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
import lightgbm as lgb

## Load the processed data

In [33]:
# Read the pickled dataset into `data`.
# NOTE(review): this is an absolute, machine-specific path, so the notebook
# only runs where this exact file exists.
data = pd.read_pickle(
    "/Users/secret/Desktop/Seoul_bike/MLRegression_SeoulBikeRental/phase_three/processed_data_timo.pkl"
)

# Name of the column the model is trained to predict.
target = "Rented Bike Count"

## Train and Test Split

In [34]:
# Separate the features from the label. The label column is addressed through
# `target` (set right after the data is loaded) rather than a repeated string
# literal, so the column used for training and the target name exported with
# the model cannot drift apart.
X = data.drop(columns=[target])
y = data[target]

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Show the shapes of the four resulting splits as the cell output.
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((7008, 11), (1752, 11), (7008,), (1752,))

## Set up the scikit-learn pipeline and train the model

In [35]:
# Numeric columns are passed through a StandardScaler.
numeric_transformer = Pipeline([("scaler", StandardScaler())])

# Columns treated as numeric: those selected by the "float" and "int64" dtypes.
numeric_features = X.select_dtypes(include=["float", "int64"]).columns

In [36]:
# Categorical columns are one-hot encoded; handle_unknown="ignore" keeps
# transform from raising on categories that were not seen during fit.
categorical_transformer = Pipeline(
    [("onehot", OneHotEncoder(handle_unknown="ignore"))]
)

# Columns treated as categorical: those with the pandas "category" dtype.
categorical_features = X.select_dtypes(include=["category"]).columns

## Combining the processing steps

In [37]:
# Route the numeric and categorical column groups to their own transformers.
# NOTE(review): no `remainder` is given, so columns that fall in neither group
# are dropped (ColumnTransformer's documented default) — confirm that is intended.
preprocesor = ColumnTransformer(
    [
        ("num", numeric_transformer, numeric_features),
        ("cat", categorical_transformer, categorical_features),
    ]
)

In [38]:
# Chain the column preprocessing and a LightGBM regressor into one estimator,
# so the same transformations are applied at fit time and at predict time.
pipeline = Pipeline(
    [
        ("preprocessor", preprocesor),
        ("regressor", lgb.LGBMRegressor(verbose=0, random_state=42)),
    ]
)

In [39]:
# Train on the training split only. Pipeline.fit returns the fitted pipeline
# itself, so `model` and `pipeline` refer to the same object.
model = pipeline.fit(X=X_train, y=y_train)

## Evaluating the model

In [40]:
# Predict on the held-out test split.
predictions = model.predict(X_test)

# R^2 and root-mean-squared error of the predictions against the true values.
r2 = r2_score(y_true=y_test, y_pred=predictions)
rmse = np.sqrt(mean_squared_error(y_true=y_test, y_pred=predictions))

print("RMSE:", rmse)
print("R2:", r2)

RMSE: 253.95160060005907
R2: 0.8452130137979021


## Exporting the model

In [41]:
# Feature column names, in the order they appear in X.
ref_cols = X.columns.tolist()

# Bundle the fitted pipeline, the feature column names and the target name
# into a single file on disk.
# NOTE(review): absolute, machine-specific output path.
joblib.dump(
    [model, ref_cols, target],
    "/Users/secret/Desktop/Seoul_bike/MLRegression_SeoulBikeRental/phase_three/exported_model_timo.pkl",
)

['/Users/secret/Desktop/Seoul_bike/MLRegression_SeoulBikeRental/phase_three/exported_model_timo.pkl']