In [31]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

from sklearn.preprocessing import StandardScaler, OrdinalEncoder
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

from sklearn.metrics import r2_score

# Dataset: Bike Sharing Daily
*Source: https: // www.kaggle.com / contactprad / bike - share - daily - data
*Licence: [1]
Fanaee - T, Hadi, and Gama, Joao, "Event labeling combining ensemble detectors and background knowledge", Progress in Artificial
Intelligence(2013): pp.
1 - 15, Springer
Berlin
Heidelberg, doi: 10.1007 / s13748 - 013 - 0040 - 3.


@article


{
    year = {2013},
           issn = {2192 - 6352},
                  journal = {Progress in Artificial
Intelligence},
doi={10.1007 / s13748-013-0040-3},
title={Event labeling combining ensemble detectors and background knowledge},
url={http: // dx.doi.org / 10.1007 / s13748-013-0040-3},
publisher={Springer Berlin Heidelberg},
keywords={Event labeling; Event detection; Ensemble learning; Background knowledge},
author={Fanaee-T, Hadi and Gama, Joao},
pages={1-15}
}

In [32]:
data = pd.read_csv('datasets/bike_sharing_daily.csv')

X = data.drop('cnt',axis=1)
y = data['cnt']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=123)

# Create the pipeline

In [33]:
# NUMERIC FEATURES
numeric_features = ['temp', 'atemp', 'hum', 'windspeed']
numeric_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler())])

# CATEGORICAL FEATURES
categorical_features = ['season', 'mnth', 'holiday', 'weekday', 'workingday', 'weathersit']
categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='constant')),
    ('encoder', OrdinalEncoder())])

# PREPROCESSING STEPS
preprocessor = ColumnTransformer(transformers=[
    ('numeric', numeric_transformer, numeric_features)
    ,('categorical', categorical_transformer, categorical_features)])

# PIPELINE INCLUDING A MODEL
pipeline = Pipeline(steps = [
    ('preprocessor', preprocessor),
    ('regressor',LinearRegression())])

## Fit the pipeline to the train data

In [34]:
rf_model = pipeline.fit(X_train, y_train)
print (rf_model)

Pipeline(steps=[('preprocessor',
                 ColumnTransformer(transformers=[('numeric',
                                                  Pipeline(steps=[('imputer',
                                                                   SimpleImputer()),
                                                                  ('scaler',
                                                                   StandardScaler())]),
                                                  ['temp', 'atemp', 'hum',
                                                   'windspeed']),
                                                 ('categorical',
                                                  Pipeline(steps=[('imputer',
                                                                   SimpleImputer(strategy='constant')),
                                                                  ('encoder',
                                                                   OrdinalEncoder())]),
                        

# How to access information related with each preprocessing step?

In [49]:
# NUMERIC SCALER
print(f"Scale: {rf_model.named_steps.preprocessor.transformers_[0][1].named_steps['scaler'].scale_}")
print(f"Mean: {rf_model.named_steps.preprocessor.transformers_[0][1].named_steps['scaler'].mean_}")
print(f"Variance: {rf_model.named_steps.preprocessor.transformers_[0][1].named_steps['scaler'].var_}")

Scale: [0.18320784 0.16293262 0.14197984 0.07778217]
Mean: [0.49254957 0.47159279 0.62575302 0.1908211 ]
Variance: [0.03356511 0.02654704 0.02015827 0.00605007]
Variance: ['x0' 'x1' 'x2' 'x3']


In [53]:
# CATEGORICAL ENCODER
print(f"Scale: {rf_model.named_steps.preprocessor.transformers_[1][1].named_steps['encoder'].categories_}")

Scale: [array([1, 2, 3, 4]), array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12]), array([0, 1]), array([0, 1, 2, 3, 4, 5, 6]), array([0, 1]), array([1, 2, 3])]


# How to access information related with the model?

In [35]:
rf_model.named_steps.regressor.coef_

array([ 546.92088171,  553.77780936, -349.59325099, -305.2466437 ,
        462.35365645,  -28.00078718, -454.49164861,   83.00523183,
         57.63876769, -412.13271574])

In [36]:
rf_model.named_steps.regressor.intercept_

3851.451947531439