In [152]:
# Manejo de datos
import numpy as np
import seaborn as sns
import datetime as dt
import tensorflow as tf
# Gráficas
import plotly.express as px
import plotly.graph_objects as go
import pandas as pd
# Modelos
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from keras import layers, models, optimizers
from joblib import dump, load
from sklearn.preprocessing import StandardScaler
# Extras
from tqdm import tqdm

In [151]:
from sklearn.externals.joblib import dump, load

ModuleNotFoundError: No module named 'sklearn.externals.joblib'

In [2]:
tabla = pd.read_excel('bike_train.xlsx')

In [4]:
def weekday_to_isweekend(num):
    if num == 0 or num == 6:
        return 1
    else:
        return 0

In [14]:
df = tabla.copy()
df_x = df[['weekday', 'weathersit', 'mnth', 'atemp', 'hr']].copy()
df_x['weekend'] = df_x['weekday'].map(weekday_to_isweekend)
df_x.drop(columns=['weekday'],inplace=True)
df_x = df_x[['hr', 'weekend', 'mnth', 'weathersit', 'atemp']]
df_x.head()

Unnamed: 0,hr,weekend,mnth,weathersit,atemp
0,0,1,1,1,0.2879
1,1,1,1,1,0.2727
2,2,1,1,1,0.2727
3,3,1,1,1,0.2879
4,4,1,1,1,0.2879


In [16]:
X = np.array(df_x.values)
Y = np.array(df['cnt'].values.reshape(-1,1))

----

In [32]:
def build_model_regression(num_input_data, lr_=0.01, loss = 'mse'):
    model = models.Sequential()
    model.add(layers.Dense(num_input_data, activation='relu', input_shape=(num_input_data,)))
    model.add(layers.Dense(20, activation='relu'))
    model.add(layers.Dense(20, activation='relu'))
    model.add(layers.Dense(20, activation='relu'))
    model.add(layers.Dense(1,  activation='relu'))
    model.compile(optimizer = optimizers.SGD(lr_, momentum=0.9),
    # model.compile(optimizer = optimizers.Adam(lr_),
                  loss = loss,
                  metrics = 'mse')
    return model

def find_best_model(X, Y, intentos = 30, lr = 0.01, epochs = 100, loss='mse'):
    error_min = 9999999
    x_tr, x_te, y_tr, y_te = train_test_split(X, Y, test_size=0.3)
    for intento in tqdm(range(intentos), desc='Estamos trabajando en encontrar el mejor modelo:'):
        model_ = build_model_regression(num_input_data=X.shape[1], lr_=lr, loss=loss)
        hist = model_.fit(x_tr, y_tr,
                         batch_size=x_tr.shape[0],
                         epochs=epochs,
                         verbose=0,
                         validation_data=(x_te, y_te))
        y_pr = model_.predict(x_te, verbose = 0)
        error = mean_squared_error(y_te, y_pr)
        if error < error_min:
            error_min = error
            model = model_
            history = hist
            (X_tr, X_te, Y_tr, Y_te) = (x_tr, x_te, y_tr, y_te)
    print('Hemos finalizado!')
    return model, history, X_tr, X_te, Y_tr, Y_te

In [33]:
model, history, x_tr, x_te, y_tr, y_te = find_best_model(X, Y, intentos=5, loss='mean_squared_logarithmic_error')
px.line(np.array(history.history['loss']))

Estamos trabajando en encontrar el mejor modelo:: 100%|██████████| 5/5 [00:22<00:00,  4.47s/it]

Hemos finalizado!





In [31]:
Y_pr = model.predict(X)
fig = go.Figure()
fig.add_trace(go.Scatter(y= Y.reshape(-1)[:100], mode='lines+markers', name='test'))
fig.add_trace(go.Scatter(y= Y_pr.reshape(-1)[:100], mode='markers', name='model'))
fig.show()



----

In [26]:
df_weekend = tabla[['weekday','hr']].copy()
df_weekend['weekend'] = df_weekend['weekday'].map(weekday_to_isweekend)
df_weekend.drop(columns=['weekday'], inplace = True)
df_weekend = pd.concat([pd.get_dummies(df_weekend['hr']), df_weekend], axis = 1)
df_weekend.drop(columns=['hr'], inplace = True)
df_weekend.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,15,16,17,18,19,20,21,22,23,weekend
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
1,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
2,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
3,0,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
4,0,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1


In [27]:
X_weekend = np.array(df_weekend.values)
Y = tabla['cnt'].values.reshape(-1,1)

In [90]:
def build_model_regression(num_input_data, lr_=0.01, loss = 'mse'):
    model = models.Sequential()
    model.add(layers.Dense(num_input_data, activation='linear', input_shape=(num_input_data,)))
    model.add(layers.Dense(10, activation='linear'))
    model.add(layers.Dense(10, activation='linear'))
    model.add(layers.Dense(10, activation='linear'))
    # model.add(layers.Dense(20, activation='relu'))
    model.add(layers.Dense(1,  activation='relu'))
    # model.compile(optimizer = optimizers.SGD(lr_, momentum=0.9),
    model.compile(optimizer = optimizers.Adam(lr_),
                  loss = loss,
                  metrics = 'mse')
    return model

def find_best_model(X, Y, intentos = 30, lr = 0.01, epochs = 100, loss='mse'):
    error_min = 9999999
    x_tr, x_te, y_tr, y_te = train_test_split(X, Y, test_size=0.3)
    for intento in tqdm(range(intentos), desc='Estamos trabajando en encontrar el mejor modelo:'):
        model_ = build_model_regression(num_input_data=X.shape[1], lr_=lr, loss=loss)
        hist = model_.fit(x_tr, y_tr,
                         batch_size=x_tr.shape[0],
                         epochs=epochs,
                         verbose=0,
                         validation_data=(x_te, y_te))
        y_pr = model_.predict(x_te, verbose = 0)
        error = mean_squared_error(y_te, y_pr)
        if error < error_min:
            error_min = error
            model = model_
            history = hist
            (X_tr, X_te, Y_tr, Y_te) = (x_tr, x_te, y_tr, y_te)
    print('Hemos finalizado!')
    return model, history, X_tr, X_te, Y_tr, Y_te

In [48]:
model, history, X_tr, X_te, Y_tr, Y_te = find_best_model(X_weekend, Y, intentos=5)
px.line(np.array(history.history['loss']))

Estamos trabajando en encontrar el mejor modelo:: 100%|██████████| 5/5 [00:21<00:00,  4.35s/it]

Hemos finalizado!





In [49]:
Y_pr = model.predict(X_weekend)
print('RMSE1:',mean_squared_error(Y, Y_pr, squared=False))
fig = go.Figure()
fig.add_trace(go.Scatter(y= Y.reshape(-1)[5900:6100], mode='lines+markers', name='test'))
fig.add_trace(go.Scatter(y= Y_pr.reshape(-1)[5900:6100], mode='markers', name='model'))
fig.show()

RMSE1: 98.497447072672


In [50]:
Y_pr[df_weekend['weekend'] == 1]
fig = go.Figure()
fig.add_trace(go.Scatter(y= Y[df_weekend['weekend'] == 1].reshape(-1)[:100], mode='lines+markers', name='test'))
fig.add_trace(go.Scatter(y= Y_pr[df_weekend['weekend'] == 1].reshape(-1)[:100], mode='markers', name='model'))
fig.show()

### weekend

In [120]:
df_weekend = tabla[['weekday']].copy()
df_weekend['weekend'] = df_weekend['weekday'].map(weekday_to_isweekend)
df_weekend.drop(columns=['weekday'], inplace = True)
X_weekend = np.array(df_weekend.values)
Y = tabla['cnt'].values.reshape(-1,1)
model_weekend, history, X_tr, X_te, Y_tr, Y_te = find_best_model(X_weekend, Y, intentos=5)
px.line(np.array(history.history['loss']))

Estamos trabajando en encontrar el mejor modelo:: 100%|██████████| 5/5 [00:23<00:00,  4.77s/it]

Hemos finalizado!





In [121]:
y_weekend = model_weekend.predict(X_weekend)
print('RMSE1:',mean_squared_error(Y, y_weekend, squared=False))
fig = go.Figure()
fig.add_trace(go.Scatter(y= Y.reshape(-1)[5900:6100], mode='lines+markers', name='test'))
fig.add_trace(go.Scatter(y= y_weekend.reshape(-1)[5900:6100], mode='markers', name='model'))
fig.show()

RMSE1: 152.44505209522194


### hr

In [118]:
df_hr = tabla[['hr']].copy()
df_hr = pd.get_dummies(df_hr['hr'])
X_hr = np.array(df_hr.values)
model_hr, history, X_tr, X_te, Y_tr, Y_te = find_best_model(X_hr, Y, intentos=5)
px.line(np.array(history.history['loss']))


Estamos trabajando en encontrar el mejor modelo:: 100%|██████████| 5/5 [00:23<00:00,  4.74s/it]

Hemos finalizado!





In [119]:
y_hr = model_hr.predict(X_hr)
print('RMSE1:',mean_squared_error(Y, y_hr, squared=False))
fig = go.Figure()
fig.add_trace(go.Scatter(y= Y.reshape(-1)[:100], mode='lines+markers', name='test'))
fig.add_trace(go.Scatter(y= y_hr.reshape(-1)[:100], mode='markers', name='model'))
fig.show()

RMSE1: 106.24255869852405


### weathersit

In [107]:
df_ws = tabla[['weathersit']].copy()
df_ws = pd.get_dummies(df_ws['weathersit'])
X_ws = np.array(df_ws.values)
model_ws, history, X_tr, X_te, Y_tr, Y_te = find_best_model(X_ws, Y, intentos=5)
px.line(np.array(history.history['loss']))

Estamos trabajando en encontrar el mejor modelo:: 100%|██████████| 5/5 [00:21<00:00,  4.21s/it]

Hemos finalizado!





In [109]:
y_ws = model_ws.predict(X_ws)
print('RMSE1:',mean_squared_error(Y, y_ws, squared=False))
fig = go.Figure()
fig.add_trace(go.Scatter(y= Y.reshape(-1)[:100], mode='lines+markers', name='test'))
fig.add_trace(go.Scatter(y= y_ws.reshape(-1)[:100], mode='markers', name='model'))
fig.show()

RMSE1: 149.59596007790935


### month

In [110]:
df_mnth = tabla[['mnth']].copy()
df_mnth = pd.get_dummies(df_mnth['mnth'])
X_mnth = np.array(df_mnth.values)
model_mnth, history, X_tr, X_te, Y_tr, Y_te = find_best_model(X_mnth, Y, intentos=5)
px.line(np.array(history.history['loss']))

Estamos trabajando en encontrar el mejor modelo:: 100%|██████████| 5/5 [00:24<00:00,  4.85s/it]

Hemos finalizado!





In [112]:
y_mnth = model_mnth.predict(X_mnth)
print('RMSE1:',mean_squared_error(Y, y_mnth, squared=False))
fig = go.Figure()
fig.add_trace(go.Scatter(y= Y.reshape(-1)[:100], mode='lines+markers', name='test'))
fig.add_trace(go.Scatter(y= y_mnth.reshape(-1)[:100], mode='markers', name='model'))
fig.show()

RMSE1: 146.44744828425164


### Atemp

In [113]:
df_atemp = tabla[['atemp']].copy()
X_atemp = np.array(df_atemp.values)
model_atemp, history, X_tr, X_te, Y_tr, Y_te = find_best_model(X_atemp, Y, intentos=5)
px.line(np.array(history.history['loss']))

Estamos trabajando en encontrar el mejor modelo:: 100%|██████████| 5/5 [00:22<00:00,  4.52s/it]

Hemos finalizado!





In [114]:
y_atemp = model_atemp.predict(X_atemp)
print('RMSE1:',mean_squared_error(Y, y_atemp, squared=False))
fig = go.Figure()
fig.add_trace(go.Scatter(y= Y.reshape(-1)[:100], mode='lines+markers', name='test'))
fig.add_trace(go.Scatter(y= y_atemp.reshape(-1)[:100], mode='markers', name='model'))
fig.show()

RMSE1: 143.109570420946


### dteday

In [142]:
df_dt = tabla[['dteday']].copy()
df_dt['dteday'] = (df_dt['dteday'].map(dt.datetime.toordinal) - 734138)/365
X_dt = np.array(df_dt.values)
model_dt, history, X_tr, X_te, Y_tr, Y_te = find_best_model(X_dt, Y, intentos=5)
px.line(np.array(history.history['loss']))

Estamos trabajando en encontrar el mejor modelo:: 100%|██████████| 5/5 [00:22<00:00,  4.51s/it]

Hemos finalizado!





In [145]:
y_dt = model_dt.predict(X_dt)
print('RMSE1:',mean_squared_error(Y, y_dt, squared=False))
fig = go.Figure()
fig.add_trace(go.Scatter(y= Y.reshape(-1)[-1000:], mode='lines+markers', name='test'))
fig.add_trace(go.Scatter(y= y_dt.reshape(-1)[-1000:], mode='markers', name='model'))
fig.show()

RMSE1: 147.40154696440854


In [131]:
X_dt

array([[734138],
       [734138],
       [734138],
       ...,
       [734643],
       [734643],
       [734643]], dtype=int64)

## Juntamos todo

In [148]:
scaler_weekend = StandardScaler()
scaler_hr = StandardScaler()
scaler_ws = StandardScaler()
scaler_mnth = StandardScaler()
scaler_atemp= StandardScaler()
scaler_dt= StandardScaler()
scaled_weekend = scaler_weekend.fit_transform(y_weekend)
scaled_hr = scaler_hr.fit_transform(y_hr)
scaled_ws = scaler_ws.fit_transform(y_ws)
scaled_mnth = scaler_mnth.fit_transform(y_mnth)
scaled_atemp = scaler_atemp.fit_transform(y_atemp)
scaled_dt = scaler_dt.fit_transform(y_dt)
s_weekend = pd.Series(scaled_weekend.reshape(-1), name='weekend')
s_hr = pd.Series(scaled_hr.reshape(-1), name='hr')
s_ws = pd.Series(scaled_ws.reshape(-1), name='ws')
s_mnth = pd.Series(scaled_mnth.reshape(-1), name='mnth')
s_atemp = pd.Series(scaled_atemp.reshape(-1), name='atemp')
s_dt = pd.Series(scaled_dt.reshape(-1), name='dt')
X = np.array(pd.concat([s_weekend, s_hr, s_ws, s_mnth, s_atemp, s_dt], axis=1).values)

In [149]:
model, history, X_tr, X_te, Y_tr, Y_te = find_best_model(X, Y, intentos=5)
px.line(np.array(history.history['loss']))

Estamos trabajando en encontrar el mejor modelo:: 100%|██████████| 5/5 [00:21<00:00,  4.26s/it]

Hemos finalizado!





In [150]:
y_pr = model.predict(X)
print('RMSE1:',mean_squared_error(Y, y_pr, squared=False))
fig = go.Figure()
fig.add_trace(go.Scatter(y= Y.reshape(-1)[:100], mode='lines+markers', name='test'))
fig.add_trace(go.Scatter(y= y_pr.reshape(-1)[:100], mode='markers', name='model'))
fig.show()

RMSE1: 82.41125493551858


# Guardamos

In [156]:
dump(scaler_weekend, 'scalers/scaler_weekend.bin', compress=True)
dump(scaler_hr, 'scalers/scaler_hr.bin', compress=True)
dump(scaler_ws, 'scalers/scaler_ws.bin', compress=True)
dump(scaler_mnth, 'scalers/scaler_mnth.bin', compress=True)
dump(scaler_atemp, 'scalers/scaler_atemp.bin', compress=True)
dump(scaler_dt, 'scalers/scaler_dt.bin', compress=True)

['scalers/scaler_dt.bin']

In [157]:
model_weekend.save('models/model_weekend.h5')
model_hr.save('models/model_hr.h5')
model_ws.save('models/model_ws.h5')
model_mnth.save('models/model_mnth.h5')
model_atemp.save('models/model_atemp.h5')
model_dt.save('models/model_dt.h5')
model.save('models/modelo_1_0.h5')

# Cargamos

In [158]:
scaler_weekend = load('scalers/scaler_weekend.bin')
scaler_hr = load('scalers/scaler_hr.bin')
scaler_ws = load('scalers/scaler_ws.bin')
scaler_mnth = load('scalers/scaler_mnth.bin')
scaler_atemp = load('scalers/scaler_atemp.bin')
scaler_dt = load('scalers/scaler_dt.bin')

In [159]:
model_weekend = models.load_model('models/model_weekend.h5')
model_hr = models.load_model('models/model_hr.h5')
model_ws = models.load_model('models/model_ws.h5')
model_mnth = models.load_model('models/model_mnth.h5')
model_atemp = models.load_model('models/model_atemp.h5')
model_dt = models.load_model('models/model_dt.h5')
model = models.load_model('models/modelo_1_0.h5')