# Simple Regression

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from imblearn.over_sampling import RandomOverSampler
from sklearn.preprocessing import StandardScaler
import seaborn as sns
import tensorflow as tf
import copy
from sklearn.linear_model import LinearRegression

In [2]:
# Short names assigned positionally to the columns that remain after the
# drops below.
dataset_cols = ['bike_count', 'hour', 'temp', 'humidity', 'wind', 'visibility', 'dew_pt_temp', 'radiation', 'rain', 'snow', 'functional']
# 'Date' and 'Seasons' are not used as features anywhere in this notebook.
df = pd.read_csv('data/SeoulBikeData.csv', encoding='latin1').drop(['Date', 'Seasons'], axis=1)
# NOTE(review): the public UCI release of this dataset also carries a
# 'Holiday' column, which would leave 12 columns for the 11 names above and
# make the assignment below raise a length-mismatch ValueError. Drop it when
# present; errors='ignore' makes this a no-op for a CSV that lacks it.
# Confirm against the actual file.
df = df.drop(columns=['Holiday'], errors='ignore')
df.columns = dataset_cols

# Data Cleaning

In [3]:
# Encode the 'functional' flag as 1 where the value is 'Yes', 0 otherwise.
is_functional = df['functional'].eq('Yes')
df['functional'] = is_functional.astype(int)
# Keep only the rows recorded at hour 12, then discard the now-constant column.
noon_mask = df['hour'].eq(12)
df = df.loc[noon_mask].drop(columns=['hour'])


In [None]:
# Print the first rows of the cleaned frame as a quick sanity check.
head_rows = df.head()
print(head_rows)

# Scatter Plot

In [None]:
# One scatter plot per column after the first, each against the bike count.
feature_names = list(df.columns[1:])
for feature in feature_names:
    feature_values = df[feature]
    plt.scatter(feature_values, df['bike_count'])
    plt.title(feature)
    plt.xlabel(feature)
    plt.ylabel('BIke Count at Noon')
    plt.show()

# Further Cleaning after Plot Analysis

In [4]:
# Remove the columns rejected after inspecting the scatter plots.
rejected_cols = ['wind', 'visibility', 'functional']
df = df.drop(columns=rejected_cols)

# Train/Valid/Test Dataset Division

In [None]:
# Shuffle the rows once, then cut the shuffled frame by position into
# 60% train / 20% validation / 20% test.
# Positional slicing is used instead of np.split(): np.split on a DataFrame
# goes through DataFrame.swapaxes, which pandas has deprecated.
shuffled = df.sample(frac=1)
train_end = int(0.6 * len(df))
valid_end = int(0.8 * len(df))
train = shuffled.iloc[:train_end]
valid = shuffled.iloc[train_end:valid_end]
test = shuffled.iloc[valid_end:]

# Function to get the feature matrix X and target vector y for a regression model (simple or multiple).

In [6]:
def get_xy(dataframe, y_label, x_labels=None):
    """Split a DataFrame into feature and target arrays.

    Args:
        dataframe: pandas DataFrame holding the feature and target columns.
            It is not modified, and the returned arrays do not share memory
            with it.
        y_label: Name of the target column.
        x_labels: Feature column(s) to use. ``None`` (the default) selects
            every column except ``y_label``; a single string selects that one
            column; any other iterable of names selects those columns in the
            given order.

    Returns:
        A tuple ``(data, X, y)`` where ``X`` has shape
        ``(n_rows, n_features)``, ``y`` has shape ``(n_rows, 1)`` and ``data``
        is ``X`` and ``y`` stacked column-wise.
    """
    if x_labels is None:
        feature_cols = [c for c in dataframe.columns if c != y_label]
    elif isinstance(x_labels, str):
        # A bare string would otherwise be measured/indexed character by
        # character; treat it as a single column name.
        feature_cols = [x_labels]
    else:
        feature_cols = list(x_labels)

    # Selecting with a list always yields a 2-D array, so a single feature
    # needs no special-casing. copy=True keeps the result independent of the
    # source frame without deep-copying the whole frame first.
    X = dataframe[feature_cols].to_numpy(copy=True)
    y = dataframe[y_label].to_numpy(copy=True).reshape(-1, 1)
    data = np.hstack((X, y))

    return data, X, y

In [30]:
# Temperature-only feature/target arrays for each split; the stacked array
# that get_xy returns first is not needed here.
x_train_temp, y_train_temp = get_xy(train, 'bike_count', x_labels=['temp'])[1:]
x_val_temp, y_val_temp = get_xy(valid, 'bike_count', x_labels=['temp'])[1:]
x_test_temp, y_test_temp = get_xy(test, 'bike_count', x_labels=['temp'])[1:]

In [None]:
# Ordinary least-squares fit of bike count on temperature alone.
temp_reg = LinearRegression()
temp_reg.fit(X=x_train_temp, y=y_train_temp)

In [None]:
# Score the temperature-only model on the held-out test split.
temp_reg.score(X=x_test_temp, y=y_test_temp)

In [None]:
# 100 evenly spaced temperatures between -20 and 40 on which to draw the fit.
x = tf.linspace(-20, 40, 100)
x_column = np.array(x).reshape(-1, 1)
fitted = temp_reg.predict(x_column)

# Training points in blue with the fitted line drawn over them in red.
plt.scatter(x_train_temp, y_train_temp, label='data', color='blue')
plt.plot(x, fitted, label='Fit', color='red', linewidth=3)
plt.legend()
plt.title('BIkes vs Temp')
plt.ylabel('Number of bikes')
plt.xlabel('Temp')
plt.show()

# Multiple Linear Regression

In [8]:
# Feature/target arrays using every remaining column except the target; the
# stacked array that get_xy returns first is not needed here.
x_train, y_train = get_xy(train, 'bike_count')[1:]
x_val, y_val = get_xy(valid, 'bike_count')[1:]
x_test, y_test = get_xy(test, 'bike_count')[1:]

In [None]:
# Ordinary least-squares fit of bike count on all remaining features.
all_reg = LinearRegression()
all_reg.fit(X=x_train, y=y_train)

In [None]:
# Score the all-feature linear model on the held-out test split.
all_reg.score(X=x_test, y=y_test)

# Regression with Neural Network

In [37]:
def plot_loss(history):
    """Plot the training loss, and the validation loss when present, per epoch.

    Args:
        history: Object whose ``history`` attribute is a dict of per-epoch
            values, read here under the keys ``'loss'`` and (optionally)
            ``'val_loss'``.
    """
    curves = history.history
    plt.plot(curves['loss'], label='loss')
    # A run trained without validation data has no 'val_loss' entry; skip the
    # curve in that case instead of failing with KeyError.
    if 'val_loss' in curves:
        plt.plot(curves['val_loss'], label='val_loss')
    plt.xlabel('Epoch')
    plt.ylabel('MSE')
    plt.legend()
    plt.grid(True)
    plt.show()

# Simple Regression with NN

In [None]:
# Normalization layer for the single temperature input, adapted on the
# flattened training temperatures.
flat_train_temp = x_train_temp.ravel()
temp_normalizer = tf.keras.layers.Normalization(input_shape=(1,), axis=None)
temp_normalizer.adapt(flat_train_temp)

In [73]:
# Normalization followed by a single Dense unit with no activation.
temp_layers = [temp_normalizer, tf.keras.layers.Dense(1)]
temp_nn_model = tf.keras.Sequential(temp_layers)

In [74]:
# Adam optimizer (learning rate 0.1) minimising mean squared error.
temp_optimizer = tf.keras.optimizers.Adam(learning_rate=0.1)
temp_nn_model.compile(optimizer=temp_optimizer, loss='mean_squared_error')

In [None]:
# Train for 1000 epochs on the flattened training temperatures, evaluating on
# the validation split after each epoch.
history = temp_nn_model.fit(
    x=x_train_temp.reshape(-1),
    y=y_train_temp,
    validation_data=(x_val_temp, y_val_temp),
    epochs=1000,
)

In [None]:
# Loss curves for the single-unit temperature model trained above.
plot_loss(history=history)

In [None]:
# 100 evenly spaced temperatures between -20 and 40 on which to draw the fit.
x = tf.linspace(-20, 40, 100)
x_column = np.array(x).reshape(-1, 1)
fitted = temp_nn_model.predict(x_column)

# Training points in blue with the network's prediction drawn over them in red.
plt.scatter(x_train_temp, y_train_temp, label='data', color='blue')
plt.plot(x, fitted, label='Fit', color='red', linewidth=3)
plt.legend()
plt.title('BIkes vs Temp')
plt.ylabel('Number of bikes')
plt.xlabel('Temp')
plt.show()

# Multi Regression with NN

In [None]:
# Normalization over the last axis for the six-feature input, adapted on the
# training features.
all_normalizer = tf.keras.layers.Normalization(axis=-1, input_shape=(6,))
all_normalizer.adapt(x_train)

In [128]:
# Normalization followed by a single Dense unit with no activation.
all_layers = [all_normalizer, tf.keras.layers.Dense(1)]
all_nn_model = tf.keras.Sequential(all_layers)

In [129]:
# Adam optimizer (learning rate 0.1) minimising mean squared error.
# The explicit auto_scale_loss=True was dropped: it is accepted only by the
# Keras 3 compile() signature, where True is already the default, so omitting
# it keeps that behaviour, matches the other compile() calls in this notebook,
# and avoids a TypeError on Keras 2.
all_nn_model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.1), loss='mse')

In [None]:
# Train for 1000 epochs on all features, holding out a 0.2 fraction of the
# training data for validation.
history = all_nn_model.fit(
    x=x_train,
    y=y_train,
    validation_split=0.2,
    epochs=1000,
)

In [None]:
# Loss curves for the single-unit all-feature model trained above.
plot_loss(history=history)

# Neural Network

In [47]:

# Fresh normalization layer for the temperature input, adapted on the
# flattened training temperatures.
temp_normalizer = tf.keras.layers.Normalization(input_shape=(1,), axis=None)
temp_normalizer.adapt(x_train_temp.ravel())

# Three 32-unit ReLU hidden layers followed by one linear output unit.
hidden_layers = [tf.keras.layers.Dense(32, activation='relu') for _ in range(3)]
output_layer = tf.keras.layers.Dense(1)
nn_model = tf.keras.Sequential([temp_normalizer, *hidden_layers, output_layer])

# Adam optimizer (learning rate 0.001) minimising mean squared error.
nn_optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
nn_model.compile(optimizer=nn_optimizer, loss='mse')

In [None]:
# Train the temperature network for 100 epochs, evaluating on the validation
# split after each epoch.
history = nn_model.fit(
    x=x_train_temp,
    y=y_train_temp,
    epochs=100,
    validation_data=(x_val_temp, y_val_temp),
)

In [None]:
# Loss curves for the temperature network trained above.
plot_loss(history=history)

In [None]:
# 100 evenly spaced temperatures between -20 and 40 on which to draw the fit.
x = tf.linspace(-20, 40, 100)
x_column = np.array(x).reshape(-1, 1)
fitted = nn_model.predict(x_column)

# Training points in blue with the network's prediction drawn over them in red.
plt.scatter(x_train_temp, y_train_temp, label='data', color='blue')
plt.plot(x, fitted, label='Fit', color='red', linewidth=3)
plt.legend()
plt.title('BIkes vs Temp')
plt.ylabel('Number of bikes')
plt.xlabel('Temp')
plt.show()

In [56]:

# Fresh normalization layer over the last axis, adapted on the training
# features.
all_normalizer = tf.keras.layers.Normalization(axis=-1)
all_normalizer.adapt(x_train)

# Two 32-unit ReLU hidden layers followed by one ReLU output unit.
hidden_layers = [tf.keras.layers.Dense(32, activation='relu') for _ in range(2)]
output_layer = tf.keras.layers.Dense(1, activation='relu')
nn_model = tf.keras.Sequential([all_normalizer, *hidden_layers, output_layer])

# Adam optimizer (learning rate 0.001) minimising mean squared error.
nn_optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
nn_model.compile(optimizer=nn_optimizer, loss='mse')

In [None]:
# Train the all-feature network for 100 epochs, evaluating on the validation
# split after each epoch.
history = nn_model.fit(
    x=x_train,
    y=y_train,
    epochs=100,
    validation_data=(x_val, y_val),
)

In [None]:
# Loss curves for the all-feature network trained above.
plot_loss(history=history)

In [None]:
# Test-set predictions from the linear model and then the neural network.
y_pred_lr, y_pred_nn = (model.predict(x_test) for model in (all_reg, nn_model))

In [64]:
def MSE(y_pred, y_true):
    """Return the mean squared error between predictions and targets.

    Args:
        y_pred: Predicted values (array-like).
        y_true: Ground-truth values (array-like).

    Returns:
        The mean of the squared element-wise differences.
    """
    y_pred = np.asarray(y_pred)
    y_true = np.asarray(y_true)
    # Inputs with the same number of elements but different layouts, such as
    # (n, 1) against (n,), would broadcast to (n, n) and silently average over
    # every pairing. Flatten both so elements are compared one-to-one.
    if y_pred.shape != y_true.shape and y_pred.size == y_true.size:
        y_pred = y_pred.ravel()
        y_true = y_true.ravel()
    return np.square(y_pred - y_true).mean()

In [None]:

# Predicted against true bike counts for both models on equal-aspect axes.
ax = plt.axes(aspect='equal')
for predictions, series_name in ((y_pred_lr, 'Linear'), (y_pred_nn, 'NN')):
    plt.scatter(y_test, predictions, label=series_name)
plt.xlabel('True Value')
plt.ylabel('Predictions')
# Use the same range on both axes so the diagonal marks perfect predictions.
lims = [0, 1800]
plt.xlim(lims)
plt.ylim(lims)
plt.legend()
plt.plot(lims, lims, c='red')