<a href="https://colab.research.google.com/github/Pradeep1070/Seoul-Bike-Sharing-Demand-Prediction/blob/main/fcc_regression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# @title
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import RandomOverSampler
import copy
import tensorflow as tf
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

### Data Set

In [None]:
# Mount Google Drive under /content/drive so the dataset CSV stored there can be read.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Short names for the columns that remain after Date, Holiday and Seasons are dropped.
cols = [
    "bike_count", "hour", "temp", "humidity", "wind", "visibility",
    "dew_pt_temp", "radiation", "rain", "snow", "functional",
]
df = pd.read_csv(
    '/content/drive/MyDrive/Colab Notebooks/Practises/SeoulBikeData.csv',
    encoding='Latin-1',
).drop(columns=['Date', 'Holiday', 'Seasons'])
df.head()

In [None]:
# Apply the short column names, encode `functional` as 0/1, and keep only the
# rows recorded at hour 12; `hour` is dropped afterwards since it is constant.
df.columns = cols
df['functional'] = df['functional'].eq("Yes").astype(int)
df = df.loc[df['hour'] == 12].drop(columns=['hour'])
df.head()

In [None]:
# One scatter plot per feature against the noon bike count, to eyeball which
# features look related to demand.
for col in df.columns[1:]:  # column 0 is the target, bike_count
  plt.scatter(df[col], df['bike_count'])
  plt.title(col)
  plt.xlabel(col)
  plt.ylabel('bike_count_at_noon')
  # No plt.legend() here: the scatter carries no label, so a legend would be
  # empty and matplotlib would only emit a "no artists with labels" warning.
  plt.show()

In [None]:
# Remove wind, visibility and functional; the columns left over are the target
# plus the features used from here on.
df = df.drop(columns=['wind', 'visibility', 'functional'])
df.head()

### Train Test Valid

In [None]:
# Shuffle the rows once (seeded so the partition is reproducible) and cut at
# 60% / 80% of the row count to get train / validation / test frames.
# Plain .iloc slicing is used instead of np.split, which routes DataFrames
# through the deprecated DataFrame.swapaxes.
# NOTE(review): train/val/test are reassigned by another split in a later cell
# before they are used, so this cell looks redundant.
shuffled = df.sample(frac=1, random_state=42)
train_end, val_end = int(len(df) * 0.6), int(len(df) * 0.8)
train = shuffled.iloc[:train_end]
val = shuffled.iloc[train_end:val_end]
test = shuffled.iloc[val_end:]

In [None]:
def xy_split(dataf, y_label, x_labels=None):
  """Split a DataFrame into feature and target arrays.

  Args:
    dataf: source DataFrame; it is deep-copied, so the input is not touched.
    y_label: name of the target column.
    x_labels: feature column names. When None, every column except
      `y_label` is used. A single label yields an (n, 1) feature array.

  Returns:
    A tuple `(data, x_data, y_data)`: `x_data` is the 2-D feature array,
    `y_data` is the target as an (n, 1) column, and `data` is the two stacked
    side by side with the target as the last column.
  """
  frame = copy.deepcopy(dataf)

  if x_labels is None:
    feature_cols = [name for name in dataf.columns if name != y_label]
    features = frame[feature_cols].values
  elif len(x_labels) == 1:
    # Selecting one column gives a 1-D array; reshape it into a column vector.
    features = frame[x_labels[0]].values.reshape(-1, 1)
  else:
    features = frame[x_labels].values

  target = frame[y_label].values.reshape(-1, 1)
  stacked = np.hstack((features, target))

  return stacked, features, target

In [None]:
# Shuffle the rows once (seeded so the partition is reproducible) and cut at
# 60% / 80% of the row count to get train / validation / test frames.
# Plain .iloc slicing is used instead of np.split, which routes DataFrames
# through the deprecated DataFrame.swapaxes.
shuffled = df.sample(frac=1, random_state=42)
train_end, val_end = int(len(df) * 0.6), int(len(df) * 0.8)
train = shuffled.iloc[:train_end]
val = shuffled.iloc[train_end:val_end]
test = shuffled.iloc[val_end:]

# Temperature-only inputs: xy_split returns an (n, 1) column for a single label.
_, x_train_temp, y_train_temp = xy_split(train, 'bike_count', x_labels=['temp'])
_, x_val_temp, y_val_temp = xy_split(val, 'bike_count', x_labels=['temp'])
_, x_test_temp, y_test_temp = xy_split(test, 'bike_count', x_labels=['temp'])

# All-feature inputs: every column after the target in column 0.
_, x_train, y_train = xy_split(train, 'bike_count', x_labels=df.columns[1:])
_, x_test, y_test = xy_split(test, 'bike_count', x_labels=df.columns[1:])
_, x_val, y_val = xy_split(val, 'bike_count', x_labels=df.columns[1:])

### Simple Linear Reg

In [None]:
# Fit a linear regression of bike_count on temperature alone.
lin_reg = LinearRegression()
lin_reg.fit(x_train_temp, y_train_temp)

In [None]:
# Fitted slope and intercept of the temperature-only model.
print(lin_reg.coef_, lin_reg.intercept_)

In [None]:
# R^2 (coefficient of determination) on the test split: the closer to 1, the
# more of the variance in bike_count the temperature-only fit explains.
lin_reg.score(x_test_temp, y_test_temp)

In [None]:
# Test-split predictions from the temperature-only model.
y_preds = lin_reg.predict(x_test_temp)

In [None]:
# Training points with the fitted regression line drawn over the range -20..40.
fig, ax = plt.subplots(figsize=(6,4))
ax.scatter(x_train_temp, y_train_temp, label='Data', color='Blue')
ax.set_xlabel('temp')
ax.set_ylabel('no of bike rentals')

x = tf.linspace(-20,40,100)
line_inputs = np.array(x).reshape(-1,1)  # predict() is given a column vector
ax.plot(x, lin_reg.predict(line_inputs), label='pred', color='red')

ax.legend()
plt.show()

In [None]:
# Validation-split mean squared error of the temperature-only linear model.
mean_squared_error(lin_reg.predict(x_val_temp), y_val_temp)

### Multiple Linear Regression

In [None]:
# Fit a linear regression of bike_count on all remaining feature columns.
mult_lin_reg = LinearRegression()
mult_lin_reg.fit(x_train,y_train)

In [None]:
# R^2 of the multi-feature linear model on the test split.
mult_lin_reg.score(x_test,y_test)

In [None]:
# Fitted coefficients (one per feature column) and intercept.
print(mult_lin_reg.coef_, mult_lin_reg.intercept_)

In [None]:
# Validation-split mean squared error of the multi-feature linear model.
mean_squared_error(mult_lin_reg.predict(x_val), y_val)

### neural networks for simple linear regression





In [None]:
def plot_loss(history):
  """Plot training and validation loss per epoch.

  Args:
    history: object whose `.history` dict holds the 'loss' and 'val_loss'
      series (here, the value returned by a Keras `fit` call).
  """
  plt.figure(figsize=(5,3))
  for series_name in ('loss', 'val_loss'):
    plt.plot(history.history[series_name], label=series_name)
  plt.xlabel('epochs')
  plt.ylabel('MSE')
  plt.grid(True)
  plt.legend()
  plt.show()

In [None]:
# Scalar normalizer for the single temperature input: axis=None means one
# mean/variance, learned here from the flattened training temperatures.
temp_normalizer = tf.keras.layers.Normalization(input_shape=(1,), axis=None)
temp_normalizer.adapt(x_train_temp.reshape(-1))

# A single Dense unit with no activation keeps the network linear.
linear_layers = [temp_normalizer, tf.keras.layers.Dense(1)]
temp_nn_model = tf.keras.Sequential(linear_layers)

adam = tf.keras.optimizers.Adam(learning_rate=1)
temp_nn_model.compile(optimizer=adam, loss="mean_squared_error")

# Train silently; validation loss is tracked on the validation split.
history = temp_nn_model.fit(
    x_train_temp.reshape(-1),
    y_train_temp,
    validation_data=(x_val_temp.reshape(-1), y_val_temp),
    epochs=1000,
    verbose=0,
)


In [None]:
# Loss curves of the single-unit temperature network.
plot_loss(history)

In [None]:
# Training points with the network's predictions drawn over the range -20..40.
fig, ax = plt.subplots(figsize=(6,4))
ax.scatter(x_train_temp, y_train_temp, label='Data', color='Blue')
ax.set_xlabel('temp')
ax.set_ylabel('no of bike rentals')

x = tf.linspace(-20,40,100)
line_inputs = np.array(x).reshape(-1,1)  # predict() is given a column vector
ax.plot(x, temp_nn_model.predict(line_inputs), label='pred', color='red')

ax.legend()
plt.show()

In [None]:
# Validation loss of the temperature network (mean squared error, the loss it was compiled with).
temp_nn_model.evaluate(x_val_temp,y_val_temp)

Learning rate vs. MSE vs. epochs to converge

0.1 - 139827.984375 - 1000

1 - 136041.515625 - 200

0.01 - 509329.4375 - 1000 (linear graph)

In [None]:
# Cross-check of the Keras validation loss using scikit-learn's metric.
# mean_squared_error is already imported in the notebook's import cell, so it
# is not re-imported here; arguments follow sklearn's (y_true, y_pred) order.
mean_squared_error(y_val_temp, temp_nn_model.predict(x_val_temp))

### neural net multiple linear reg

In [None]:
# Per-feature normalization: axis=-1 learns a separate mean/variance for each
# of the 6 input columns. (axis=None would pool every feature into one scalar
# mean/variance, leaving columns with different scales poorly normalized.)
multi_nn_norm = tf.keras.layers.Normalization(input_shape=(6,), axis=-1)
multi_nn_norm.adapt(x_train)

multi_reg_nn_model = tf.keras.Sequential([
    multi_nn_norm,
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(16, activation='relu'),
    # ReLU on the output clamps predictions at zero.
    tf.keras.layers.Dense(1, activation='relu')
])

multi_reg_nn_model.compile(
    loss = 'mean_squared_error',
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
)

# Train silently; validation loss is tracked on the validation split.
history = multi_reg_nn_model.fit(
    x_train,y_train,
    validation_data=(x_val,y_val),
    epochs=100,
    verbose=0,
)


In [None]:
# Loss curves of the multi-feature network.
plot_loss(history)

In [None]:
# Validation loss of the multi-feature network (mean squared error, the loss it was compiled with).
multi_reg_nn_model.evaluate(x_val,y_val)

### neural networks for simple linear regression 2

In [None]:
def plot_loss(history):
  """Plot training and validation loss per epoch on one small figure.

  Re-declares the helper of the same name defined earlier in the notebook.

  Args:
    history: object whose `.history` dict holds the 'loss' and 'val_loss'
      series (here, the value returned by a Keras `fit` call).
  """
  _, axis = plt.subplots(figsize=(5,3))
  axis.plot(history.history['loss'], label='loss')
  axis.plot(history.history['val_loss'], label='val_loss')
  axis.set_xlabel('epochs')
  axis.set_ylabel('MSE')
  axis.legend()
  axis.grid(True)
  plt.show()

In [None]:
# Scalar normalizer for the single temperature input, adapted on the flattened
# training temperatures.
temp_normalizer = tf.keras.layers.Normalization(input_shape=(1,), axis=None)
temp_normalizer.adapt(x_train_temp.reshape(-1))

# Hidden ReLU stack: two layers each of 128, 64, 32 and 16 units.
hidden_units = (128, 128, 64, 64, 32, 32, 16, 16)
hidden_layers = [
    tf.keras.layers.Dense(units, activation='relu') for units in hidden_units
]

temp_nn_model = tf.keras.Sequential(
    [temp_normalizer, *hidden_layers, tf.keras.layers.Dense(1, activation='relu')]
)

adam = tf.keras.optimizers.Adam(learning_rate=0.01)
temp_nn_model.compile(optimizer=adam, loss="mean_squared_error")

# Train silently; validation loss is tracked on the validation split.
history = temp_nn_model.fit(
    x_train_temp.reshape(-1),
    y_train_temp,
    validation_data=(x_val_temp.reshape(-1), y_val_temp),
    epochs=100,
    verbose=0,
)


In [None]:
# Loss curves of the deep temperature-only network.
plot_loss(history)

In [None]:
# Validation loss of the temperature network (mean squared error, the loss it was compiled with).
temp_nn_model.evaluate(x_val_temp,y_val_temp)

In [None]:
# Training points with the network's predictions drawn over the range -20..40.
fig, ax = plt.subplots(figsize=(6,4))
ax.scatter(x_train_temp, y_train_temp, label='Data', color='Blue')
ax.set_xlabel('temp')
ax.set_ylabel('no of bike rentals')

x = tf.linspace(-20,40,100)
line_inputs = np.array(x).reshape(-1,1)  # predict() is given a column vector
ax.plot(x, temp_nn_model.predict(line_inputs), label='pred', color='red')

ax.legend()
plt.show()

### neural net multiple linear reg 2

In [None]:
def plot_loss(history):
  """Plot training and validation loss per epoch on the current figure.

  Replaces the earlier helper of the same name; this variant adds a title and
  uses 'train loss' / 'val loss' as legend labels.

  Args:
    history: object whose `.history` dict holds the 'loss' and 'val_loss'
      series (here, the value returned by a Keras `fit` call).
  """
  curves = history.history
  plt.plot(curves['loss'], label='train loss')
  plt.plot(curves['val_loss'], label='val loss')
  plt.title('loss')
  plt.xlabel('epochs')
  plt.ylabel('MSE')
  plt.legend()
  plt.grid(True)
  plt.show()

In [None]:
# Check the feature matrix shape; the Normalization layer below is built with input_shape=(6,).
x_train.shape

In [None]:
# Per-feature normalization: axis=-1 learns a separate mean/variance for each
# of the 6 input columns. (axis=None would pool every feature into one scalar
# mean/variance, leaving columns with different scales poorly normalized.)
multi_reg_nn_normalizer_2 = tf.keras.layers.Normalization(input_shape=(6,), axis=-1)
multi_reg_nn_normalizer_2.adapt(x_train)

multi_reg_nn_model = tf.keras.Sequential([
    multi_reg_nn_normalizer_2,
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(16, activation='relu'),
    tf.keras.layers.Dense(16, activation='relu'),
    # ReLU on the output clamps predictions at zero.
    tf.keras.layers.Dense(1, activation='relu')
])

# Optimizer passed by keyword, matching the other compile() calls in this notebook.
multi_reg_nn_model.compile(
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.001),
    loss = 'mean_squared_error'
)

# Train silently; validation loss is tracked on the validation split.
history = multi_reg_nn_model.fit(
  x_train, y_train,
  epochs = 100,
  validation_data = (x_val,y_val),
  verbose=0
)


In [None]:
# Loss curves of the deep multi-feature network.
plot_loss(history)

In [None]:
# Validation loss of the multi-feature network (mean squared error, the loss it was compiled with).
multi_reg_nn_model.evaluate(x_val,y_val)

### comparing sklearn linear regression with neural net linear regression

In [None]:
def mean_squared_error(y_preds,y_true):
  """Return the mean of the squared element-wise differences.

  Shadows the scikit-learn function of the same name imported at the top of
  the notebook. The subtraction follows NumPy broadcasting, so both arguments
  should have the same shape.

  Args:
    y_preds: predicted values (NumPy array).
    y_true: reference values (NumPy array).

  Returns:
    The mean over all elements of (y_preds - y_true) squared.
  """
  residuals = y_preds - y_true
  return np.mean(np.square(residuals))

In [None]:
# Validation-split predictions from the multi-feature linear model and the
# multi-feature neural network, for a side-by-side comparison.
y_preds_mult_lin_reg = mult_lin_reg.predict(x_val)
y_preds_multi_reg_nn_model = multi_reg_nn_model.predict(x_val)

In [None]:
# Validation MSE of each model, computed with the NumPy mean_squared_error
# helper defined in this section (linear regression first, then the network).
print(mean_squared_error(y_preds_mult_lin_reg,y_val))
print(mean_squared_error(y_preds_multi_reg_nn_model,y_val))

In [None]:
# Largest observed noon bike count; useful for picking the axis limits of the plot below.
df['bike_count'].max()

In [None]:
# Predicted-vs-actual plot for both multi-feature models on the validation split.
plt.figure(figsize=(10,6))
ax = plt.axes(aspect='equal')
# Actual values go on x and predictions on y so the data match the axis labels
# (the scatter arguments were previously the other way round).
plt.scatter(y_val, y_preds_mult_lin_reg, label='lin_reg_preds', color='blue')
plt.scatter(y_val, y_preds_multi_reg_nn_model, label='nn_preds', color='orange')
plt.xlabel('actual values')
plt.ylabel('predictions')
limits = [0, 1800]
plt.xlim(limits)
plt.ylim(limits)
# y = x reference line across the visible range: points on it are perfect predictions.
plt.plot(limits, limits, color='red')
plt.legend()
plt.show()