# Learning from freecodecamp
- Youtube: https://youtu.be/i_LwzRVP7bg?si=o6audQvtlLZ_RLzR
- Dataset: https://archive.ics.uci.edu/dataset/560/seoul+bike+sharing+demand
- This notebook uses the "Seoul Bike Sharing Demand" dataset from the UCI Machine Learning Repository.

In [None]:
import os
# TensorFlow logging/runtime switches; set here so they are already in the
# environment when the next cell imports tensorflow.
os.environ.update({
    'TF_CPP_MIN_LOG_LEVEL': '2',   # hide INFO and WARNING messages from the C++ backend
    'TF_ENABLE_ONEDNN_OPTS': '0',  # turn off oneDNN custom ops (and their startup notice)
})

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from imblearn.over_sampling import RandomOverSampler
from sklearn.preprocessing import StandardScaler
import copy
import seaborn as sns
import tensorflow as tf
from sklearn.linear_model import LinearRegression

In [None]:
# Short column names applied after the Date/Holiday/Seasons columns are dropped.
dataset_cols = ["bike_count", "hour", "temp", "humidity", "wind", "visibility", "dew_pt_temp", "radiation", "rain", "snow", "functional"]

csv_path = "datasets/seoul+bike+sharing+demand/SeoulBikeData.csv"
unused_cols = ["Date", "Holiday", "Seasons"]

# Only a decoding failure triggers the fallback; any other problem (for
# example a missing file) is allowed to propagate so the notebook stops here
# instead of continuing with an undefined or stale `df`.
try:
    # The CSV is not valid UTF-8, so try latin-1 first.
    df = pd.read_csv(csv_path, encoding='latin-1')
    print("Success with latin-1 encoding!")
except UnicodeDecodeError:
    # Fallback: decode as UTF-8 and skip undecodable bytes.
    # `encoding_errors` is the read_csv keyword for this (there is no `errors`).
    df = pd.read_csv(csv_path, encoding='utf-8', encoding_errors='ignore')
    print("Success with error ignore!")

df = df.drop(unused_cols, axis=1)

In [None]:
# Apply the short column names and encode the "functional" flag as 0/1.
df.columns = dataset_cols
df["functional"] = (df["functional"] == "Yes").astype(int)

# Keep only the noon observations before discarding the hour column: the
# scatter plots label the target as "Bike Count at Noon", which only holds
# if the rows are restricted to hour == 12.
# NOTE(review): confirm noon-only data is intended; otherwise fix the plot label instead.
df = df[df["hour"] == 12]
df = df.drop(["hour"], axis=1)

In [None]:
# Preview the first rows after renaming and encoding.
df.head()

In [None]:
# One scatter plot per feature against the target; the first column
# (bike_count) is skipped because it is the target itself.
for feature in df.columns[1:]:
    plt.scatter(df[feature], df["bike_count"])
    plt.xlabel(feature)
    plt.ylabel("Bike Count at Noon")
    plt.title(feature)
    plt.show()

In [None]:
# Remove the features that are not used by the models below.
df = df.drop(columns=["wind", "visibility", "functional"])

In [None]:
# Preview the reduced feature set.
df.head()

## Train/valid/test dataset

In [None]:
# Shuffle all rows, then cut at 60% / 80% to get train / validation / test.
# Plain iloc slicing is used instead of np.split, which on a DataFrame goes
# through the deprecated DataFrame.swapaxes.
shuffled = df.sample(frac=1)
train_end = int(0.6 * len(df))
val_end = int(0.8 * len(df))
train = shuffled.iloc[:train_end]
val = shuffled.iloc[train_end:val_end]
test = shuffled.iloc[val_end:]

In [None]:
def get_xy(dataframe, y_label, x_labels=None):
    """Split a DataFrame into feature and target NumPy arrays.

    Args:
        dataframe: Source pandas DataFrame; it is not modified.
        y_label: Name of the target column.
        x_labels: Feature column name(s). ``None`` selects every column
            except ``y_label``; a single string selects that one column;
            any other iterable of names (list, pandas Index, ...) selects
            those columns in order.

    Returns:
        A ``(data, X, y)`` tuple where ``X`` has shape
        ``(n_rows, n_features)``, ``y`` has shape ``(n_rows, 1)`` and
        ``data`` is ``X`` and ``y`` stacked column-wise.
    """
    if x_labels is None:
        feature_cols = [c for c in dataframe.columns if c != y_label]
    elif isinstance(x_labels, str):
        # A bare string would otherwise select a 1-D Series and break hstack.
        feature_cols = [x_labels]
    else:
        feature_cols = list(x_labels)

    # Selecting with a list always yields a 2-D array, so one feature needs
    # no special case. copy=True keeps the outputs independent of the
    # caller's frame without deep-copying columns that are never used.
    X = dataframe[feature_cols].to_numpy(copy=True)
    y = dataframe[y_label].to_numpy(copy=True).reshape(-1, 1)
    data = np.hstack((X, y))

    return data, X, y

In [None]:
# Temperature-only feature matrix and target for each split; the stacked
# array returned first by get_xy is not needed and is discarded into `_`.
(
    (_, X_train_temp, y_train_temp),
    (_, X_val_temp, y_val_temp),
    (_, X_test_temp, y_test_temp),
) = (get_xy(split, "bike_count", x_labels=["temp"]) for split in (train, val, test))

In [None]:
# Simple linear regression: bike count as a function of temperature only.
temp_reg = LinearRegression()
temp_reg.fit(X_train_temp, y_train_temp)

In [None]:
print(temp_reg.coef_, temp_reg.intercept_)  # fitted slope (coefficient) and intercept
# LinearRegression.score returns R^2; evaluated here on the test split.
print(temp_reg.score(X_test_temp, y_test_temp))

In [None]:
# 100 evenly spaced temperatures from -20 to 40, shaped (100, 1) because
# predict expects a 2-D feature matrix.
x = np.array(tf.linspace(-20, 40, 100)).reshape(-1, 1)

# Training points with the fitted regression line drawn over them.
plt.scatter(X_train_temp, y_train_temp, label="Data", color="blue")
plt.plot(x, temp_reg.predict(x), label="Fit", color="red", linewidth=3)
plt.title("Bikes vs Temp")
plt.xlabel("Temp")
plt.ylabel("Number of bikes")
plt.legend()
plt.show()

## Multiple Linear Regression

In [None]:
# Draw a fresh shuffled 60/20/20 split for the multi-feature model.
# Plain iloc slicing is used instead of np.split, which on a DataFrame goes
# through the deprecated DataFrame.swapaxes.
shuffled = df.sample(frac=1)
train_end = int(0.6 * len(df))
val_end = int(0.8 * len(df))
train = shuffled.iloc[:train_end]
val = shuffled.iloc[train_end:val_end]
test = shuffled.iloc[val_end:]

# Every column after the first (bike_count) is used as a feature.
_, X_train_all, y_train_all = get_xy(train, "bike_count", x_labels=df.columns[1:])
_, X_val_all, y_val_all = get_xy(val, "bike_count", x_labels=df.columns[1:])
_, X_test_all, y_test_all = get_xy(test, "bike_count", x_labels=df.columns[1:])

In [None]:
# Multiple linear regression on all remaining features.
all_reg = LinearRegression()
all_reg.fit(X_train_all, y_train_all)

In [None]:
# R^2 of the multi-feature model on its test split.
all_reg.score(X_test_all, y_test_all)

## Regression with Neural Net

In [None]:
def plot_loss(history):
    """Plot the training (and, if present, validation) loss per epoch.

    Args:
        history: Object returned by ``model.fit``; its ``history`` dict must
            contain ``'loss'`` and may contain ``'val_loss'``.
    """
    plt.plot(history.history['loss'], label='loss')
    # Skip the validation curve when the history has no 'val_loss' entry,
    # instead of raising KeyError.
    if 'val_loss' in history.history:
        plt.plot(history.history['val_loss'], label='val_loss')
    plt.xlabel('Epoch')
    plt.ylabel('MSE')
    # Without this call the curve labels above are never displayed.
    plt.legend()
    plt.grid(True)
    plt.show()

In [None]:
# Normalization layer for the single temperature input; axis=None means one
# scalar mean/variance is used for the whole input.
temp_normalizer = tf.keras.layers.Normalization(input_shape=(1,), axis=None)
# Learn that mean/variance from the training temperatures (flattened to 1-D).
temp_normalizer.adapt(X_train_temp.reshape(-1))

In [None]:
# Normalization followed by a single Dense unit with no activation: a linear
# model of temperature expressed as a Keras network.
temp_nn_model = tf.keras.Sequential([
    temp_normalizer,
    tf.keras.layers.Dense(1)
])

In [None]:
# Adam with learning rate 0.1, minimising mean squared error.
temp_nn_model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.1), loss='mean_squared_error')

In [None]:
# Train for 100 epochs without per-epoch output. X_train_temp is passed in
# its (n, 1) form so the training input has the same shape as the validation
# input and matches the (1,) input shape declared on the normalizer.
history = temp_nn_model.fit(
    X_train_temp, y_train_temp,
    verbose=0,
    epochs=100,
    validation_data=(X_val_temp, y_val_temp)
)

In [None]:
# 100 evenly spaced temperatures from -20 to 40 used to draw the model's curve.
x = tf.linspace(-20, 40, 100)

# Training points with the single-unit network's predictions drawn over them.
plt.scatter(X_train_temp, y_train_temp, label="Data", color="blue")
plt.plot(
    x,
    temp_nn_model.predict(np.array(x).reshape(-1,1)),
    label="Fit",
    color="red",
    linewidth=3,
)
plt.title("Bikes vs Temp")
plt.xlabel("Temp")
plt.ylabel("Number of Bikes")
plt.legend()
plt.show()

## Neural Net

In [None]:
# Non-linear temperature model: the already-adapted temp_normalizer (the same
# layer instance used by temp_nn_model), two 32-unit ReLU hidden layers, and
# a single linear output unit.
nn_model = tf.keras.Sequential([
    temp_normalizer,
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(1)
])
# Adam with learning rate 0.001, minimising mean squared error.
nn_model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001), loss="mean_squared_error")

In [None]:
# Train the hidden-layer network on temperature for 100 epochs, recording
# validation loss each epoch and printing nothing.
history = nn_model.fit(
    X_train_temp,
    y_train_temp,
    epochs=100,
    verbose=0,
    validation_data=(X_val_temp, y_val_temp),
)

In [None]:
# Training vs. validation loss curves for nn_model.
plot_loss(history)

In [None]:
# 100 evenly spaced temperatures from -20 to 40 used to draw the model's curve.
x = tf.linspace(-20, 40, 100)

# Training points with the hidden-layer network's predictions drawn over them.
plt.scatter(X_train_temp, y_train_temp, label="Data", color="blue")
plt.plot(
    x,
    nn_model.predict(np.array(x).reshape(-1,1)),
    label="Fit",
    color="red",
    linewidth=3,
)
plt.title("Bikes vs Temp")
plt.xlabel("Temp")
plt.ylabel("Number of Bikes")
plt.legend()
plt.show()