In [9]:
import copy
import math
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sklearn
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [14]:
# Load the raw flight-price table. The path is relative, so the CSV has to sit
# in the kernel's working directory for this cell to succeed.
df = pd.read_csv(filepath_or_buffer='Flight_Price_Dataset_Q2.csv')
df

FileNotFoundError: [Errno 2] No such file or directory: 'Flight_Price_Dataset_Q2.csv'

In [None]:
# One-hot encode the listed categorical columns. `drop_first=True` drops the
# first level of each column so the remaining indicator columns are not
# redundant; the original categorical column is removed once its indicators
# have been appended on the right.
dummies_columns = ['class']
for dummy in dummies_columns:
    dummies = pd.get_dummies(df[dummy], drop_first=True).astype(int)
    df = pd.concat([df, dummies], axis=1).drop(columns=[dummy])

df

In [None]:
# Ordinal codes for the time-of-day and number-of-stops categories.
time_mapping = {'Early_Morning': 1, 'Morning': 2, 'Afternoon': 3, 'Evening': 4, 'Night': 5, 'Late_Night': 6}
stop_mapping = {'zero': 0, 'one': 1, 'two_or_more': 2}

# Which mapping applies to which column.
ordinal_encodings = {
    'departure_time': time_mapping,
    'arrival_time': time_mapping,
    'stops': stop_mapping,
}

# `Series.map` turns every value that is missing from the dict into NaN without
# complaint. That silently destroys the column when a label is misspelled and
# also when this cell is run a second time (the values are then already ints).
# Encode into temporaries first, verify nothing was lost, and only then assign.
encoded_columns = {}
for column, mapping in ordinal_encodings.items():
    encoded = df[column].map(mapping)
    lost = df.loc[df[column].notna() & encoded.isna(), column].unique()
    if len(lost) > 0:
        raise ValueError(
            f"Column {column!r} contains values with no ordinal code: {list(lost)[:10]}. "
            "If this cell was already run, reload the data before re-running it."
        )
    encoded_columns[column] = encoded

for column, encoded in encoded_columns.items():
    df[column] = encoded

df

In [None]:
# Standardise every column of `df` and rebuild the frame from the scaled
# array, keeping the original column names (the rebuilt frame gets a fresh
# default index).
# NOTE(review): the scaler is fitted on all rows and on every column —
# including `price`, which a later cell uses as the target — before the
# train/test split happens. Test-set statistics therefore influence the
# scaling; consider fitting on the training split only.
scaler = StandardScaler()
scaled_values = scaler.fit_transform(df)
df = pd.DataFrame(data=scaled_values, columns=df.columns)

df

In [None]:
# Separate the target (`price`) from the feature columns and expose both as
# plain NumPy arrays for the hand-written regression code below.
y = df['price'].values
df_x = df.drop(columns='price')
x = df_x.values

In [None]:
# Hold out 20% of the rows for testing; the fixed seed keeps the shuffled
# split reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    shuffle=True,
    random_state=42,
)

In [None]:
def compute_f(x, w, b):
    """Evaluate the linear model ``x . w + b``.

    Args:
        x: Feature values. ``np.dot`` is applied as-is, so a 1-D vector gives
            a scalar and a 2-D matrix of row vectors gives one value per row.
        w: Weight vector.
        b: Bias term added to the dot product.

    Returns:
        The model output ``np.dot(x, w) + b``.
    """
    return np.dot(x, w) + b

In [None]:
def compute_cost(X, w, b, Y, lambda_=1.0):
    """L2-regularised squared-error cost of the linear model ``X . w + b``.

    Computes ``(sum((X.w + b - Y)**2) + lambda_ * sum(w**2)) / (2 * m)`` where
    ``m`` is the number of rows of ``X``. The bias ``b`` is not penalised.

    Args:
        X: 2-D array-like of shape (m, n), one example per row.
        w: Weight vector of length n.
        b: Scalar bias.
        Y: Targets, one per row of ``X``.
        lambda_: Weight of the L2 penalty. The default of 1.0 reproduces the
            penalty that used to be hard-coded here; pass 0.0 for the plain
            mean-squared-error cost.

    Returns:
        The scalar cost.

    Raises:
        ValueError: If ``X`` has no rows (the mean would be undefined).
    """
    X = np.asarray(X, dtype=float)
    w = np.asarray(w, dtype=float)
    # Flatten so that a column-shaped target cannot broadcast against the
    # prediction vector into an (m, m) matrix.
    Y = np.asarray(Y, dtype=float).ravel()

    m = X.shape[0]
    if m == 0:
        raise ValueError("compute_cost requires at least one training example")

    # Same linear model as compute_f, evaluated for all rows at once.
    residuals = np.dot(X, w) + b - Y
    squared_error = np.dot(residuals, residuals)
    penalty = lambda_ * np.dot(w, w)
    return (squared_error + penalty) / (2 * m)

In [None]:
def compute_gradient(X, w, b, Y, lambda_=1.0):
    """Gradient of the L2-regularised squared-error cost w.r.t. ``w`` and ``b``.

    For the cost ``(sum((X.w + b - Y)**2) + lambda_ * sum(w**2)) / (2 * m)``
    the partial derivatives are::

        dJ/dw = (X^T (X.w + b - Y) + lambda_ * w) / m
        dJ/db = sum(X.w + b - Y) / m

    The penalty term was previously missing here even though compute_cost
    adds ``sum(w**2) / (2 * m)`` to the cost, so the descent direction did not
    match the cost being reported. The default ``lambda_=1.0`` matches that
    penalty; pass ``lambda_=0.0`` for the unregularised gradient.

    Args:
        X: 2-D array-like of shape (m, n), one example per row.
        w: Weight vector of length n.
        b: Scalar bias.
        Y: Targets, one per row of ``X``.
        lambda_: Weight of the L2 penalty on ``w`` (the bias is not penalised).

    Returns:
        Tuple ``(dj_dw, dj_db)``: an array of shape (n,) and a scalar.

    Raises:
        ValueError: If ``X`` has no rows.
    """
    X = np.asarray(X, dtype=float)
    w = np.asarray(w, dtype=float)
    # Flatten so that a column-shaped target cannot broadcast against the
    # prediction vector into an (m, m) matrix.
    Y = np.asarray(Y, dtype=float).ravel()

    m = X.shape[0]
    if m == 0:
        raise ValueError("compute_gradient requires at least one training example")

    # Same linear model as compute_f, evaluated for all rows at once.
    residuals = np.dot(X, w) + b - Y
    dj_dw = (np.dot(X.T, residuals) + lambda_ * w) / m
    dj_db = residuals.sum() / m
    return dj_dw, dj_db

In [None]:
def compute_gradient_descent(X, Y, w_in, b_in, alpha, num_iterations, print_every=1):
    """Fit the linear model by batch gradient descent.

    Each iteration takes one step ``w -= alpha * dJ/dw``, ``b -= alpha * dJ/db``
    using compute_gradient, then records the cost reported by compute_cost.

    Args:
        X: 2-D NumPy array of training features, one example per row.
        Y: Training targets, one per row of ``X``.
        w_in: Initial weight vector; it is copied, not modified.
        b_in: Initial bias.
        alpha: Learning rate.
        num_iterations: Number of gradient steps to take.
        print_every: Print cost and R^2 every this many iterations. The
            default of 1 prints on every iteration; 0 or None disables the
            progress output (and skips the R^2 computation entirely).

    Returns:
        Tuple ``(J_history, w, b)``: the list of per-iteration costs and the
        final weights and bias.
    """
    w = copy.deepcopy(w_in)  # keep the caller's initial weights untouched
    b = b_in

    J_history = []

    for i in range(num_iterations):
        dj_dw, dj_db = compute_gradient(X, w, b, Y)
        w = w - (alpha * dj_dw)
        b = b - (alpha * dj_db)

        cost = compute_cost(X, w, b, Y)
        J_history.append(cost)

        if print_every and i % print_every == 0:
            # R^2 is only needed for the progress line. compute_f is applied
            # to the whole matrix at once instead of row by row.
            r2 = r2_score(Y, compute_f(X, w, b))
            print("iteration: ", i + 1, "   cost: ", cost, "r2: ", r2)

    return J_history, w, b

In [None]:
# Hyperparameters for the run: learning rate and number of gradient steps.
alpha = 0.1
num_itr = 100

# Start from an all-zero model: one weight per feature column, zero bias.
initial_b = 0
initial_w = np.zeros(len(x[0]))

J_his, w, b = compute_gradient_descent(
    X=x_train,
    Y=y_train,
    w_in=initial_w,
    b_in=initial_b,
    alpha=alpha,
    num_iterations=num_itr,
)

In [None]:
np.set_printoptions(precision=2)

# `J_his` is the cost history returned by compute_gradient_descent in the
# training cell (this cell previously referenced an undefined `J_hist`).
# The right-hand panel repeats the curve without its first `tail_start`
# entries; its x values start at `tail_start` so both panels share the same
# iteration numbering (it used to be shifted by a hard-coded 100).
tail_start = 1
cost_tail = J_his[tail_start:]

fig, (ax1, ax2) = plt.subplots(1, 2, constrained_layout=True, figsize=(12, 4))
ax1.plot(J_his)
ax2.plot(tail_start + np.arange(len(cost_tail)), cost_tail)
ax1.set_title("Cost vs. iteration")
ax2.set_title("Cost vs. iteration (tail)")
for ax in (ax1, ax2):
    ax.set_ylabel('Cost')
    ax.set_xlabel('iteration step')
plt.show()