## **GET DATASET**

In [None]:
# Example Simple Dataset
!wget https://download1514.mediafire.com/a2c2z3m5hqmgn32vhzJaN8MO72DRKx-3dBADaA3w8rehOq6s3UHxdGfwR3I6doCTt57bP53huh29v3_LZC-Eckj4n2aofKd-R-w8PfFh-V8Pkq6A0kuA9j7AXQA0ycwL3CDKkmC6OulN3v2YjkfgpIUkaDT0hZrJvmAG-qBju6wXfSgb/ddce95yi2nrck70/motivasi.csv

In [None]:
# UCI Bike Dataset
!wget https://archive.ics.uci.edu/static/public/275/bike+sharing+dataset.zip

In [None]:
# Unzip UCI Bike Dataset
!unzip bike+sharing+dataset.zip

## **PREPARING REQUIREMENTS**

In [112]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import copy
import seaborn as sns
import tensorflow as tf
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from mpl_toolkits.mplot3d import Axes3D

In [None]:
# Alternative data sources tried earlier (disabled; uncomment one to switch):
# df = pd.read_csv("student-mat.csv",  sep=";").drop(columns=["address", "school", "famsize", "Pstatus", "Mjob","Fjob", "reason", "guardian", "sex"])
# df = pd.read_csv("motif.csv").drop(columns=["dteday"])
# df = pd.read_csv("day.csv")

# Active dataset: the semicolon-delimited example file fetched in the
# "GET DATASET" section. Later cells read the columns "x1", "x2" and "y" from it.
df = pd.read_csv("motivasi.csv", sep=';')
# Disabled yes/no -> 1/0 encoding that belongs to the student-mat.csv variant above.
# df['schoolsup'] = df['schoolsup'].map({'yes': 1, 'no': 0})

# Show the first rows as a quick sanity check of the parsed columns.
df.head()

In [None]:
# Plotting each feature against the target "y", one figure per feature.
# Features are selected by name (every column except "y") so the loop does not
# depend on "y" being the last column of the frame.
for label in [col for col in df.columns if col != "y"]:
  plt.scatter(df[label], df["y"])
  plt.title(label)
  # The vertical axis carries the target values, so label it "y".
  plt.ylabel("y")
  plt.xlabel(label)
  plt.show()

## **PREPARING DATASET FOR TRAINING**

In [197]:
# Shuffled alternative (60% train / 20% validation / 20% test), disabled:
# train, val, test = np.split(df.sample(frac=1), [int(0.6*len(df)), int(0.8*len(df))])

# Ordered split in file order: first 60% of the rows for training, the next
# 20% for validation and the remaining rows for testing.
total_len = len(df)
idx_train_end, idx_val_end = int(0.6 * total_len), int(0.8 * total_len)
train, val, test = (
    df.iloc[:idx_train_end],
    df.iloc[idx_train_end:idx_val_end],
    df.iloc[idx_val_end:],
)

# Use every row for training instead (disabled):
# train = df.copy()

In [95]:
# Function to split data into features and target
def get_xy(dataframe, y_label, x_labels=None):
    """Split a DataFrame into a feature matrix and a target column.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Source data; it is not modified.
    y_label : str
        Name of the target column.
    x_labels : str, sequence of str, or None, optional
        Feature column(s). ``None`` (default) selects every column except
        ``y_label``. A single column may be given as a bare string or as a
        one-element sequence. A sequence wrapped in one extra list
        (e.g. ``[['x1']]``) is unwrapped for backward compatibility.

    Returns
    -------
    data : numpy.ndarray
        Features and target stacked column-wise, shape (n_rows, n_features + 1).
    X : numpy.ndarray
        Feature matrix, always 2-D with shape (n_rows, n_features).
    y : numpy.ndarray
        Target as a column vector, shape (n_rows, 1).

    Raises
    ------
    KeyError
        If a requested column is not present in ``dataframe``.
    """
    if x_labels is None:
        feature_cols = [c for c in dataframe.columns if c != y_label]
    elif isinstance(x_labels, str):
        # A bare column name: treat it as a one-column selection.
        feature_cols = [x_labels]
    else:
        feature_cols = list(x_labels)
        # Existing callers pass [['x1']]; unwrap that extra nesting level.
        if len(feature_cols) == 1 and isinstance(feature_cols[0], list):
            feature_cols = list(feature_cols[0])

    # Selecting with a list of names always yields a 2-D array, so the
    # single-feature case needs no special reshape. copy=True keeps the
    # returned arrays independent of the caller's frame without deep-copying
    # the whole DataFrame.
    X = dataframe[feature_cols].to_numpy(copy=True)

    # Target as a column vector so it can be stacked next to X.
    y = dataframe[y_label].to_numpy(copy=True).reshape(-1, 1)

    # Concatenate features (X) and target (y) into a single array
    data = np.hstack((X, y))

    return data, X, y

## **TRAINING DATASET (SIMPLE LINEAR REGRESSION)**

In [198]:
# Build single-feature (x1 only) inputs and targets for each split.
# x_labels is a flat list of column names; the previous [['x1']] was a list
# nested inside a list and only worked because of how get_xy indexes it.
_, X_train_x1, y_train_x1 = get_xy(train, "y", x_labels=["x1"])
_, X_val_x1, y_val_x1 = get_xy(val, "y", x_labels=["x1"])
_, X_test_x1, y_test_x1 = get_xy(test, "y", x_labels=["x1"])

In [199]:
# Ordinary least-squares model of y on the single feature x1.
x1_reg = LinearRegression()

# Estimate intercept and slope from the training split only.
x1_reg.fit(X=X_train_x1, y=y_train_x1)

## **TESTING AND SCORE**

In [None]:
# Fitted parameters of the single-feature model (y ~ intercept + coef * x1).
# NOTE(review): both values print as arrays rather than plain numbers,
# presumably because the model was fitted on a 2-D target — confirm.
print("Intercept = ", x1_reg.intercept_)
print("Coeficient = ", x1_reg.coef_)

In [None]:
# Goodness of fit on the data the model was trained on.
print("R-Square = ", x1_reg.score(X_train_x1, y_train_x1))
print("RMSE = ", np.sqrt(mean_squared_error(y_train_x1, x1_reg.predict(X_train_x1))))

# The same metrics on the held-out test split: training scores alone say
# nothing about how the model behaves on rows it has not seen.
print("Test R-Square = ", x1_reg.score(X_test_x1, y_test_x1))
print("Test RMSE = ", np.sqrt(mean_squared_error(y_test_x1, x1_reg.predict(X_test_x1))))

In [None]:
# Randomly select a row from the DataFrame
# (drawn from the full frame, so it may be a row the model was trained on)
df_random_row = df.sample(n=1)

# Specify the name of the feature column to be used
x_column_name = 'x1'  # Replace with the appropriate column name

# Retrieve the values of x and y
x_value = df_random_row[x_column_name].values[0]
y_value = df_random_row['y'].values[0]

# Predict through the fitted model. predict() expects a 2-D input, and its
# result is flattened to a plain scalar so the prediction and the residual
# print as numbers instead of nested one-element arrays.
predicted_y = np.ravel(x1_reg.predict(np.array([[x_value]])))[0]

# Calculate the residual (absolute error for this row)
residual = abs(predicted_y - y_value)

# Print the results
print("X = ", x_value)
print("Actual Y = ", y_value)
print("Predicted Y = ", predicted_y)
print("Residual = ", residual)

In [None]:
# Plotting relationship between x1 and y
plt.scatter(X_train_x1, y_train_x1, label="Data", color="blue")
# 100 evenly spaced x1 values from -20 to 100 on which to draw the fitted line.
# NumPy is used directly: the values are fed to scikit-learn as a NumPy array
# anyway, so building a TensorFlow tensor first adds nothing.
x = np.linspace(-20, 100, 100)
plt.plot(x, x1_reg.predict(x.reshape(-1, 1)), label="Fit", color="red", linewidth=3)
plt.legend()
plt.title("X1 vs Y")
plt.ylabel("y")
plt.xlabel("x1")
plt.show()

## **TRAINING DATASET (MULTIPLE LINEAR REGRESSION)**

In [204]:
# Build multi-feature inputs and targets for each split.
# Leaving x_labels at its default makes get_xy use every column except "y",
# chosen by name, so the result does not depend on "y" being the last column.
_, X_train_all, y_train_all = get_xy(train, "y")
_, X_val_all, y_val_all = get_xy(val, "y")
_, X_test_all, y_test_all = get_xy(test, "y")

In [None]:
# Ordinary least-squares model of y on all feature columns.
all_reg = LinearRegression()

# Estimate intercept and coefficients from the training split only.
all_reg.fit(X=X_train_all, y=y_train_all)

## **TESTING AND SCORE**

In [None]:
# Fitted parameters of the multi-feature model: one intercept plus one
# coefficient per feature column used in training.
print("Intercept = ", all_reg.intercept_)
print("Coeficient = ", all_reg.coef_)

In [None]:
# Goodness of fit on the data the model was trained on.
print("R-Square = ", all_reg.score(X_train_all, y_train_all))
print("RMSE = ", np.sqrt(mean_squared_error(y_train_all, all_reg.predict(X_train_all))))

# The same metrics on the held-out test split, to see how the model behaves
# on rows it was not fitted on.
print("Test R-Square = ", all_reg.score(X_test_all, y_test_all))
print("Test RMSE = ", np.sqrt(mean_squared_error(y_test_all, all_reg.predict(X_test_all))))

In [None]:
# Sample a random row from the DataFrame
# (drawn from the full frame, so it may be a row the model was trained on)
df_random_row = df.sample(n=1)

# Extract 'x1' and 'x2' values
# NOTE(review): assumes the model was trained on exactly these two columns,
# in this order — confirm against the training cell.
x_values = df_random_row[['x1', 'x2']].values[0]

# Retrieve the actual value from the 'y' column
y_value = df_random_row['y'].values[0]

# Predict through the fitted model. predict() expects one row per sample, and
# its result is flattened to a plain scalar so the prediction and the residual
# print as numbers instead of one-element arrays.
predicted_y = np.ravel(all_reg.predict(x_values.reshape(1, -1)))[0]

# Calculate the residual (absolute error for this row)
residual = abs(predicted_y - y_value)

# Print the results
print("X values = ", x_values)
print("Actual Y = ", y_value)
print("Predicted Y = ", predicted_y)
print("Residual = ", residual)

In [None]:
# 3-D scatter of the two features (x1, x2) against the target y.
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1, projection="3d")

# One red marker per row of the full dataset.
ax.scatter(df["x1"], df["x2"], df["y"], c="r", marker="o", label="Data")

# Axis labels
ax.set_xlabel("x1")
ax.set_ylabel("x2")
ax.set_zlabel("y")

# Title, legend, then render the figure.
ax.set_title("Hubungan x1, x2, dan y")
ax.legend(loc="upper left")
plt.show()