# Task 2: Build a Simple Linear Regression

## Libraries

In [3]:
import os
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import plotly.express as px
import plotly.graph_objects as go

## Load Data

In [4]:
# Folder (relative to the working directory) used for the dataset files
DATA_DIR = "data"
# Create the folder when it is missing; an already-existing folder is left untouched
os.makedirs(name=DATA_DIR, exist_ok=True)

In [5]:
# Read the train and test splits straight from disk; nothing is written back
train_path = os.path.join(DATA_DIR, "house_train.csv")
test_path = os.path.join(DATA_DIR, "house_test.csv")
# header=None: the first row is read as data and the columns get integer labels
train_data = pd.read_csv(train_path, header=None)
test_data = pd.read_csv(test_path, header=None)

In [6]:
# Report the (rows, columns) shape of each split
for label, frame in (("Train Data Shape:", train_data), ("Test Data Shape:", test_data)):
    print(label, frame.shape)

Train Data Shape: (405, 14)
Test Data Shape: (103, 14)


## Split Feature & Target

In [7]:
# Every column except the last is a feature; the last column is the target
X_train, y_train = train_data.iloc[:, :-1], train_data.iloc[:, -1]
X_test, y_test = test_data.iloc[:, :-1], test_data.iloc[:, -1]

## Build Model

In [8]:
# Instantiate scikit-learn's LinearRegression with its default settings
model = LinearRegression()

# Fit the estimator on the training features and targets
model.fit(X=X_train, y=y_train)

## Predict

In [9]:
# Apply the fitted model to the test features
y_pred = model.predict(X=X_test)

## Interpretation

In [10]:
# Show the fitted intercept followed by the per-feature coefficients
for label, value in (("Intercept:", model.intercept_), ("Koefisien:", model.coef_)):
    print(label, value)

Intercept: 0.008306329663760298
Koefisien: [-0.16047214 -0.00503726 -0.02411065  0.12480188 -0.06817697  0.63843225
  0.07973562  0.09837476  0.15045473  0.07617821 -0.11532887  0.35077734
 -0.03406524]


## Model Evaluation

In [11]:
# Score the test-set predictions with mean squared error and R-squared
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
r2 = r2_score(y_true=y_test, y_pred=y_pred)
print(f"Mean Squared Error: {mse}", f"R-squared: {r2}", sep="\n")

Mean Squared Error: 0.7100742633341507
R-squared: 0.7175114541152223


## Visualization

In [12]:
# Interactive scatter of actual vs. predicted targets; hovering shows both values
axis_labels = {"x": "Actual Values", "y": "Predicted Values"}
fig = px.scatter(
    x=y_test,
    y=y_pred,
    labels=axis_labels,
    title="Actual vs Predicted Values",
)
fig.update_traces(
    mode="markers",
    marker={"size": 7, "opacity": 0.6},
    hovertemplate="Actual: %{x}<br>Predicted: %{y}",
)
fig.show()

In [13]:
# Prediction errors: actual value minus predicted value
errors = y_test - y_pred

# Interactive histogram of the errors; hovering shows the bin value and its count
error_hist = go.Histogram(
    x=errors,
    nbinsx=20,
    marker={"color": "blue", "line": {"color": "black", "width": 1}},
    opacity=0.7,
    hoverinfo="x+y",
)
fig = go.Figure(data=[error_hist])
fig.update_layout(
    title="Distribution of Prediction Errors",
    xaxis_title="Prediction Error",
    yaxis_title="Frequency",
    bargap=0.2,
)
fig.show()