# 04 - Simple Linear Regression
- Simple Linear Regression memodelkan hubungan antara sebuah response variable dengan sebuah explanatory variable sebagai suatu garis lurus (linear).

## Sample Dataset

In [None]:
import pandas as pd

# Sample data: pizza diameters paired with their prices ('harga').
pizza = dict(
    diameter=[6, 8, 10, 14, 18],
    harga=[7, 9, 13, 17.5, 18],
)

# One row per pizza, one column per dict key.
pizza_df = pd.DataFrame(data=pizza)
pizza_df

### Visualisasi Data

In [None]:
import matplotlib.pyplot as plt

# Scatter plot of price against diameter, drawn straight from the DataFrame.
pizza_df.plot.scatter(x='diameter', y='harga')

# Fix both axes to the same 0-25 window and switch the grid on.
plt.xlim(0, 25)
plt.ylim(0, 25)
plt.grid(True)

# Title and axis labels.
plt.title('Perbandingan Diameter dan Harga Pizza')
plt.xlabel('Diameter (inch)')
plt.ylabel('Harga (dollar)')

plt.show()

## Penyesuaian Dataset

In [None]:
import numpy as np

# Copy the feature column and the target column out of the DataFrame
# as NumPy arrays.
X, y = (np.array(pizza_df[column]) for column in ('diameter', 'harga'))

print(f'X:{X}')
print(f'y:{y}')

In [None]:
# Reshape X into a single column; -1 lets NumPy infer the number of rows.
X = X.reshape(-1,1)
# Display the resulting shape.
X.shape

In [None]:
# Display the feature array X.
X

## Training Simple Linear Regression Model

In [None]:
from sklearn.linear_model import LinearRegression

# Create a LinearRegression estimator and fit it on features X and targets y.
model = LinearRegression()
model.fit(X,y)

## Visualisasi Simple Linear Regression Model

In [None]:
# Two x-positions, 0 and 25, written as a single-column array, and the
# model's predictions at those positions.
X_vis = np.array([[0], [25]])
y_vis = model.predict(X_vis)

In [None]:
# Data points as a scatter, with the model's predictions drawn as a
# solid red line on top.
plt.scatter(X, y)
plt.plot(X_vis, y_vis, '-r')

# Fix both axes to the same 0-25 window and switch the grid on.
plt.xlim(0, 25)
plt.ylim(0, 25)
plt.grid(True)

# Title and axis labels.
plt.title('Perbandingan Diameter dan Harga Pizza')
plt.xlabel('Diameter (inch)')
plt.ylabel('Harga (dollar)')

plt.show()

In [None]:
# Report the fitted parameters: intercept_ and coef_ of the trained model
# (coef_ is printed under the label 'slope').
print(f'intercept: {model.intercept_}')
print(f'slope: {model.coef_}')

## Mencari Nilai Slope

In [None]:
# Show X as stored, X flattened to one dimension, and the targets y.
# A single print with sep='\n' emits the same text as three separate calls.
print(
    f'X:\n{X}\n',
    f'X flatten: {X.flatten()}\n',
    f'y: {y}',
    sep='\n',
)

### Variance

In [None]:
# Variance of the flattened X; ddof=1 divides by (n - 1) rather than n.
variance_x = X.flatten().var(ddof=1)
print(f'variance : {variance_x}')

### Covariance

In [None]:
# Display the covariance matrix of the flattened X and y.
np.cov(X.flatten(),y)

In [None]:
# Row 0, column 1 of the covariance matrix: the covariance between X and y.
covariance_xy = np.cov(X.flatten(), y=y)[0, 1]
print(f'covariance : {covariance_xy}')

### Slope

In [None]:
# slope = covariance(x, y) / variance(x)
slope = covariance_xy / variance_x
print(f'slope : {slope}')

## Mencari Nilai Intercept

In [None]:
# intercept = mean(y) - slope * mean(X)
intercept = y.mean() - slope * X.mean()

print(f'intercept: {intercept}')

## Prediksi Harga Pizza

In [None]:
# Diameters to predict prices for, written as a single-column array
# (one row per pizza).
diameter_pizza = np.array([[12], [20], [23]])
diameter_pizza

In [None]:
# Predict a price for every diameter in diameter_pizza, then display the result.
prediksi_harga = model.predict(diameter_pizza)
prediksi_harga

In [None]:
# Print each diameter next to its predicted price.
# Fix: the output label was misspelled 'predilsi'; it now reads 'prediksi'.
# Each dmtr is one row of diameter_pizza, so it is printed as an array.
for dmtr, hrg in zip(diameter_pizza, prediksi_harga):
    print(f'Diameter: {dmtr} prediksi harga: {hrg}')

## Evaluasi Simple Linear Regression Model
### Training dan Testing Dataset

In [None]:
# Training set: features as a single-column array, targets as a 1-D array.
X_train = np.array([[6], [8], [10], [14], [18]])
y_train = np.array([7.0, 9.0, 13.0, 17.5, 18.0])

# Test set, laid out the same way.
X_test = np.array([[8], [9], [11], [16], [12]])
y_test = np.array([11.0, 8.5, 15.0, 18.0, 11.0])

### Training Simple Linear Regression Model

In [None]:
# Create a fresh LinearRegression estimator and fit it on the training set only.
model = LinearRegression()
model.fit(X_train,y_train)

### Evaluasi Linear Regression Model dengan Coefficient of Determination atau R-squared

In [None]:
from sklearn.metrics import r2_score

# Predict on the test features, then score those predictions against the
# test targets with R-squared.
y_pred = model.predict(X_test)
r_squared = r2_score(y_true=y_test, y_pred=y_pred)
print(f'R-squared : {r_squared}')

## Mencari nilai R-Squared (R2)

In [None]:
# Residual sum of squares: for every test sample, square the difference
# between the actual target and the model's prediction, then add them up.
ss_res = sum(
    (actual - model.predict(sample.reshape(-1, 1))[0]) ** 2
    for sample, actual in zip(X_test, y_test)
)
print(f'ss_res : {ss_res}')

In [None]:
# Total sum of squares: squared deviation of every test target from the
# mean of the test targets.
mean_y = y_test.mean()
ss_tot = sum((observed - mean_y) ** 2 for observed in y_test)

print(f'ss_tot : {ss_tot}')

In [None]:
# R-squared = 1 - (residual sum of squares / total sum of squares)
r_squared = 1 - (ss_res/ss_tot)
print(f'R-squared : {r_squared}')