<a href="https://colab.research.google.com/github/IrsyadMajid/FP_StatistikaKomputasi/blob/main/FinalProject_StatistikaKomputasi.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler

In [None]:
# Read the dataset from a CSV file located in the notebook's working directory.
df = pd.read_csv(filepath_or_buffer='diamonds.csv')

In [None]:
# Print column names, non-null counts and dtypes (info() writes to stdout).
df.info()
# Summary statistics; kept as the bare last expression so the notebook renders
# the resulting table as the cell output.
df.describe()

In [None]:
# Distribution of the target variable: 30-bin histogram with a KDE overlay.
fig, ax = plt.subplots()
sns.histplot(df['price'], bins=30, kde=True, ax=ax)
ax.set_title("Persebaran harga")
ax.set_xlabel("Harga")
ax.set_ylabel("Frekuensi")
plt.show()

In [None]:
# Predictor columns used by the models, and the regression target.
features = ['carat', 'clarity', 'depth', 'table']
Y = df.loc[:, 'price']
X = df.loc[:, features]

In [None]:
# One-hot encode `clarity` whenever it is not already numeric.
# The check is "not numeric" rather than "dtype == 'object'" so that columns
# stored as `category` or as a pandas string dtype are encoded too; otherwise
# the raw labels would be passed straight to StandardScaler in the next step.
# drop_first=True removes one dummy level to avoid perfectly collinear columns.
# dtype=float keeps the design matrix purely numeric (recent pandas versions
# return boolean dummies by default).
if 'clarity' in X.columns and not pd.api.types.is_numeric_dtype(X['clarity']):
    X = pd.get_dummies(X, columns=['clarity'], drop_first=True, dtype=float)

In [None]:
# Split BEFORE scaling. Fitting the scaler on the full dataset would let the
# test rows influence the mean/variance used to transform the training rows
# (data leakage), which makes the test metrics optimistic.
# random_state=42 and test_size=0.2 are unchanged, so the same rows end up in
# the train and test partitions as before.
X_train_raw, X_test_raw, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=42
)

# Learn the scaling parameters from the training rows only, then apply that
# same transformation to the held-out rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Full matrix transformed with the train-fitted scaler; kept so that any code
# still referring to `X_scaled` continues to work.
X_scaled = scaler.transform(X)

In [None]:
# Baseline: ordinary least squares fitted on the training partition,
# then used to predict prices for the held-out rows.
model_lr = LinearRegression().fit(X_train, Y_train)
Y_pred_lr = model_lr.predict(X_test)

In [None]:
# Report test-set error metrics for the linear regression baseline.
print("Linear Regression:")
lr_scores = {
    "MAE": mean_absolute_error(Y_test, Y_pred_lr),
    "MSE": mean_squared_error(Y_test, Y_pred_lr),
    "R-squared": r2_score(Y_test, Y_pred_lr),
}
for metric_name, metric_value in lr_scores.items():
    print(f"{metric_name}: {metric_value}")

In [None]:
# Tune the Ridge regularisation strength with 5-fold cross-validation on the
# training partition. Candidates are ranked by negative MSE (higher is better).
parameters = {'alpha': [0.1, 1, 10, 100, 1000]}
ridge = Ridge()
grid_ridge = GridSearchCV(
    estimator=ridge,
    param_grid=parameters,
    scoring='neg_mean_squared_error',
    cv=5,
)
grid_ridge.fit(X_train, Y_train)

In [None]:
# Take the estimator selected by the grid search and score it on the test set.
best_ridge = grid_ridge.best_estimator_
Y_pred_ridge = best_ridge.predict(X_test)

print("\nRidge Regression (Best Parameters):")
print(f"Alpha: {grid_ridge.best_params_['alpha']}")
ridge_scores = {
    "MAE": mean_absolute_error(Y_test, Y_pred_ridge),
    "MSE": mean_squared_error(Y_test, Y_pred_ridge),
    "R-squared": r2_score(Y_test, Y_pred_ridge),
}
for metric_name, metric_value in ridge_scores.items():
    print(f"{metric_name}: {metric_value}")

In [None]:
# Predicted vs. actual prices for the tuned Ridge model. Points on the dashed
# diagonal are perfect predictions; the diagonal spans the full price range.
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(Y_test, Y_pred_ridge, alpha=0.6, color="b", label="Ridge Predictions")
price_bounds = [Y.min(), Y.max()]
ax.plot(price_bounds, price_bounds, '--r', linewidth=2, label="Ideal Line")
ax.set_xlabel("Harga Asli")
ax.set_ylabel("Harga Prediksi")
ax.set_title("Ridge Regression: Prediksi vs Asli")
ax.legend()
plt.show()