In [None]:
import pandas as pd
import numpy as np
from pathlib import Path
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
%matplotlib inline


In [None]:

# Read the CSV and separate it into a column-vector target and a feature matrix.
data_path = Path('data/diabetes.csv')
df = pd.read_csv(data_path)
y = df['target'].values.reshape(-1, 1)
X = df.drop(columns=['target']).values

# Prepend a column of ones so the first weight plays the role of the intercept.
X = np.hstack([np.ones((len(X), 1)), X])

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=10
)
m, n = X_train.shape


In [None]:
def compute_cost(X, y, theta):
    """Return the halved mean squared error of the linear model ``X @ theta``.

    Parameters
    ----------
    X : ndarray of shape (m, n)
        Design matrix.
    y : array-like of shape (m,) or (m, 1)
        Target values. If its number of dimensions differs from that of the
        predictions but it holds the same number of elements, it is reshaped
        to the predictions' shape before subtracting.
    theta : ndarray of shape (n,) or (n, 1)
        Model weights.

    Returns
    -------
    float
        ``sum((X @ theta - y) ** 2) / (2 * m)`` with ``m = len(y)``.
    """
    m = len(y)
    predictions = np.asarray(X @ theta)
    targets = np.asarray(y)
    # Without this alignment, (m, 1) - (m,) broadcasts to an (m, m) matrix and
    # the summed squares silently become a wrong (inflated) cost.
    if targets.ndim != predictions.ndim and targets.size == predictions.size:
        targets = targets.reshape(predictions.shape)
    residuals = predictions - targets
    return (1/(2*m)) * np.sum(residuals ** 2)

def gradient_descent(X, y, lr=0.01, n_iter=5000, log_every=100):
    """Fit linear-regression weights with batch gradient descent.

    Parameters
    ----------
    X : array-like of shape (m, n)
        Design matrix. No intercept column is added here.
    y : array-like of shape (m,) or (m, 1)
        Target values. A 1-D vector is reshaped to a column so it lines up
        with the ``(n, 1)`` weight vector.
    lr : float, default 0.01
        Step size applied to the gradient on every iteration.
    n_iter : int, default 5000
        Number of gradient steps to take.
    log_every : int, default 100
        The cost is recorded on every iteration whose index is a multiple of
        this value (starting with iteration 0). Pass 0 or None to skip cost
        tracking entirely.

    Returns
    -------
    theta : ndarray of shape (n, 1)
        Weights after ``n_iter`` updates, starting from zeros.
    cost_history : list
        Costs returned by ``compute_cost`` at the logged iterations.

    Raises
    ------
    ValueError
        If ``y`` does not have one row per row of ``X``.
    """
    X = np.asarray(X)
    y = np.asarray(y)
    m, n = X.shape
    # A 1-D y would make (X @ theta - y) broadcast to (m, m), so the gradient
    # becomes (n, m) and the in-place update below fails with a cryptic error.
    if y.ndim == 1:
        y = y.reshape(-1, 1)
    if y.shape[0] != m:
        raise ValueError(
            f"X has {m} rows but y has {y.shape[0]}; they must match"
        )
    theta = np.zeros((n, 1))
    cost_history = []
    for i in range(n_iter):
        gradients = (1/m) * X.T @ (X @ theta - y)
        theta -= lr * gradients
        # The cost is measured after the update for the logged iteration.
        if log_every and i % log_every == 0:
            cost_history.append(compute_cost(X, y, theta))
    return theta, cost_history


In [None]:

# Fit on the training split; keep both the learned weights and the sampled costs.
theta_gd, costs = gradient_descent(
    X=X_train,
    y=y_train,
    lr=0.01,
    n_iter=10_000,
)
print(f"Optimum parametreler ilk 5: {theta_gd.flatten()[:5]}")


In [None]:
# Predict on the held-out split with the learned weights and report the MSE.
y_pred = np.matmul(X_test, theta_gd)
mse_gd = mean_squared_error(y_true=y_test, y_pred=y_pred)
print(f"Test MSE (Gradient Descent): {mse_gd:.4f}")


In [None]:
# The x positions 0, 100, 200, ... assume `costs` holds one sample per
# 100 iterations (the interval used when the costs were recorded).
plt.plot(np.arange(0, 100 * len(costs), 100), costs)
plt.gca().set(
    xlabel="İterasyon",
    ylabel="Cost (MSE/2)",
    title="Gradient Descent Cost Eğrisi",
)
plt.show()
