In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory tree and echo the full path of every file found.
for dirname, _, filenames in os.walk('/kaggle/input'):
    full_paths = (os.path.join(dirname, filename) for filename in filenames)
    for full_path in full_paths:
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split

# Load the scikit-learn diabetes dataset and pull out features and target.
diabetes = load_diabetes()
X = diabetes.data
y = diabetes.target

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [3]:
# Display the dimensions of the full feature matrix as a sanity check.
X.shape

(442, 10)

In [4]:
def get_rmse(y_true, y_pred):
    """Compute the root mean squared error between targets and predictions.

    Parameters
    ----------
    y_true : array-like
        Ground-truth values.
    y_pred : array-like
        Predicted values.

    Returns
    -------
    numpy.float64
        Square root of the mean of the squared element-wise differences.
    """
    # Convert to float arrays so plain lists are accepted and unsigned-integer
    # inputs cannot wrap around when subtracted.
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)

    # A 1-D vector against an (n, 1) column would silently broadcast to an
    # (n, n) matrix and yield a wrong score; drop length-1 axes so the two
    # operands line up element-wise instead.
    if y_true.shape != y_pred.shape:
        y_true = np.squeeze(y_true)
        y_pred = np.squeeze(y_pred)

    mse = np.mean((y_true - y_pred) ** 2)
    return np.sqrt(mse)

def predict(X, weights, bias):
    """Evaluate the linear model: dot product of X and weights, plus bias."""
    linear_term = np.dot(X, weights)
    return linear_term + bias
    
    

def compute_cost(pred_y, actual_y, n_samples):
    """Return the squared-error sum between predictions and targets, scaled by 1 / n_samples."""
    residuals = pred_y - actual_y
    squared_error_sum = np.sum(np.square(residuals))
    return (1 / n_samples) * squared_error_sum

def fit(X, y, iterations, alpha, log_every=1000):
    """Fit a linear model by full-batch gradient descent on mean squared error.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Feature matrix.
    y : array-like of shape (n_samples,) or (n_samples, 1)
        Target values; flattened to one dimension before training.
    iterations : int
        Number of gradient-descent steps to run.
    alpha : float
        Learning rate applied to every step.
    log_every : int or None, default 1000
        Print the cost every ``log_every`` iterations (starting at iteration 0).
        Pass ``0`` or ``None`` to disable logging.

    Returns
    -------
    weights : numpy.ndarray of shape (n_features,)
        Learned coefficients.
    bias : float
        Learned intercept.

    Raises
    ------
    ValueError
        If ``X`` is not two-dimensional, if ``X`` and ``y`` disagree on the
        number of samples, or if there are no samples.
    """
    X = np.asarray(X, dtype=float)
    # Flatten y: an (n, 1) column would otherwise broadcast against the (n,)
    # predictions into an (n, n) residual matrix and silently corrupt the fit.
    y = np.asarray(y, dtype=float).reshape(-1)

    if X.ndim != 2:
        raise ValueError(f"X must be 2-dimensional, got shape {X.shape}")
    n_samples, n_features = X.shape
    if n_samples == 0:
        raise ValueError("X must contain at least one sample")
    if y.shape[0] != n_samples:
        raise ValueError(
            f"X has {n_samples} samples but y has {y.shape[0]} values"
        )

    weights = np.zeros(n_features)
    bias = 0.0
    grad_scale = 2 / n_samples  # constant factor of the MSE gradient

    for i in range(iterations):
        predictions = np.dot(X, weights) + bias
        residuals = predictions - y  # computed once, shared by both gradients

        dw = grad_scale * np.dot(X.T, residuals)
        db = grad_scale * np.sum(residuals)

        weights = weights - alpha * dw
        bias = bias - alpha * db

        # The reported cost uses the predictions made before this step's update.
        if log_every and i % log_every == 0:
            print(f"Cost after {i} iterations: ", compute_cost(predictions, y, n_samples))

    return weights, bias

        

In [5]:
# Train the from-scratch gradient-descent model on the training split.
weights, bias = fit(X_train, y_train, iterations=10000, alpha=0.1)

# Score the learned parameters on the held-out split.
y_pred = predict(X=X_test, weights=weights, bias=bias)
print("RMSE: ", get_rmse(y_test, y_pred))


Cost after 0 iterations:  29711.32294617564
Cost after 1000 iterations:  3443.435974236549
Cost after 2000 iterations:  3094.0243691310884
Cost after 3000 iterations:  2979.000791513324
Cost after 4000 iterations:  2933.732691765401
Cost after 5000 iterations:  2914.701553373165
Cost after 6000 iterations:  2906.181887063745
Cost after 7000 iterations:  2902.093735818749
Cost after 8000 iterations:  2899.97284792193
Cost after 9000 iterations:  2898.7714161352656
RMSE:  53.667522605932795


In [6]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Reference model: scikit-learn's LinearRegression fitted on the same training split.
model = LinearRegression().fit(X_train, y_train)
y_pred = model.predict(X_test)

# Report test-set RMSE for comparison with the gradient-descent result.
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
print("RMSE:", rmse)

# Show the fitted coefficients followed by the intercept.
print(model.coef_, model.intercept_, sep="\n")


RMSE: 53.85344583676593
[  37.90402135 -241.96436231  542.42875852  347.70384391 -931.48884588
  518.06227698  163.41998299  275.31790158  736.1988589    48.67065743]
151.34560453985995
