In [4]:
import numpy as np

print("""The hypothesis function for linear regression in vectorized form.
It uses vectorization to perform matrix-vector multiplication efficiently,
allowing to compute model predictions for all examples at once.""")

def hypothesis(X, theta):
    """
    Hypothesis function for linear regression in vectorized form.

    Parameters:
        X (numpy.ndarray): Feature matrix.
        theta (numpy.ndarray): Model parameters vector.

    Returns:
        numpy.ndarray: Vector of model predictions.
    """
    return np.dot(X, theta)

The hypothesis function for linear regression in vectorized form.
It uses vectorization to perform matrix-vector multiplication efficiently,
allowing to compute model predictions for all examples at once.


In [5]:
print("Example 1:\n")

X = np.array([[1, 2], [1, 3], [1, 4]])  # Feature matrix (including the intercept term)
theta = np.array([0.5, 0.2])            # Model parameters
predictions = hypothesis(X, theta)
print(predictions)

Example 1:

[0.9 1.1 1.3]


In [6]:
print("Example 2:\n")

X = np.array([[1, 5], [1, 6], [1, 7]])
theta = np.array([0.3, 0.4])
predictions = hypothesis(X, theta)
print(predictions)

Example 2:

[2.3 2.7 3.1]


In [7]:
print("""The function to compute the loss function in vectorized form.
This function calculates the mean squared error,
which is a common choice for the loss function in linear regression problems.""")

import numpy as np

def loss_function(X, y, theta):
    """
    Loss function for linear regression in vectorized form.

    Parameters:
        X (numpy.ndarray): Feature matrix.
        y (numpy.ndarray): Target vector.
        theta (numpy.ndarray): Model parameters vector.

    Returns:
        float: Value of the loss function.
    """
    m = len(y)  # Number of training examples
    predictions = hypothesis(X, theta) # Compute predictions
    squared_errors = (predictions - y) ** 2 # Compute squared errors
    cost = (1 / (2 * m)) * np.sum(squared_errors) # Compute mean squared error
    return cost



The function to compute the loss function in vectorized form.
This function calculates the mean squared error, 
which is a common choice for the loss function in linear regression problems.


In [8]:
print("Example 1:\n")

X = np.array([[1, 2], [1, 3], [1, 4]])  # Feature matrix (including the intercept term)
y = np.array([2, 3, 4])                  # Target vector
theta = np.array([0.5, 0.2])            # Model parameters
loss = loss_function(X, y, theta)
print(loss)

Example 1:

2.0183333333333335


In [9]:
print("Example 2:\n")

X = np.array([[1, 5], [1, 6], [1, 7]])
y = np.array([3, 4, 5])
theta = np.array([0.3, 0.4])
loss = loss_function(X, y, theta)
print(loss)

Example 2:

0.9649999999999999


In [10]:
print("The computing the gradient of the loss function with respect to the model parameters.")

def gradient_descent_step(X, y, theta, learning_rate):
    """
    Perform one step of gradient descent.

    Parameters:
        X (numpy.ndarray): Feature matrix.
        y (numpy.ndarray): Target vector.
        theta (numpy.ndarray): Model parameters vector.
        learning_rate (float): Learning rate for gradient descent.

    Returns:
        numpy.ndarray: Updated model parameters vector.
    """
    m = len(y)  # Number of training examples
    predictions = hypothesis(X, theta) # Compute predictions
    errors = predictions - y # Compute errors
    gradient = (1 / m) * np.dot(X.T, errors) # Compute gradient
    new_theta = theta - learning_rate * gradient  # Update parameters
    return new_theta

The computing the gradient of the loss function with respect to the model parameters.


In [11]:
print("Checking how this function works on an example:\n")

# Define feature matrix X, target vector y, initial parameters theta, and learning rate
X = np.array([[1, 2], [1, 3], [1, 4]])  # Feature matrix (including the intercept term)
y = np.array([2, 3, 4])                  # Target vector
theta = np.array([0.5, 0.2])             # Initial model parameters
learning_rate = 0.1                      # Learning rate for gradient descent

# Perform one step of gradient descent
theta = gradient_descent_step(X, y, theta, learning_rate)
print("Updated parameters:", theta)


Checking how this function works on an example:

Updated parameters: [0.69       0.82333333]


In [12]:
print("The computing the gradient of the loss function with respect to the model parameters.")

def gradient_descent(X, y, theta, learning_rate, iterations):
    costs = []
    for i in range(iterations):
        theta = gradient_descent_step(X, y, theta, learning_rate)
        cost = loss_function(X, y, theta)
        costs.append(cost)
        # print(f"Iteration {i+1}/{iterations}, Cost: {cost}")
    return theta, costs

The computing the gradient of the loss function with respect to the model parameters.


In [30]:
import pandas as pd
import numpy as np
from io import StringIO
import requests

# URL of the CSV file
url = "https://raw.githubusercontent.com/Anastasia-front/data-science/main/csv/Housing.csv"

# Read the CSV file content from the URL
response = requests.get(url)
content = response.content.decode("utf-8")

# Use pandas read_csv to load the CSV data into a DataFrame
df = pd.read_csv(StringIO(content))

# Insert a column of ones at the beginning for the intercept term
df.insert(0, 'x0', 1)

# Splitting the data into features and target value
X = df[['x0', 'area', 'bedrooms', 'bathrooms']].values
y = df['price'].values

# Initial values of parameters
theta = np.zeros(X.shape[1])

# Learning rate and number of iterations
learning_rate = 0.00000001
iterations = 10000

# Running gradient descent
final_theta, costs = gradient_descent(X, y, theta, learning_rate, iterations)

print("Optimal parameters w:", final_theta)


Optimal parameters w: [ 36.07342826 855.61928545 136.69255633  76.90357991]


In [31]:
# Calculation of parameters using an analytical solution
w = np.linalg.inv(X.T @ X) @ X.T @ y

print("Optimal parameters w:", w)

Optimal parameters w: [-1.73171608e+05  3.78762754e+02  4.06820034e+05  1.38604950e+06]


In [32]:
from sklearn.linear_model import LinearRegression

# Separation of data into features and target value
X = df[['area', 'bathrooms', 'bedrooms']].values
y = df['price'].values

# Learning a linear regression model
model = LinearRegression()
model.fit(X, y)

# Getting predicted values
predictions_sklearn = model.predict(X)

print("Predicted values using LinearRegression with scikit-learn:")
print(predictions_sklearn[:5])

Predicted values using LinearRegression with scikit-learn:
[ 7036627.15462756 10392020.79073061  7591864.51496454  7066928.17491437
  5650577.65683656]
