<a href="https://colab.research.google.com/github/Andrei198203/Data_Science_HW3/blob/main/HW3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Task1

In [13]:
import numpy as np

def hypothesis(X, theta):
    """
    Calculates a linear regression hypothesis in vector form.

    Parameters:
    X -- feature matrix (rows are examples, columns are features)
    theta -- a vector of parameters (including the shift)

    Returns:
    h -- vector of hypotheses (predicted values)
    """
    h = np.dot(X, theta)
    return h

Task2

In [14]:
def compute_cost(X, y, theta):
    """
    Calculates the loss function for a linear regression.

    Parameters:
    X -- feature matrix (rows are examples, columns are features)
    y - vector of target values
    theta -- a vector of parameters (including bias)

    Returns:
    J -- the value of the loss function
    """
    m = len(y)
    h = hypothesis(X, theta)
    J = (1/(2*m)) * np.sum(np.square(h - y))
    return J

Task3

In [15]:
def gradient_descent_step(X, y, theta, alpha):
    """
    Performs a single gradient descent step for linear regression.

    Parameters:
    X -- feature matrix (rows are examples, columns are features)
    y - vector of target values
    theta - vector of parameters (including the shift)
    alpha -- gradient descent step

    Returns:
    theta -- updated parameter values after one step
    """
    m = len(y)
    h = hypothesis(X, theta)
    gradient = (1/m) * np.dot(X.T, (h - y))
    theta -= alpha * gradient
    return theta

Task4

In [16]:
import numpy as np
import pandas as pd
import requests
from io import StringIO

# Linear regression hypothesis function
def hypothesis(X, theta):
    return np.dot(X, theta)

# Function for calculating the loss function
def compute_cost(X, y, theta):
    m = len(y)
    h = hypothesis(X, theta)
    J = (1/(2*m)) * np.sum(np.square(h - y))
    return J

# One step of gradient descent
def gradient_descent_step(X, y, theta, alpha):
    m = len(y)
    h = hypothesis(X, theta)
    gradient = (1/m) * np.dot(X.T, (h - y))
    theta -= alpha * gradient
    return theta

# Data normalisation
def normalize_features(X):
    """
    Normalises the features to the range [0, 1].

    Parameters:
    X -- feature matrix (rows are examples, columns are features)

    Returns:
    X_norm -- normalised features
    """
    mins = np.min(X, axis=0)
    maxs = np.max(X, axis=0)
    X_norm = (X - mins) / (maxs - mins)
    return X_norm

# Link to file
url = "https://drive.google.com/uc?id=1-rAa4XT4_fI0dOBlMNuE6a7jB0wln_Qo"

# Downloading data
response = requests.get(url)
content = response.content.decode('utf-8')
data = pd.read_csv(StringIO(content))

# Separation into attributes and target variable
X = data[['area', 'bathrooms', 'bedrooms']].values
y = data['price'].values

# Normalisation of features
X_normalized = normalize_features(X)

# Add a column with units (offset)
X_normalized = np.c_[X_normalized, np.ones(X_normalized.shape[0])]

# Initialising parameters
theta = np.zeros(X_normalized.shape[1])

# Gradient descent parameters
alpha = 0.01
iterations = 1500

# Gradient descent
for _ in range(iterations):
    theta = gradient_descent_step(X_normalized, y, theta, alpha)

print("The best parameters found using gradient descent:")
print(theta)

The best parameters found using gradient descent:
[2312999.60105339 1886635.22246029 1995918.88602716 3279504.28376392]


Task5

In [17]:
theta_analytical = np.linalg.inv(X.T @ X) @ X.T @ y
print("The best parameters found with the help of an analytical solution:")
print(theta_analytical)

The best parameters found with the help of an analytical solution:
[3.72448352e+02 1.37031315e+06 3.68974672e+05]


Task6

In [18]:
print("Comparison of results:")
print("Gradient descent:", theta)
print("Analytical solution:", theta_analytical)

Comparison of results:
Gradient descent: [2312999.60105339 1886635.22246029 1995918.88602716 3279504.28376392]
Analytical solution: [3.72448352e+02 1.37031315e+06 3.68974672e+05]
