## Question 1

In [None]:
pip install --upgrade pandas

In [None]:
import pandas as pd
import numpy as np
import torch as tc
import matplotlib.pyplot as plt
import zipfile


In [None]:
# Unpack every member of 'archive.zip' under the 'extracts' directory.
with zipfile.ZipFile("archive.zip", mode="r") as archive:
    archive.extractall(path="extracts")

# Load the extracted 'car_web_scraped_dataset.csv' as a pandas DataFrame.
car_dataframe = pd.read_csv("extracts/car_web_scraped_dataset.csv")


In [None]:
# Preview the first five rows (head() defaults to n=5).
car_dataframe.head()

## Question 2

- This dataset is suitable for regression because the goal of regression is to model the relationship between one or more independent variables and a dependent (target) variable. Here the target is "price," a continuous numeric outcome, and the dataset lets us explore and quantify how independent variables such as "year," "miles," "color," and "condition" relate to it when predicting car prices.

## Question 3

<ol type="a">
  <li>
  
  </li>
 
 <br>
  <li>Milk</li>
</ol>

In [None]:
# Summary statistics of the 'price' column.
car_dataframe.loc[:, 'price'].describe()

In [None]:
# Strip '$' and ',' from 'price' and convert it to a numeric column; values that
# still fail to parse become NaN (errors='coerce').
# The pattern is a raw string so the backslash reaches the regex engine instead of
# being read by Python as an (invalid) string escape.
car_dataframe['price'] = pd.to_numeric(
    car_dataframe['price'].replace(r'[\$,]', '', regex=True),
    errors='coerce',
)

# 25th, 50th and 75th percentiles of 'price', plus the maximum (q=1).
quantiles = car_dataframe['price'].quantile(q=[0.25, 0.5, 0.75, 1])
quantiles

In [None]:
# np.asarray accepts both a pandas Series and an existing ndarray, so this cell can
# be re-run safely (an ndarray has no .to_numpy() method).
quantiles = np.asarray(quantiles)

In [None]:
def group_price(price, bounds=None):
    """
    Group the price into categories based on quantiles.

    Args:
    price (float): The price to be categorized.
    bounds (sequence of float, optional): Cut points indexed by position; only
        the first three are used. Defaults to the notebook-level ``quantiles``.

    Returns:
    str or None: 'cheap', 'average', 'expensive' or 'very expensive'. None when
        the price compares false against every cut point (e.g. NaN).
    """
    if bounds is None:
        # Fall back to the cut points computed earlier in the notebook.
        bounds = quantiles
    low, middle, high = bounds[0], bounds[1], bounds[2]

    if price <= low:
        return 'cheap'
    if price <= middle:
        return 'average'
    if price <= high:
        return 'expensive'
    if price > high:
        return 'very expensive'
    # Only reached when every comparison above is false (NaN price).
    return None

In [None]:
# Label every row with its price bucket by mapping group_price over 'price'.
car_dataframe['price_category'] = car_dataframe['price'].map(group_price)

In [None]:
# Show the first five rows, now including 'price_category'.
car_dataframe.head(n=5)

## Question 4

In [None]:
# Inspect the dtype of every column before encoding and conversion.
car_dataframe.dtypes

In [None]:
# Discard every row that contains at least one missing value.
car_dataframe = car_dataframe.dropna()

In [None]:
# One-hot encode the categorical columns, dropping the first level of each.
# NOTE(review): 'price_category' is derived from 'price'; if 'price' is used as the
# regression target, these dummy columns leak the target into the features.
car_dataframe = pd.get_dummies(
    car_dataframe,
    columns=['condition', 'color', 'price_category', 'name'],
    drop_first=True,
)

# Keep only the digits of 'miles' and convert to numeric; unparseable values become
# NaN (errors='coerce'). The pattern is a raw string so the backslash is passed to
# the regex engine rather than read as an (invalid) Python string escape.
car_dataframe['miles'] = pd.to_numeric(
    car_dataframe['miles'].replace(r'[\D,]', '', regex=True),
    errors='coerce',
)
car_dataframe.head()

In [None]:
# Cast every column of car_dataframe to 32-bit floats.
car_dataframe = car_dataframe.astype(np.float32)
car_dataframe.head()

In [None]:
# Confirm the column dtypes after the float32 cast.
car_dataframe.dtypes

In [None]:
# Features: every column except 'price'. Labels: the 'price' column itself.
inputs = tc.tensor(car_dataframe.drop(columns='price').to_numpy(), dtype=tc.float32)
outputs = tc.tensor(car_dataframe['price'].to_numpy(), dtype=tc.float32)

<ol type='a'>
    <li>The input and the target tensors are shown below.</li>

</ol>

In [None]:
# Display the input (feature) tensor.
print(inputs)

In [None]:
# Display the target (price) tensor.
print(outputs)

b. 

In [None]:
def generate_random_params(num_params):
    """
    Draw a column vector of random weights that tracks gradients.

    Args:
    num_params (int): How many weights to draw.

    Returns:
    torch.Tensor: Tensor of shape (num_params, 1) sampled with ``tc.rand`` and
        created with ``requires_grad=True``.
    """
    return tc.rand(num_params, 1, requires_grad=True)

In [None]:
# Shape of the input tensor.
input_size = inputs.size()
input_size

In [None]:
# One weight per input column (second dimension of the input tensor).
num_params = inputs.size(1)
random_params = generate_random_params(num_params)
print("Random parameters =  ", random_params)

c.

In [None]:
def linear_regression(inputs, weights, bias):
    """
    Evaluate the linear model ``inputs @ weights + bias``.

    Args:
    inputs (tensor): The input tensor for the regression.
    weights (tensor): The weights tensor multiplied with the inputs.
    bias (tensor): The bias added to the matrix product.

    Returns:
    tensor: The matrix product of inputs and weights, shifted by the bias.
    """
    weighted_sum = inputs @ weights
    return weighted_sum + bias

In [None]:
def mean_squared_error(outputs, labels):
    """
    Compute the mean of the squared element-wise differences.

    Args:
    outputs (tensor): Predicted values.
    labels (tensor): Target values.

    Returns:
    tensor: Scalar tensor holding the mean squared error.
    """
    # Subtracting a (N,) tensor from a (N, 1) tensor broadcasts to (N, N), which
    # averages over every prediction/label pair. When both tensors hold the same
    # number of elements, align the labels to the predictions' shape first.
    if (
        isinstance(outputs, tc.Tensor)
        and isinstance(labels, tc.Tensor)
        and outputs.shape != labels.shape
        and outputs.numel() == labels.numel()
    ):
        labels = labels.reshape(outputs.shape)
    return tc.mean((outputs - labels) ** 2)

In [None]:
# Predict with the random weights and a zero bias.
predicitons = linear_regression(inputs, random_params, 0)
# Side-by-side table of the flattened predictions and labels.
pd.DataFrame({
    'predictions': predicitons.detach().reshape(-1).numpy(),
    'labels': outputs.detach().reshape(-1).numpy(),
})

In [None]:
# Give the labels the same shape as the predictions so the error is computed
# element-wise instead of being broadcast across every prediction/label pair.
squared_error = mean_squared_error(predicitons, outputs.reshape(predicitons.shape))
print("Mean Squared Error =  ", squared_error.item())

d. 

In [None]:
def f(x):
    """Return 2 * x^T * x, i.e. twice the product of x transposed with x."""
    gram = x.t() @ x
    return 2 * gram

# Jacobian-vector product of f at the first five input rows, taken along a random
# direction of the same shape.
jvp_point = inputs[:5, :]
jvp_direction = tc.rand_like(jvp_point)
G = tc.autograd.functional.jvp(f, (jvp_point,), (jvp_direction,))
print(G)