In [3]:

# from lab_utils_uni import plt_house_x, plt_contour_wgrad, plt_divergence, plt_gradients
import math
import copy
import numpy as np
import matplotlib.pyplot as plot
# Apply the matplotlib style sheet at ./utils_plot/deeplearning.mplstyle.
# The path is relative, so it is resolved against the current working directory.
plot.style.use('./utils_plot/deeplearning.mplstyle')


In [7]:
def compute_cost(x, y, w, b): 
    """
    Computes the cost function J(w, b) for linear regression.

    The model is f_wb(x) = w * x + b and the cost is half of the mean squared
    error over the m training examples:

        J(w, b) = 1 / (2 * m) * sum_i (f_wb(x[i]) - y[i]) ** 2

    ndarray: N-dimensional array: https://numpy.org/doc/stable/reference/arrays.ndarray.html

    Args: 
        x (ndarray (m,)):       data, m examples
        y (ndarray, (m,)):      target values
        w, b (scalar):          model parameters
    
    Returns 
        total_cost (float):     the cost of using w, b as the parameters for linear regression 
                                to fit the data points in x and y; 0.0 when there are no examples
    """
    # accept any array-like input (lists, tuples, ndarrays)
    x = np.asarray(x)
    y = np.asarray(y)

    # number of the training examples
    m = x.shape[0]
    if m == 0:
        # no examples: nothing to sum, and dividing by 2 * m would fail
        return 0.0

    # predictions of the model for every training example at once
    f_wb = w * x + b

    # the difference b/w the predictions of the tested model (f_wb)
    # and the actual values (y)
    diff = f_wb - y

    # diff can be negative, so it is squared; because it is squared, the cost
    # grows rapidly when w is too small or too large
    cost_sum = np.sum(diff ** 2)

    # total cost for the given model parameters - J(w, b).
    # The divisor is (2 * m); note that `1 / 2 * m` would evaluate as (1 / 2) * m.
    total_cost = cost_sum / (2 * m)
    return total_cost


Given our original data stored as JSON, we need to sanitize it:
- from each home's list of features, keep only the price, area and id
- remove the homes that have no price or no area
- parse the values


In [1]:
import json

# Read the raw house data for Sassari as text. The values shown after parsing
# contain non-ASCII characters (e.g. '€', 'm²'), so decode explicitly instead of
# relying on the platform's default encoding.
# NOTE(review): assumes the file is UTF-8 encoded (the JSON standard) - confirm.
with open('../../output/pricePerArea-Sassari.json', encoding='utf-8') as sassari_houses:
    file_contents = sassari_houses.read()

# Parse the JSON text into Python objects and print them for inspection
parsed_json = json.loads(file_contents)
print(parsed_json)

[[{'price': '€ 60.000', 'area': '168 m², esterna 974 m²\n                                         - Vedi dettaglio', 'id': 'EK-99220190 - 04/11/2022'}, {'price': '€ 64.000', 'area': '80 m²', 'id': '60622858 - 03/11/2022'}, {'price': '€ 64.000', 'area': '80 m²', 'id': '60621836 - 04/01/2023'}, {'price': '€ 65.000', 'area': '121 m² | commerciale 124 m²\n                                         - Vedi dettaglio', 'id': '18538758 - 01/09/2021'}, {'price': '€ 65.000', 'area': '250 m²', 'id': '40293092 - 28/11/2022'}, {'price': '€ 65.000', 'area': '30 m²', 'id': 'EK-99925384 - 12/12/2022'}, {'price': '€ 68.000', 'area': '105 m²\n                                         - Vedi dettaglio', 'id': '22111104 - 10/12/2022'}, {'price': '€ 69.000', 'area': '50 m²', 'id': '296504 - 11/01/2023'}, {'price': '€ 69.000', 'area': '32 m²', 'id': '23354189 - 24/09/2022'}, {'price': '€ 69.000', 'area': '50 m², esterna 15 m²\n                                         - Vedi dettaglio', 'id': 'EK-70753168 - 06/

Now that we have only the data we need in JSON format, let's convert it into a Python array of floats: 

In [None]:
# Training data set: element i of each array belongs to training example i
x_training_examples = np.array([1.0, 2.0], dtype=np.float64)      # feature values
y_training_examples = np.array([300.0, 500.0], dtype=np.float64)  # target values
