Suppose you are working for a real estate agency, and you want to predict the selling price of houses based on several features such as square footage, number of bedrooms, number of bathrooms, and distance to the city center. You have collected a dataset containing these features as well as the selling prices of houses. Using multiple regression analysis, how would you build a model to predict the selling price of houses, and what factors would you consider when interpreting the coefficients of the model?

In [16]:
import numpy as np
from scipy.stats import linregress
from matplotlib import pyplot as plt
import random
from sklearn import linear_model

In [3]:
# Collected Data
data = {
    'Square_Footage': [1200, 1500, 1800, 1350, 1100],
    'Bedrooms': [2, 3, 4, 3, 2],
    'Bathrooms': [1, 2, 2, 1, 3],
    'Distance_to_City_Center': [5, 10, 2, 8, 3],
    'Selling_Price': [200000, 250000, 300000, 220000, 180000]
}

In [4]:
# Training data
dependent_training_variable = data['Selling_Price']
print(dependent_training_variable)

[200000, 250000, 300000, 220000, 180000]


In [5]:
# Predictors
sqft = data['Square_Footage']
bedrooms = data['Bedrooms']
bathrooms = data['Bathrooms']
distance_to_city = data['Distance_to_City_Center']

list_of_independent_variables = [
    sqft,
    bedrooms,
    bathrooms,
    distance_to_city
]
print(list_of_independent_variables)

[[1200, 1500, 1800, 1350, 1100], [2, 3, 4, 3, 2], [1, 2, 2, 1, 3], [5, 10, 2, 8, 3]]


In [6]:
# Calculate Coefficient(s)

def slope(independent_variables: list, dependant_variables: list = dependent_training_variable):
    
    # Running Validations
    print("Running Validations...")
    if len(independent_variables) != len(dependant_variables):
        raise TypeError (f"{independent_variables} should have: {len(dependant_variables)} observations.\nYours had: {len(independent_variables)}")
    
    for observation in independent_variables:
        if not isinstance(observation, int):
            raise TypeError (f"{independent_variables} should have intager values.\n{observation} is of type: {type(observation)}.")
    
    print("Validations Passed...")
    print("Proceeding to arithmetic opperations...")


    y_1 = independent_variables[len( independent_variables) // 2]
    y_2 = independent_variables[(len( independent_variables) + 2) // 2]

    x_1 = dependant_variables[len(dependant_variables) // 2]
    x_2 = dependant_variables[(len(dependant_variables) + 2) // 2]

    change_in_y = (y_1 - y_2)
    change_in_x = (x_1 - x_2)
    
    print(f"Change in y: {change_in_y}")
    print(f"Change in x: {change_in_x}")

    gradient_coefficient = change_in_y / change_in_x

    print(f"The coefficient of {independent_variables} and {dependant_variables} is: {gradient_coefficient}\n")

    return gradient_coefficient

In [19]:
# Slope Calculation
slope_of_sqft = slope(sqft)
slope_of_bedrooms = slope(bedrooms)
slope_of_bathrooms = slope(bathrooms)
slope_of_distance_to_city = slope(distance_to_city)

list_of_slopes = [
    slope_of_sqft,
    slope_of_bedrooms,
    slope_of_bathrooms,
    slope_of_distance_to_city,
                  ]


Running Validations...
Validations Passed...
Proceeding to arithmetic opperations...
Change in y: 450
Change in x: 80000
The coefficient of [1200, 1500, 1800, 1350, 1100] and [200000, 250000, 300000, 220000, 180000] is: 0.005625

Running Validations...
Validations Passed...
Proceeding to arithmetic opperations...
Change in y: 1
Change in x: 80000
The coefficient of [2, 3, 4, 3, 2] and [200000, 250000, 300000, 220000, 180000] is: 1.25e-05

Running Validations...
Validations Passed...
Proceeding to arithmetic opperations...
Change in y: 1
Change in x: 80000
The coefficient of [1, 2, 2, 1, 3] and [200000, 250000, 300000, 220000, 180000] is: 1.25e-05

Running Validations...
Validations Passed...
Proceeding to arithmetic opperations...
Change in y: -6
Change in x: 80000
The coefficient of [5, 10, 2, 8, 3] and [200000, 250000, 300000, 220000, 180000] is: -7.5e-05



In [8]:
print(list_of_slopes)

[0.005625, 1.25e-05, 1.25e-05, -7.5e-05]


In [9]:
# Calculate Mean of Each List in Array
def mean_calculator(arry: list):
    collected_means = []

    for sublist in arry:
        mean_of_each_x_value = sum(sublist) / len(sublist)
        collected_means.append(mean_of_each_x_value)

    return collected_means

# Calculate Intercept
def intercept(independent_variables: list, dependent_variable: list = dependent_training_variable, list_of_slopes: list = list_of_slopes):
    slope_and_x_mean_multiples = []
    position = 0
    number_of_operations = len(independent_variables)

    mean_of_y = sum(dependent_variable) / len(dependent_variable)
    mean_of_x_values = mean_calculator(independent_variables)

    subtracted_value_slope_and_mean = list_of_slopes[0] * mean_of_x_values[0]
    
    number_of_slope_values = len(list_of_slopes)
    mean_of_independent_values = sum(sum(sublist) for sublist in independent_variables) / sum(len(sublist) for sublist in independent_variables)
    multiple_of_n_slope_and_mean_of_x = number_of_slope_values * mean_of_independent_values

    while number_of_operations > 0:
        occurrence = list_of_slopes[position] * mean_of_x_values[position]
        slope_and_x_mean_multiples.append(occurrence)
        
        position += 1
        number_of_operations -= 1
    
    for i in range(1, len(slope_and_x_mean_multiples)):
        subtracted_value_slope_and_mean -= slope_and_x_mean_multiples[i]

    intercept_for_multiple_regression = mean_of_y - subtracted_value_slope_and_mean - multiple_of_n_slope_and_mean_of_x
    print(f"The intercept is: {intercept_for_multiple_regression}")

    return intercept_for_multiple_regression


In [18]:
intercept_value = intercept (list_of_independent_variables, dependent_training_variable, list_of_slopes)

The intercept is: 228591.9808875


In [17]:
regr = linear_model.LinearRegression()
regr.fit(list_of_independent_variables, dependent_training_variable)



ValueError: Found input variables with inconsistent numbers of samples: [4, 5]

In [14]:
# Fix Logical Error...
def linear_equation_function(gradient, x = None, intercept = intercept_value):
    if x == None:
        return gradient
    else:
        try:
            return gradient * x + intercept
        except TypeError as e:
            print(type(gradient), type(x), type(intercept))

list_of_y_hut = []

for position, independent_variable in enumerate(list_of_independent_variables):
    print(position, independent_variable)
    gradient = list_of_slopes[position]

    my_model = list(linear_equation_function(gradient, list_of_independent_variables[position]))
    list_of_y_hut.append(my_model)


#



0 [1200, 1500, 1800, 1350, 1100]
<class 'float'> <class 'list'> <class 'float'>


TypeError: 'NoneType' object is not iterable

In [None]:
# Residuals
def residuals():
    pass

In [None]:
# Mordular Calculation