In [None]:
import pandas as pd
import numpy as np

# Columns in rentals.csv:
# city,area,rooms,bathroom,parking spaces,floor,animal,furniture,hoa (R$),rent amount (R$),property tax (R$),fire insurance (R$),total (R$)
df = pd.read_csv("rentals.csv")

# The three numeric variables compared below: X = area, Y = rent amount, Z = fire insurance.
area = df["area"]
rent_amount = df["rent amount (R$)"]
fire_insurance = df["fire insurance (R$)"]

# Per-variable means (not referenced again in the visible cells; custom_cov recomputes its own).
mean_area = np.mean(area)
mean_rent_amount = np.mean(rent_amount)
mean_fire_insurance = np.mean(fire_insurance)
# NOTE: this is not the last expression of the cell, so the preview is evaluated but never displayed.
df.head()

def three_decimals(n):
    """Round ``n`` to four decimal places and return it as a float.

    Despite the name, the precision is four digits after the decimal point,
    not three. The value is formatted with ``.4f`` and parsed back, so the
    result is always a plain Python ``float``.
    """
    rounded_text = format(n, '.4f')
    return float(rounded_text)

def custom_cov(dataset1, dataset2):
    """Return the sample covariance of two equal-length 1-D sequences.

    The sum of products of deviations from the mean is divided by ``N - 1``,
    which is the same denominator ``np.cov`` uses by default.

    Parameters
    ----------
    dataset1, dataset2 : array-like
        One-dimensional numeric data (list, NumPy array, pandas Series, ...).
        Values are paired by position.

    Returns
    -------
    float
        The sample covariance, or NaN when fewer than two observations are
        given (the ``N - 1`` denominator would be zero or negative).

    Raises
    ------
    ValueError
        If the inputs are not one-dimensional or differ in length.
    """
    # Work on plain float arrays so pairing is positional. Indexing a pandas
    # Series with [] is a label lookup, which breaks on any non-default index.
    x = np.asarray(dataset1, dtype=float)
    y = np.asarray(dataset2, dtype=float)

    if x.ndim != 1 or x.shape != y.shape:
        raise ValueError("custom_cov expects two 1-D sequences of equal length")

    n = x.shape[0]
    if n < 2:
        # Sample covariance is undefined for fewer than two observations.
        return np.nan

    deviations_product = (x - x.mean()) * (y - y.mean())
    return np.sum(deviations_product) / (n - 1)  # N - 1: sample (unbiased) estimator

# The covariance of a variable with itself is its variance, so the first two prints should agree.
print("my covariance function:", custom_cov(area, area))

print("np covariance function:", np.cov(area, area)[0][1])
# np.var defaults to ddof=0 (divides by N), while custom_cov and np.cov divide by N - 1,
# so this value is a little off (slightly smaller). It matches if the "- 1" is removed from
# custom_cov's denominator, or equivalently if np.var is called with ddof=1.
print("np variance function:", np.var(area))

"""
X = area
Y = rent amount
Z = fire insurance
"""

# Each check compares custom_cov with the off-diagonal entry of np.cov's 2x2 matrix;
# both sides are rounded to 4 decimals so tiny floating-point differences do not fail the assert.

# Asserting (X, Y)
assert three_decimals(custom_cov(area, rent_amount)) == three_decimals(np.cov(area, rent_amount)[0][1])
# Asserting (X, Z)
assert three_decimals(custom_cov(area, fire_insurance)) == three_decimals(np.cov(area, fire_insurance)[0][1])
# Asserting (Y, Z)
assert three_decimals(custom_cov(rent_amount, fire_insurance)) == three_decimals(np.cov(rent_amount, fire_insurance)[0][1])
# Asserting (X, X)
assert three_decimals(custom_cov(area, area)) == three_decimals(np.cov(area, area)[0][1])
# Asserting (Y, Y)
assert three_decimals(custom_cov(rent_amount, rent_amount)) == three_decimals(np.cov(rent_amount, rent_amount)[0][1])
# Asserting (Z, Z)
assert three_decimals(custom_cov(fire_insurance, fire_insurance)) == three_decimals(np.cov(fire_insurance, fire_insurance)[0][1])
# All assertions passed without errors

In [None]:
def covariance_matrix(data):
    """Build the sample covariance matrix of a 2-D data set.

    Parameters
    ----------
    data : array-like of shape (n_observations, n_variables)
        Rows are observations and columns are variables. NumPy arrays,
        nested lists and pandas DataFrames are accepted.

    Returns
    -------
    list of list
        ``matrix[i][j]`` is ``custom_cov`` of variable ``i`` and variable ``j``.
    """
    # Convert to a plain array before transposing. On a DataFrame, `data.T`
    # followed by `data[i]` selects the column *labelled* i of the transposed
    # frame -- i.e. observation i, not variable i -- which silently produced a
    # covariance matrix of the first few rows instead of the columns.
    variables = np.asarray(data).T  # one row per variable

    return [
        [custom_cov(x_dataset, y_dataset) for y_dataset in variables]
        for x_dataset in variables
    ]

# Small hand-checkable example: 4 observations (rows) of 3 variables (columns).
# The first column is constant, so its variance and all of its covariances are 0.
dataset = np.array([[1, 1, 1], [1, 2, 1], [1, 3, 2], [1, 4, 3]])

print(covariance_matrix(dataset))

In [None]:
# Select the three variables of interest as a DataFrame.
# Column order here (area, fire insurance, rent amount) fixes the row/column order
# of the covariance matrices computed from `columns`.
columns = df[["area", "fire insurance (R$)", "rent amount (R$)"]]

def cov_matrix_calculation(data):
    """Return NumPy's covariance matrix for ``data``.

    ``rowvar=False`` tells ``np.cov`` that each column of ``data`` is a
    variable and each row is an observation.
    """
    return np.cov(data, rowvar=False)

# Same toy data set as in the previous cell (4 observations x 3 variables), redefined here.
dataset = np.array([[1, 1, 1], [1, 2, 1], [1, 3, 2], [1, 4, 3]])
print(cov_matrix_calculation(dataset))
print("\n matrix for my rental property data")
# NumPy's covariance matrix for the selected rental columns.
print(cov_matrix_calculation(columns))
print("\n\n\n\n")
# The hand-written covariance_matrix on the same columns, for comparison with the NumPy result above.
print(covariance_matrix(columns))