In [1]:
import numpy as np
import pandas as pd

In [2]:
class covariance:
    """Unbiased sample covariance of two equally shaped numeric samples.

    After a successful ``cov`` call the instance keeps the values used for
    that computation in ``x_mean``, ``y_mean`` and ``feat_len`` (the number
    of paired observations).
    """

    def __init__(self):
        # All attributes stay None until cov() has validated its inputs once.
        self.x_mean = None
        self.y_mean = None
        self.feat_len = None

    def cov(self, x, y):
        """Return the sample covariance of ``x`` and ``y`` (n - 1 normalisation).

        Parameters
        ----------
        x, y : array_like
            Numeric data of identical shape holding at least two paired
            observations. Anything ``np.asarray`` accepts (lists, tuples,
            arrays) may be passed.

        Returns
        -------
        numpy scalar
            ``sum((x - mean(x)) * (y - mean(y))) / (n - 1)`` where ``n`` is
            the number of paired observations.

        Raises
        ------
        ValueError
            If the shapes of ``x`` and ``y`` differ, or if fewer than two
            paired observations are supplied.
        """
        x = np.asarray(x)
        y = np.asarray(y)

        # Compare full shapes, not just the first axis: otherwise inputs such
        # as (n,) and (n, k) would be silently broadcast against each other.
        if x.shape != y.shape:
            raise ValueError("Data dimensions not matching")

        # Means and the sum below run over every element, so the sample count
        # must be the total element count to keep the normalisation consistent.
        n_obs = x.size
        if n_obs < 2:
            # n - 1 would be zero or negative; the estimate is undefined.
            raise ValueError("At least two paired observations are required")

        self.x_mean = np.mean(x)
        self.y_mean = np.mean(y)
        self.feat_len = n_obs

        co_deviation = np.sum((x - self.x_mean) * (y - self.y_mean))
        return co_deviation / (self.feat_len - 1)
    
    


In [3]:
# Reproducible synthetic data: two independently drawn uniform samples.
np.random.seed(42)
n_samples = 1_000_000
x = 100 * np.random.rand(n_samples)  # scaled to the range 0..100
y = 50 * np.random.rand(n_samples)   # scaled to the range 0..50

# Value produced by the hand-written implementation.
cov_calc = covariance()
custom_cov = cov_calc.cov(x, y)

# Reference value: off-diagonal entry of NumPy's 2x2 covariance matrix,
# using the same n - 1 normalisation (ddof=1).
numpy_cov = np.cov(x, y, ddof=1)[0][1]

# Show both numbers and whether they agree within np.isclose's default tolerance.
print("Custom Covariance:", custom_cov)
print("NumPy Covariance:", numpy_cov)
print("Are they equal?", np.isclose(custom_cov, numpy_cov))


Custom Covariance: 0.054772832591662775
NumPy Covariance: 0.05477283259166249
Are they equal? True


In [4]:
class Pearson_corr:
    """Pearson correlation coefficient of two equally shaped numeric samples.

    After a successful ``corr`` call the instance keeps the values used for
    that computation in ``x_mean``, ``y_mean``, ``x_std``, ``y_std`` (sample
    standard deviations, ``ddof=1``) and ``feat_len`` (the number of paired
    observations).
    """

    def __init__(self):
        # All attributes stay None until corr() has validated its inputs once.
        self.x_mean = None
        self.y_mean = None
        self.x_std = None
        self.y_std = None
        self.feat_len = None

    def corr(self, x, y):
        """Return the Pearson correlation coefficient of ``x`` and ``y``.

        Parameters
        ----------
        x, y : array_like
            Numeric data of identical shape holding at least two paired
            observations. Anything ``np.asarray`` accepts (lists, tuples,
            arrays) may be passed.

        Returns
        -------
        numpy scalar
            ``sum((x - mean(x)) * (y - mean(y))) / (std(x) * std(y) * (n - 1))``
            with sample standard deviations (``ddof=1``).

        Raises
        ------
        ValueError
            If the shapes of ``x`` and ``y`` differ, or if fewer than two
            paired observations are supplied.

        Notes
        -----
        A constant input has no defined correlation. No special handling is
        done for that case; the division by a zero standard deviation is left
        to NumPy (typically ``nan`` together with a RuntimeWarning).
        """
        x = np.asarray(x)
        y = np.asarray(y)

        # Compare full shapes, not just the first axis: otherwise inputs such
        # as (n,) and (n, k) would be silently broadcast against each other.
        if x.shape != y.shape:
            raise ValueError("Data dimensions not matching")

        # Means, standard deviations and the sum below run over every element,
        # so the sample count must be the total element count.
        n_obs = x.size
        if n_obs < 2:
            # With ddof=1 the standard deviation needs at least two samples.
            raise ValueError("At least two paired observations are required")

        self.x_mean = np.mean(x)
        self.y_mean = np.mean(y)
        self.x_std = np.std(x, ddof=1)
        self.y_std = np.std(y, ddof=1)
        self.feat_len = n_obs

        co_deviation = np.sum((x - self.x_mean) * (y - self.y_mean))
        return co_deviation / (self.x_std * self.y_std * (self.feat_len - 1))
    

In [5]:

# Two uniform samples of 100 points each. No seed is set in this cell, so the
# draws continue from the current state of NumPy's global random generator.
x, y = np.random.rand(100), np.random.rand(100)

# Value produced by the hand-written implementation.
pearson = Pearson_corr()
custom_r = pearson.corr(x, y)

# Reference value: off-diagonal entry of NumPy's 2x2 correlation matrix.
numpy_r = np.corrcoef(x, y)[0][1]

# Show both numbers and whether they agree within np.isclose's default tolerance.
print("Custom Pearson:", custom_r)
print("NumPy Pearson:", numpy_r)
print("Almost equal?", np.isclose(custom_r, numpy_r))


Custom Pearson: 0.17799148871951043
NumPy Pearson: 0.17799148871951048
Almost equal? True
