In [3]:
def mean_abs_deviation(data):
    mean = sum(data) / len(data)
    deviation = [abs(x - mean) for x in data]
    return sum(deviation) / len(data)

def median_abs_deviation(data):
    median = sorted(data)[len(data) // 2]
    deviation = [abs(x - median) for x in data]
    return median(deviation)





In [5]:
# example usage
data = [1, 2, 3, 4, 5]

mad = mean_abs_deviation(data)
mad

1.2

In [6]:
medad = median_abs_deviation(data)

TypeError: 'int' object is not callable

In [7]:
import numpy as np

def pearson_correlation(x, y):
    x_mean = np.mean(x)
    y_mean = np.mean(y)
    x_std = np.std(x)
    y_std = np.std(y)
    cov = np.cov(x, y)[0][1]
    return cov / (x_std * y_std)

def spearman_correlation(x, y):
    x_rank = np.argsort(x)
    y_rank = np.argsort(y)
    x_d = np.array([x_rank[i] - y_rank[i] for i in range(len(x))])
    return 1 - (6 * np.sum(x_d**2)) / (len(x) * (len(x)**2 - 1))

# example usage
x = [1, 2, 3, 4, 5]
y = [1, 4, 5, 3, 2]

pearson = pearson_correlation(x, y)
print("Pearson Correlation:", pearson)

spearman = spearman_correlation(x, y)
print("Spearman Correlation:", spearman)


Pearson Correlation: 0.12499999999999997
Spearman Correlation: 0.09999999999999998


In [8]:
def kendall_correlation(x, y):
    n = len(x)
    x_rank = sorted(range(len(x)), key=lambda i: x[i])
    y_rank = sorted(range(len(y)), key=lambda i: y[i])
    concordant_pairs = 0
    discordant_pairs = 0
    for i in range(n):
        for j in range(i+1, n):
            if (x_rank[i] < x_rank[j] and y_rank[i] < y_rank[j]) or (x_rank[i] > x_rank[j] and y_rank[i] > y_rank[j]):
                concordant_pairs += 1
            else:
                discordant_pairs += 1
    return (concordant_pairs - discordant_pairs) / (n * (n - 1) / 2)

# example usage
x = [1, 2, 3, 4, 5]
y = [1, 4, 5, 3, 2]

kendall = kendall_correlation(x, y)
print("Kendall Correlation:", kendall)


Kendall Correlation: 0.0


In [9]:
import numpy as np
from sklearn.covariance import EmpiricalCovariance, MinCovDet

def robust_covariance(X):
    mcd = MinCovDet().fit(X)
    return mcd.covariance_, mcd.location_

# example usage
X = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]])
cov, mean = robust_covariance(X)
print("Covariance:\n", cov)
print("Mean:\n", mean)


Covariance:
 [[11.25 11.25 11.25]
 [11.25 11.25 11.25]
 [11.25 11.25 11.25]]
Mean:
 [5.5 6.5 7.5]




In [10]:
import numpy as np

def cholesky_decomposition(covariance_matrix):
    n = covariance_matrix.shape[0]
    L = np.zeros((n, n))
    for i in range(n):
        for j in range(i + 1):
            s = sum(L[i, k] * L[j, k] for k in range(j))
            if i == j:
                L[i, j] = np.sqrt(covariance_matrix[i, i] - s)
            else:
                L[i, j] = (covariance_matrix[i, j] - s) / L[j, j]
    return L

# example usage
cov = np.array([[1, 0.5, 0.2], [0.5, 1, 0.3], [0.2, 0.3, 1]])
L = cholesky_decomposition(cov)
print("Cholesky Decomposition:\n", L)


Cholesky Decomposition:
 [[1.         0.         0.        ]
 [0.5        0.8660254  0.        ]
 [0.2        0.23094011 0.95219046]]


In [11]:
import numpy as np

def covariance_to_correlation(covariance_matrix):
    n = covariance_matrix.shape[0]
    correlation_matrix = np.zeros((n, n))
    for i in range(n):
        for j in range(n):
            correlation_matrix[i, j] = covariance_matrix[i, j] / np.sqrt(covariance_matrix[i, i] * covariance_matrix[j, j])
    return correlation_matrix

# example usage
cov = np.array([[1, 0.5, 0.2], [0.5, 1, 0.3], [0.2, 0.3, 1]])
corr = covariance_to_correlation(cov)
print("Correlation Matrix:\n", corr)


Correlation Matrix:
 [[1.  0.5 0.2]
 [0.5 1.  0.3]
 [0.2 0.3 1. ]]


In [12]:
import numpy as np
from scipy.linalg import inv

def partial_correlation(data, var1, var2, controlling_vars):
    X = np.column_stack((data[:, controlling_vars], data[:, [var1, var2]]))
    cov = np.cov(X, rowvar=False)
    n = X.shape[1]
    partial_correlation = -cov[n - 2, n - 1] / np.sqrt(cov[n - 2, n - 2] * cov[n - 1, n - 1])
    return partial_correlation

# example usage
data = np.array([[1, 2, 3, 4, 5], [2, 3, 4, 5, 6], [3, 4, 5, 6, 7], [4, 5, 6, 7, 8]])
pcorr = partial_correlation(data, 3, 4, [0, 1, 2])
print("Partial Correlation Coefficient:", pcorr)


Partial Correlation Coefficient: -1.0


In [13]:
import numpy as np
from scipy.stats import pearsonr

def partial_corr(x, y, covars):
    """
    Calculate the partial correlation coefficient between two variables x and y, 
    adjusting for the effect of a set of covariates.
    
    Parameters:
        x, y (np.array): arrays of the two variables to be correlated
        covars (np.array): array of covariates
    
    Returns:
        tuple: Pearson's correlation coefficient and p-value
    """
    # Calculate the residuals for x and y after regressing out the effect of the covariates
    X = np.column_stack((covars, x))
    beta_x = np.linalg.lstsq(X, y, rcond=None)[0]
    res_x = y - X @ beta_x
    Y = np.column_stack((covars, y))
    beta_y = np.linalg.lstsq(Y, x, rcond=None)[0]
    res_y = x - Y @ beta_y
    
    # Calculate the partial correlation coefficient between the residuals
    corr, p_value = pearsonr(res_x, res_y)
    return corr, p_value


In [14]:
import numpy as np
from scipy.optimize import minimize

def nearest_corr_matrix(matrix):
    """
    Compute the nearest correlation matrix for a given matrix by minimizing the Frobenius distance.
    
    Parameters:
        matrix (np.array): The input matrix to find the nearest correlation matrix for.
        
    Returns:
        np.array: The nearest correlation matrix.
    """
    n = matrix.shape[0]
    
    # Constraints for optimization
    bounds = [(1e-15, 1 - 1e-15) for i in range(n * (n - 1) // 2)]
    
    # Minimize the Frobenius distance between the input matrix and the nearest correlation matrix
    def frobenius_distance(x):
        k = 0
        nearest_matrix = np.zeros((n, n))
        for i in range(n):
            for j in range(i+1, n):
                nearest_matrix[i, j] = x[k]
                nearest_matrix[j, i] = x[k]
                k += 1
        nearest_matrix = (nearest_matrix + nearest_matrix.T) / 2
        nearest_matrix = nearest_matrix + np.eye(n)
        return np.linalg.norm(nearest_matrix - matrix, 'fro')
    
    result = minimize(frobenius_distance, [0.5 for i in range(n * (n - 1) // 2)], bounds=bounds)
    x = result.x
    k = 0
    nearest_matrix = np.zeros((n, n))
    for i in range(n):
        for j in range(i+1, n):
            nearest_matrix[i, j] = x[k]
            nearest_matrix[j, i] = x[k]
            k += 1
    nearest_matrix = (nearest_matrix + nearest_matrix.T) / 2
    nearest_matrix = nearest_matrix + np.eye(n)
    return nearest_matrix
