In [2]:
import numpy as np

def compute_mean_and_variance(vector, ddof=0):
    """Return the arithmetic mean and the variance of a sequence of numbers.

    Args:
        vector: A sized, re-iterable collection of numbers (it is passed to
            ``len`` and iterated twice).
        ddof: Delta degrees of freedom. The sum of squared deviations is
            divided by ``len(vector) - ddof``. The default ``0`` gives the
            population variance (divide by N); ``1`` gives the unbiased
            sample variance (divide by N - 1).

    Returns:
        A ``(mean, variance)`` tuple.

    Raises:
        ValueError: If ``vector`` is empty, or if it has no more than
            ``ddof`` elements (the divisor would be zero or negative).
    """
    n = len(vector)
    if n == 0:
        # Fail with a clear message instead of a bare ZeroDivisionError.
        raise ValueError("vector must contain at least one element")
    if n - ddof <= 0:
        raise ValueError("vector must contain more than ddof elements")

    mean = sum(vector) / n
    squared_deviation_sum = sum((x - mean) ** 2 for x in vector)
    variance = squared_deviation_sum / (n - ddof)
    return mean, variance

def main():
    """Prompt for a length, draw a random feature vector, and print its statistics.

    Reads an integer ``n`` from standard input, draws ``n`` samples with
    ``np.random.rand``, and prints the vector together with the mean and
    variance returned by ``compute_mean_and_variance``.

    Raises:
        ValueError: If the input is not an integer or is smaller than 1.
    """
    n = int(input("Enter n: "))
    if n < 1:
        # An empty vector has no mean; reject it here with a clear message.
        raise ValueError("n must be a positive integer")

    # tolist() converts the samples to built-in floats so the printed list
    # shows plain numbers rather than NumPy scalar reprs.
    feature_vector = np.random.rand(n).tolist()
    mean, variance = compute_mean_and_variance(feature_vector)
    print("Feature Vector:", feature_vector)
    print("Mean:", mean)
    print("Variance:", variance)

# Run the demo when this code is executed directly; a notebook cell also
# satisfies this check, so main() runs (and prompts for n) on execution.
if __name__ == "__main__":
    main()
    

Enter n: 5
Feature Vector: [0.0436438782063594, 0.41790782023228323, 0.6233679368817107, 0.9214615620275266, 0.45732229595089635]
Mean: 0.4927406986597552
Variance: 0.08188148622756804


# Mean (Sample Mean):

The mean of a sample is the average value of the data points in that sample.
It is calculated by summing up all the values in the sample and dividing by the number of observations.
The sample mean provides a measure of central tendency, representing the "typical" value in the sample.
It's often used as a representative value to summarize the data set.

# Variance (Sample Variance):

The variance of a sample measures how much the values in the sample vary from the sample mean.
It quantifies the spread or dispersion of the data points around the sample mean.
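To calculate the sample variance, the differences between each data point and the sample mean are squared, summed, and then divided by the number of observations minus one (N - 1). Note that the code cell above divides by N instead, so the value it prints is the population variance; the two differ by a factor of N / (N - 1).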
A higher variance indicates that the data points are more spread out from the mean, while a lower variance suggests that the data points are closer to the mean.
Variance is a fundamental measure of variability and is widely used in statistics and data analysis to understand the distribution of data.

In [4]:
def compute_covariance(vector1, vector2):
    """Return the sample covariance of two equal-length sequences of numbers.

    The sum of the products of paired deviations from each mean is divided
    by ``n - 1``.

    Args:
        vector1: First sized, re-iterable collection of numbers.
        vector2: Second collection, with the same length as ``vector1``.

    Returns:
        The sample covariance as a number.

    Raises:
        ValueError: If the lengths differ, or if there are fewer than two
            elements (the ``n - 1`` divisor would be zero or negative).
    """
    if len(vector1) != len(vector2):
        raise ValueError("Vectors must have the same length")

    n = len(vector1)
    if n < 2:
        # Fail with a clear message instead of a bare ZeroDivisionError.
        raise ValueError("Vectors must contain at least two elements")

    mean1 = sum(vector1) / n
    mean2 = sum(vector2) / n
    pairwise_diff_sum = sum(
        (a - mean1) * (b - mean2) for a, b in zip(vector1, vector2)
    )
    return pairwise_diff_sum / (n - 1)

# Example input: vector2 is vector1 reversed, so one rises as the other
# falls and the covariance comes out negative.
vector1 = [1, 2, 3, 4, 5]
vector2 = [5, 4, 3, 2, 1]

covariance = compute_covariance(vector1, vector2)
print("Covariance:", covariance)


Covariance: -2.5


# The covariance between two vectors represents the degree to which the elements of the vectors vary together. More specifically:

If the covariance is positive, it indicates that when one vector has a high value, the other tends to have a high value as well. Similarly, when one vector has a low value, the other tends to have a low value.

If the covariance is negative, it indicates that when one vector has a high value, the other tends to have a low value, and vice versa.

If the covariance is close to zero, it suggests that there is little to no linear relationship between the two vectors.

In [5]:
import numpy as np

def compute_correlation(vector1, vector2):
    """Return the Pearson correlation coefficient of two equal-length vectors.

    The coefficient is computed as the covariance divided by the product of
    the two standard deviations, all using the divide-by-N form.

    Args:
        vector1: First vector; any array-like of numbers (list, tuple,
            NumPy array).
        vector2: Second vector, with the same length as ``vector1``.

    Returns:
        The correlation as a NumPy float. NaN is returned when either vector
        has zero variance, because the coefficient is undefined in that case.

    Raises:
        ValueError: If the vectors have different lengths.
    """
    # Coerce to float arrays so plain Python sequences work too; subtracting
    # a scalar mean from a list would otherwise raise TypeError.
    vector1 = np.asarray(vector1, dtype=float)
    vector2 = np.asarray(vector2, dtype=float)
    if len(vector1) != len(vector2):
        raise ValueError("Vectors must have the same length")

    n = len(vector1)
    deviations1 = vector1 - np.mean(vector1)
    deviations2 = vector2 - np.mean(vector2)

    covariance = np.sum(deviations1 * deviations2) / n
    std_dev1 = np.sqrt(np.sum(deviations1 ** 2) / n)
    std_dev2 = np.sqrt(np.sum(deviations2 ** 2) / n)

    denominator = std_dev1 * std_dev2
    if denominator == 0:
        # Constant input: return NaN explicitly rather than relying on a
        # divide-by-zero that emits a RuntimeWarning.
        return np.float64("nan")
    return covariance / denominator


# Example input as NumPy arrays: vector2 is vector1 reversed, a perfectly
# linear decreasing relationship, so the result is -1 up to rounding error.
vector1 = np.array([1, 2, 3, 4, 5])
vector2 = np.array([5, 4, 3, 2, 1])

correlation = compute_correlation(vector1, vector2)
print("Correlation:", correlation)


Correlation: -0.9999999999999998


# Correlation: 
Correlation is a statistical measure that describes the extent to which two variables are linearly related to each other. Specifically, the Pearson correlation coefficient, which is what we computed here, ranges from -1 to 1:
A correlation of 1 indicates a perfect positive linear relationship, meaning that as one variable increases, the other also increases proportionally.
A correlation of -1 indicates a perfect negative linear relationship, meaning that as one variable increases, the other decreases proportionally.
A correlation of 0 indicates no linear relationship between the variables.

In [6]:
import numpy as np

def compute_covariance_matrix(data, rowvar=True):
    """Return the covariance matrix of a 2-D data set using ``np.cov``.

    Args:
        data: 2-D array-like of numbers.
        rowvar: Passed through to ``np.cov``. When True (the default), each
            row is a variable and each column an observation. Pass False
            when rows are observations and columns are variables.

    Returns:
        The covariance matrix as a NumPy array, with one row and one column
        per variable.
    """
    return np.cov(data, rowvar=rowvar)

def compute_correlation_matrix(data, rowvar=True):
    """Return the Pearson correlation matrix of a 2-D data set using ``np.corrcoef``.

    Args:
        data: 2-D array-like of numbers.
        rowvar: Passed through to ``np.corrcoef``. When True (the default),
            each row is a variable and each column an observation. Pass
            False when rows are observations and columns are variables.

    Returns:
        The correlation matrix as a NumPy array, with one row and one column
        per variable.
    """
    return np.corrcoef(data, rowvar=rowvar)

# Example 4x3 array. Each of the 4 rows is treated as one variable
# (rowvar=True), so both results below are 4x4. Every row increases by the
# same step of 1, so all covariances and correlations come out as 1.
data = np.array([[1, 2, 3], 
                 [4, 5, 6], 
                 [7, 8, 9], 
                 [10, 11, 12]])

covariance_matrix = compute_covariance_matrix(data)
print("Covariance Matrix:")
print(covariance_matrix)

correlation_matrix = compute_correlation_matrix(data)
print("\nCorrelation Matrix:")
print(correlation_matrix)


Covariance Matrix:
[[1. 1. 1. 1.]
 [1. 1. 1. 1.]
 [1. 1. 1. 1.]
 [1. 1. 1. 1.]]

Correlation Matrix:
[[1. 1. 1. 1.]
 [1. 1. 1. 1.]
 [1. 1. 1. 1.]
 [1. 1. 1. 1.]]
