# Descriptive statistics problems

In [19]:
# Import the necessary libraries:
import numpy as np
from scipy import stats

### Exercise 1

We will use NumPy to obtain information to describe statistically.

- Generate an array of 100 elements following a normal distribution.
- Generate an array of 100 elements following a chi-square distribution with 3 degrees of freedom.
- Calculate the main metrics and statistical measures that best describe the two vectors.

For this exercise, as the same parameters need to be computed for two distributions, different functions for each parameter will be declared.

In [20]:
# # # DISTRIBUTIONS DECLARATION # # #

# Definition of the number of elements in both distributions: 
n_elements = 100;

# Definition of degrees of freedom for Chi Squared Distribution: 
degrees_freedom = 3;

# Normal Distribution: 
normal_distribution = np.random.randn(n_elements);

# Chi-Square Distribution: 
chiSquare_distribution = np.random.chisquare(df = degrees_freedom, size = n_elements);

# Mean:

Simple application of the following formula:

![image.png](attachment:image.png)

In [21]:
# # # MEAN # # #
# Function that receives the distribution as an argument, calculates its mean and prints the computed result and Numpy Function's one (simple application of the formula):
def mean_cal (distribution):
    summation = 0;
    for element in distribution: summation += element;
    mean = np.round(summation/len(distribution), decimals = 3);
    print (f" Step-by-Step Mean: {mean}.\n Straight Function Mean: {np.round(np.mean(distribution), decimals = 3)}\n");
    return mean;

# Mean Function Call:
print(f" Normal Distribution - Mean:");
normal_mean = mean_cal(normal_distribution);
print(f" Chi-Square Distribution - Mean:");
chiSquare_mean = mean_cal(chiSquare_distribution);

 Normal Distribution - Mean:
 Step-by-Step Mean: -0.009.
 Straight Function Mean: -0.009

 Chi-Square Distribution - Mean:
 Step-by-Step Mean: 2.74.
 Straight Function Mean: 2.74



# Standard Deviation:

Simple application of the following formula:

![image-2.png](attachment:image-2.png)

In [22]:
# # # STANDARD DEVIATION # # #
# Function that receives the distribution and its mean as an arguments, calculates the STD and prints the computed result and Numpy Function's one (simple application of the formula):
def std_cal (distribution, mean):
    summation = 0;
    for element in distribution: summation += (element - mean)**2;
    std = np.round((summation/len(distribution))**0.5, decimals = 3);
    print (f" Step-by-Step STD: {std}\n Straight Function STD: {np.round(np.std(distribution), decimals = 3)}\n");
    return std;

# Standard Deviation Function Call: 
print(f" Normal Distribution - STD:");
normal_std = std_cal(normal_distribution, normal_mean);
print(f" Chi-Square Distribution - STD:");
chiSquare_std = std_cal(chiSquare_distribution, chiSquare_mean);

 Normal Distribution - STD:
 Step-by-Step STD: 0.896
 Straight Function STD: 0.896

 Chi-Square Distribution - STD:
 Step-by-Step STD: 2.03
 Straight Function STD: 2.03



# Variance:

Simple application of the following formula, which implies variance to be the square of the Standard Deviation:

![image.png](attachment:image.png)

In [23]:
# # # VARIANCE # # #
# Function that receives the distribution and its mean as an arguments, calculates the Variance and prints the computed result and Numpy Function's one (simple application of the formula):
def var_cal (distribution, std):
    var = np.round(std**2, decimals = 3); # Variance = STD Square
    print (f" Step-by-Step Variance: {var}\n Straight Function Variance: {np.round(np.var(distribution), decimals = 3)}\n");
    return var;

# Variance Function Call: 
print(f" Normal Distribution - Variance:");
normal_var = var_cal(normal_distribution, normal_std);
print(f" Chi-Square Distribution - Variance:");
chiSquare_var = var_cal(chiSquare_distribution, chiSquare_std);

 Normal Distribution - Variance:
 Step-by-Step Variance: 0.803
 Straight Function Variance: 0.803

 Chi-Square Distribution - Variance:
 Step-by-Step Variance: 4.121
 Straight Function Variance: 4.121



# Skewness:

Simple application of the following formula: 

![image.png](attachment:image.png)

In [31]:
# # # SKEWNESS # # #
# Function that receives the distribution, its mean and its STD as an arguments, calculates the Skewness and prints the computed result and Scipy's one (simple application of the formula):
def skew_cal (distribution, mean, std):
    length = len(distribution);
    summation = 0;
    for element in distribution: summation += (element - mean)**3;
    skew = np.round(summation / ((length-1) * (std**3)), decimals = 3);
    py_skew = (stats.skew(distribution)).round(2);
    print (f" Step-by-Step Skewness: {skew}\n Straight Function Skewness: {py_skew}\n");
    return skew;

# Skewness Function Call: 
print(f" Normal Distribution - Skewness:");
normal_skew = skew_cal(normal_distribution, normal_mean, normal_std);
print(f" Chi-Square Distribution - Skewness:");
chiSquare_skew = skew_cal(chiSquare_distribution, chiSquare_mean, chiSquare_std);

 Normal Distribution - Skewness:
 Step-by-Step Skewness: -0.06
 Straight Function Skewness: -0.06

 Chi-Square Distribution - Skewness:
 Step-by-Step Skewness: 0.94
 Straight Function Skewness: 0.93



# Kurtosis:

Simple application of the following formula:

![image-2.png](attachment:image-2.png)

In [25]:
# # # KURTOSIS # # #
# Function that receives the distribution and its mean as an arguments, calculates the Kurtosis and prints the computed result and Scipy's one (simple application of the formula):
def kurt_cal (distribution, mean):
    length = len(distribution);

    # Numerator: 
    summation_num = 0;
    for element in distribution : summation_num += (element - mean)**4;
    summation_num = (1/length)*summation_num;

    # Denominator:
    summation_den = 0;
    for element in distribution : summation_den += (element - mean)**2;
    summation_den = (summation_den / length)**2;

    kurt = np.round (summation_num / summation_den, decimals=3);
    py_kurt = stats.kurtosis(distribution, fisher=False).round(3);
    
    print (f" Step-by-Step Kurt: {kurt}\n Straight Function Kurt: {py_kurt}\n");

    return kurt;

# Kurtosis Function Call: 
print(f" Normal Distribution - Kurtosis:");
normal_kurt = kurt_cal(normal_distribution, normal_mean);
print(f" Chi-Square Distribution - Kurtosis:");
chiSquare_kurt = kurt_cal(chiSquare_distribution, chiSquare_mean);

 Normal Distribution - Kurtosis:
 Step-by-Step Kurt: 2.339
 Straight Function Kurt: 2.339

 Chi-Square Distribution - Kurtosis:
 Step-by-Step Kurt: 3.376
 Straight Function Kurt: 3.377



# Median:

Simple application of the following formula: 

![image.png](attachment:image.png)

In [26]:
# # # MEDIAN # # #
# Function that receives the distribution, calculates the Median and prints the computed result and Numpy Function's one (simple application of the formula):
def median_cal (distribution):
    # Sort all the elements so that the one in the middle can be found:
    sorted_distribution = np.sort(distribution);

    length = len(sorted_distribution);

    # If the total number of elements is odd, the median is exactly the one in the middle:
    if (length % 2 != 0):
        median = sorted_distribution[int((length + 1) / 2)];
    
    # If the total number of elements is even, the median is the mean among the one in the middle and the next one:
    else:
        median = (sorted_distribution[int(length / 2)] + sorted_distribution[int((length/2) + 1)]) / 2;

    print (f" Step-by-Step Median: {np.round(median, decimals = 2)}\n Straight Function Median: {np.round(np.median(distribution), decimals = 2)}\n");   
    return median;

# Median Function Call: 
print(f" Normal Distribution - Median:");
normal_median = median_cal(normal_distribution);
print(f" Chi-Square Distribution - Median:");
chiSquare_median = median_cal(chiSquare_distribution);

 Normal Distribution - Median:
 Step-by-Step Median: 0.0
 Straight Function Median: -0.02

 Chi-Square Distribution - Median:
 Step-by-Step Median: 2.35
 Straight Function Median: 2.32



# Mode:

- There is no formula to simply obtain the mode.

- To compute the mode, the elements of the distribution are first sorted, so all equal numbers are one after the other.

- All elements in the distribution are evaluated so that if the "actual" element is equal to the previous one (as they are sorted) we can sum how many times a number is in the distribution using a counter.

- Once it is found an element that is not equal to the previous one, the counter must be compared to the one storing the maximum times a number has been found till that moment.

- If this is evaluation is done for all elements in the distribution, the mode is found as such element whose counter has finally been the maximum one

In [27]:
# # # MODE # # #
# Function that receives the distribution, calculates the Mode and prints the computed result and Scipy's one (no formula exits, own method)
def mode_cal (distribution):
    # Sort the distribution:
    sorted_distribution = np.sort(distribution);

    # Counter for each element within the distribution:
    count_each = 0;

    # Counter that stores the maximum count:
    count_max = 0;

    # Iterate for each position in the distribution:
    for i in range (len(sorted_distribution)):

        # For the first element, as it cannot be compared to anything, the counter is just updated:
        if i == 0:
            count_each += 1;
        
        # For all rest of elements:
        else:
            # If the "actual" element is equal to the previous one, the counter is updated:
            if sorted_distribution [i] == sorted_distribution [i-1]:
                count_each += 1;
            
            # If the "actual" element is different from the previous one, the current counter must be compared to the maximum counter stored until the moment:
            else:
                # If it is found that the current counter is higher, the maximum counter is updated to the current one:
                if count_each > count_max:
                    count_max = count_each;
                    mode = sorted_distribution [i-1]; # The mode is saved as the value stored in the previous element (where the counter has been found maximum)
                # The counter is initialized for the current element:
                count_each = 1;
    
    # It is computed the STATS Library mode, obtaining an array whose first position stores the mode and the second one the occurencies:
    py_mode = stats.mode(distribution);
    
    print (f" Step-by-Step Mode: {np.round(mode, decimals = 2)}\n Straight Function Mode: {(py_mode[0]).round(2)}\n");           
    return mode;


# Mode Function Call: 
print(f" Normal Distribution - Mode:");
normal_mode = mode_cal(normal_distribution);
print(f" Chi-Square Distribution - Mode:");
chiSquare_mode = mode_cal(chiSquare_distribution);


 Normal Distribution - Mode:
 Step-by-Step Mode: -2.43
 Straight Function Mode: -2.43

 Chi-Square Distribution - Mode:
 Step-by-Step Mode: 0.07
 Straight Function Mode: 0.07



### Exercise 2

Write a Python program to calculate the standard deviation of the following data:

```py
data = [4, 2, 5, 8, 6]
```

To compute the Standard Deviation, it is used the following formula:

![image.png](attachment:image.png)

In [28]:
# Declare the list:
data = [4, 2, 5, 8, 6];

# Compute the mean to apply the STD Formula:
mean_summation = 0;
for element in data: mean_summation += element;
mean = np.round(mean_summation/len(data), decimals = 3);

# Declare a variable to store the summation of the STD Formula:
std_summation = 0;

# Iterate all over the elements in the list to apply the STD Formula:
for element in data: std_summation += (element - mean)**2;
std = (std_summation/len(data))**0.5;

print (f" Step-by-Step STD: {std}\n Straight Function STD: {np.std(data)}\n");



 Step-by-Step STD: 2.0
 Straight Function STD: 2.0

