# Descriptive statistics problems

### Exercise 1

We will use NumPy to generate sample data and then describe it statistically.

- Generate an array of 100 elements following a normal distribution.
- Generate an array of 100 elements following a chi-square distribution with 3 degrees of freedom.
- Calculate the main metrics and statistical measures that best describe the two vectors.

In [19]:
import numpy as np
import pandas as pd
import statistics as stats

# Seed NumPy's global random generator so the samples below are reproducible.
np.random.seed(42)

# Draw 100 values from the standard normal distribution (mean=0, std=1).
normal_array = np.random.normal(size=100)

# Draw 100 values from a chi-square distribution with 3 degrees of freedom.
chi_square_array = np.random.chisquare(df=3, size=100)

# Put both samples side by side, one column per distribution, so they can be
# summarised together.
df = pd.DataFrame({
    'Normal Distribution': normal_array,
    'Chi-square Distribution': chi_square_array,
})

# describe() reports count, mean, std, min, the quartiles and max per column.
descriptive_statistics = df.describe()

print(descriptive_statistics)


       Normal Distribution  Chi-square Distribution
count           100.000000               100.000000
mean             -0.103847                 2.938080
std               0.908168                 2.423997
min              -2.619745                 0.132473
25%              -0.600906                 1.091695
50%              -0.126956                 2.463615
75%               0.405952                 3.982606
max               1.852278                12.724563


# Measures of central tendency

In [27]:
import statistics as stats

# Draw a fresh standard-normal sample of 100 values (mean=0, std=1).
# NOTE: this replaces any earlier `normal_array`, so the figures below
# describe this new sample.
normal_array = np.random.normal(size=100)

# Draw a fresh chi-square sample of 100 values (3 degrees of freedom).
chi_square_array = np.random.chisquare(df=3, size=100)


def calculate_measures(array):
    """Return the measures of central tendency of `array`.

    Args:
        array: A non-empty collection of numbers.

    Returns:
        A dict with the keys 'Mean', 'Median' and 'Mode'.

    Raises:
        statistics.StatisticsError: If `array` is empty.
    """
    return {
        'Mean': stats.mean(array),
        'Median': stats.median(array),
        # NOTE(review): for continuous samples every value is usually
        # distinct, so the mode carries little information; on Python 3.8+
        # statistics.mode() then returns the first value encountered.
        'Mode': stats.mode(array),
    }


# Compute the central-tendency measures for both samples
# (normal and chi-square).
measures_normal = calculate_measures(normal_array)
measures_chi_square = calculate_measures(chi_square_array)

# Build a DataFrame with one column per distribution and one row per measure
# so the two samples can be compared side by side.
df = pd.DataFrame({
    "Normal Distribution": measures_normal,
    "Chi-square Distribution": measures_chi_square,
})

# Show the comparison table.
print(df)


        Normal Distribution  Chi-square Distribution
Mean               0.233492                 2.617745
Median             0.275586                 2.111184
Mode              -0.925621                 1.939486


# Measures of dispersion

In [28]:
import statistics as stats

# Draw a fresh standard-normal sample of 100 values (mean=0, std=1).
# NOTE: this replaces any earlier `normal_array`, so the figures below
# describe this new sample.
normal_array = np.random.normal(size=100)

# Draw a fresh chi-square sample of 100 values (3 degrees of freedom).
chi_square_array = np.random.chisquare(df=3, size=100)


def calculate_measures(array):
    """Return the measures of dispersion of `array`.

    Args:
        array: A collection of at least two numbers.

    Returns:
        A dict with the keys 'Range' (max - min), 'Variance' (sample
        variance) and 'Standard deviation' (sample standard deviation).

    Raises:
        statistics.StatisticsError: If `array` has fewer than two values.
    """
    return {
        'Range': max(array) - min(array),
        'Variance': stats.variance(array),
        'Standard deviation': stats.stdev(array),
    }


# Compute the dispersion measures for both samples (normal and chi-square).
measures_normal = calculate_measures(normal_array)
measures_chi_square = calculate_measures(chi_square_array)

# Build a DataFrame with one column per distribution and one row per measure
# so the two samples can be compared side by side.
df = pd.DataFrame({
    "Normal Distribution": measures_normal,
    "Chi-square Distribution": measures_chi_square,
})

# Show the comparison table.
print(df)

                     Normal Distribution  Chi-square Distribution
Range                           5.133621                 9.553321
Variance                        1.078156                 4.553064
Standard deviation              1.038343                 2.133791


# Shape measures

In [29]:
from scipy.stats import skew 
from scipy.stats import kurtosis
# Draw a fresh standard-normal sample of 100 values (mean=0, std=1).
# NOTE: this replaces any earlier `normal_array`, so the figures below
# describe this new sample.
normal_array = np.random.normal(size=100)

# Draw a fresh chi-square sample of 100 values (3 degrees of freedom).
chi_square_array = np.random.chisquare(df=3, size=100)


def calculate_measures(array):
    """Return the shape measures of `array`.

    Both values are computed with SciPy's default settings; for kurtosis
    that is Fisher's definition (excess kurtosis), under which a normal
    distribution scores 0.

    Args:
        array: A collection of numbers.

    Returns:
        A dict with the keys 'Skew' and 'Kurtosis'.
    """
    return {
        'Skew': skew(array),
        'Kurtosis': kurtosis(array),
    }


# Compute the shape measures (skewness and kurtosis) for both samples
# (normal and chi-square).
measures_normal = calculate_measures(normal_array)
measures_chi_square = calculate_measures(chi_square_array)

# Build a DataFrame with one column per distribution and one row per measure
# so the two samples can be compared side by side.
df = pd.DataFrame({
    "Normal Distribution": measures_normal,
    "Chi-square Distribution": measures_chi_square,
})

# Show the comparison table.
print(df)

          Normal Distribution  Chi-square Distribution
Skew                -0.246385                 1.373333
Kurtosis             0.710921                 1.747343


### Exercise 2

Write a Python program to calculate the standard deviation of the following data:

```py
data = [4, 2, 5, 8, 6]
```

In [31]:
import math

# Sample values given in the exercise statement.
data = [4, 2, 5, 8, 6]

# function to calculate the average
def calculate_mean(data):
    """Return the arithmetic mean of `data`.

    Args:
        data: A non-empty sequence of numbers.

    Returns:
        The sum of the elements divided by their count. A single-element
        input goes through the same division as any other input, so the
        result type is consistent.

    Raises:
        ValueError: If `data` is empty, because the mean is then undefined.
    """
    n = len(data)
    if n == 0:
        raise ValueError("calculate_mean() requires at least one data point")
    return sum(data) / n

# Function to calculate the sample standard deviation
def calculate_standard_deviation(data):
    """Return the sample standard deviation of `data`.

    The squared deviations from the mean are divided by ``n - 1`` before
    the square root is taken.

    Args:
        data: A sequence of numbers.

    Returns:
        The sample standard deviation, or 0.0 when `data` holds fewer than
        two values.
    """
    count = len(data)
    # With fewer than two values there is no spread to measure; report 0.0.
    if count < 2:
        return 0.0

    average = calculate_mean(data)
    squared_deviations = [(value - average) ** 2 for value in data]
    sample_variance = sum(squared_deviations) / (count - 1)
    return math.sqrt(sample_variance)


# Report the input data together with its mean and sample standard deviation,
# one item per line.
print(
    f"Sample Data: {data}",
    f"Mean: {calculate_mean(data)}",
    f"Standard Deviation: {calculate_standard_deviation(data)}",
    sep="\n",
)


Sample Data: [4, 2, 5, 8, 6]
Mean: 5.0
Standard Deviation: 2.23606797749979
