# Exercises: Level 1

Python has a module called `statistics` that can do all of these statistical calculations. However, to learn how to write and reuse functions, let us develop a program that calculates the measures of central tendency of a sample (mean, median, mode) and the measures of variability (range, variance, standard deviation). In addition to those measures, find the min, max, count, percentile, and frequency distribution of the sample. You can create a class called Statistics and implement all the functions that do statistical calculations as methods of the Statistics class. Check the output below.

In [8]:
from collections import Counter
from math import sqrt

class Statistics:
    """Descriptive statistics over a sample of numbers.

    The sample is stored as given (not copied) in ``self.data``. It must
    support ``len()`` and repeated iteration, e.g. a list or tuple.
    """

    def __init__(self, data):
        """Store *data* as the sample to analyse."""
        self.data = data

    def count(self):
        """Return the number of observations in the sample."""
        return len(self.data)

    def sum(self):
        """Return the total of all observations (0 for an empty sample)."""
        # Method names are not in scope inside method bodies, so the bare
        # name ``sum`` here is the builtin, not this method.
        return sum(self.data)

    def mean(self):
        """Return the arithmetic mean of the sample."""
        return self.sum() / self.count()

    def median(self):
        """Return the middle value of the sorted sample.

        For an even number of observations the two middle values are
        averaged.
        """
        ordered = sorted(self.data)
        size = len(ordered)
        middle = size // 2
        if size % 2 != 0:
            return ordered[middle]
        return (ordered[middle - 1] + ordered[middle]) / 2

    def mode(self):
        """Return a list of the most frequent value(s).

        Every value tied for the highest frequency is included, in order of
        first appearance in the sample.
        """
        frequency = Counter(self.data)
        highest = max(frequency.values())
        return [value for value, seen in frequency.items() if seen == highest]

    def data_range(self):
        """Return the difference between the largest and smallest value."""
        return max(self.data) - min(self.data)

    def variance(self):
        """Return the population variance (squared deviations divided by n)."""
        centre = self.mean()
        return sum((x - centre) ** 2 for x in self.data) / len(self.data)

    def standard_deviation(self):
        """Return the population standard deviation (root of the variance)."""
        return sqrt(self.variance())

    def min_value(self):
        """Return the smallest observation."""
        return min(self.data)

    def max_value(self):
        """Return the largest observation."""
        return max(self.data)

    def percentile(self, percentile_rank):
        """Return the value at *percentile_rank* using linear interpolation.

        The rank is mapped onto the fractional index
        ``(n - 1) * percentile_rank / 100`` of the sorted sample, and the
        result is interpolated between the two neighbouring observations.

        Args:
            percentile_rank: A number between 0 and 100 inclusive.

        Raises:
            ValueError: If *percentile_rank* lies outside 0..100.
        """
        # Without this guard a negative rank indexes from the end of the list
        # and a rank above 100 runs past it, yielding a wrong value or a bare
        # IndexError. The negated form also rejects NaN.
        if not 0 <= percentile_rank <= 100:
            raise ValueError(
                f"percentile_rank must be between 0 and 100, got {percentile_rank!r}"
            )

        ordered = sorted(self.data)
        position = (len(ordered) - 1) * (percentile_rank / 100)
        lower = int(position)
        fraction = position - lower
        if lower + 1 < len(ordered):
            return ordered[lower] + fraction * (ordered[lower + 1] - ordered[lower])
        # Rank 100 (or a one-element sample): there is no upper neighbour.
        return ordered[lower]

    def frequency_distribution(self):
        """Return a dict mapping each distinct value to its occurrence count."""
        return dict(Counter(self.data))


# Sample data
ages = [31, 26, 34, 37, 27, 26, 32, 32, 26, 27, 27, 24, 32, 33, 27, 25, 26, 38, 37, 31, 34, 24, 33, 29, 26]

# Create an instance of the Statistics class
data = Statistics(ages)

# Compute the mode and the frequency table once and reuse them below,
# instead of recomputing them inside the print calls.
modes = data.mode()
frequencies = data.frequency_distribution()

# Printing results (trailing comments show the value each line prints)
print('Count:', data.count())  # 25
print('Sum: ', data.sum())  # 744
print('Min: ', data.min_value())  # 24
print('Max: ', data.max_value())  # 38
print('Range: ', data.data_range())  # 14
print('Mean: ', data.mean())  # 29.76
print('Median: ', data.median())  # 29
print('Mode: ', {'mode': modes, 'count': frequencies[modes[0]]})  # {'mode': [26], 'count': 5}
print('Standard Deviation: ', round(data.standard_deviation(), 1))  # 4.2
print('Variance: ', round(data.variance(), 1))  # 17.5
# Most frequent values first; the sort is stable, so ties keep first-seen order.
print('Frequency Distribution: ', sorted(frequencies.items(), key=lambda x: x[1], reverse=True))


Count: 25
Sum:  744
Min:  24
Max:  38
Range:  14
Mean:  29.76
Median:  29
Mode:  {'mode': [26], 'count': 5}
Standard Deviation:  4.2
Variance:  17.5
Frequency Distribution:  [(26, 5), (27, 4), (32, 3), (31, 2), (34, 2), (37, 2), (24, 2), (33, 2), (25, 1), (38, 1), (29, 1)]
