In [25]:
import pandas as pd, numpy as np
class DataSetStatistics:
    def __init__(self, data):
        self.data = data.select_dtypes(include='number')
        self.length = len(self.data)
    


    def arithmetic_mean(self, column_name):
        if column_name in self.data.columns:
            column_data = self.data[column_name]
            numeric_data = pd.to_numeric(column_data, errors='coerce')
            valid_numeric_data = numeric_data.dropna()

            sum_val = 0
            count = 0
            for val in numeric_data:
                sum_val += val
                count += 1

            return sum_val / count if count > 0 else 0  
        else:
            return None 
    

    def geometric_mean(self, column_name):
        if column_name in self.data.columns:
            column_data = self.data[column_name]
            numeric_data = pd.to_numeric(column_data, errors='coerce')
            valid_numeric_data = numeric_data.dropna()
            product = np.prod(numeric_data)
            return (product ** (1 / len(numeric_data))) if len(numeric_data) > 0 else 0  
        else:
            return None  
    def harmonic_mean(self, column_name):
        if column_name in self.data.columns:
            column_data = self.data[column_name]
            numeric_data = pd.to_numeric(column_data, errors='coerce')
            valid_numeric_data = numeric_data.dropna()

            if 0 in numeric_data.values:
                return 0  
            return len(numeric_data) / sum(1 / num for num in numeric_data) if len(numeric_data) > 0 else 0
        else:
            return None
        
        
    def median(self, column_name):
        if column_name in self.data.columns:
            column_data = sorted(self.data[column_name])
            n = len(column_data)
            if n % 2 == 0:
                median_value = (column_data[n // 2 - 1] + column_data[n // 2]) / 2
            else:
                median_value = column_data[n // 2]
            return median_value
        else:
            return None 
    def variance(self, column_name):
        if column_name in self.data.columns:
            column_data = self.data[column_name]
            numeric_data = pd.to_numeric(column_data, errors='coerce')
            valid_numeric_data = numeric_data.dropna()
        
            mean = self.arithmetic_mean(column_name)  
            sum_squared_diff = sum((x - mean) ** 2 for x in numeric_data)  

            return sum_squared_diff / len(numeric_data) if len(numeric_data) > 0 else 0  
        else:
            return None
    def mode(self, column_name):
        freq = {}
        for num in self.data[column_name]:
            if num not in freq:
                freq[num] = 1
            else:
                freq[num] += 1
        mode_value = max(freq, key=freq.get)
        return mode_value
    
    def standard_deviation(self, column_name):
        return self.variance(column_name) ** 0.5
    
    def max_vlaue(self, column_name):
        return max(self.data[column_name])

    def min_vlaue(self, column_name):
        return min(self.data[column_name])
    
    def range_value(self, column_name):
        max_value = max(self.data[column_name])
        min_value = min(self.data[column_name])

        range_value = max_value - min_value
        return range_value
    def iqr(self, column_name):
        values = list(self.data[column_name])
        n = len(values)
        for i in range(n):
            for j in range(0, n - i - 1):
                if values[j] > values[j + 1]:
                    values[j], values[j + 1] = values[j + 1], values[j]

        q1_index = n // 4
        q3_index = n * 3 // 4

        q1 = values[q1_index]
        q3 = values[q3_index]

        return q3 - q1
    
    def quartiles(self, column_name):
        column = self.data[column_name]
        q1_index = len(column) // 4
        q2_index = len(column) // 2
        q3_index = len(column) * 3 // 4
        
        q1 = column[q1_index]
        q2 = column[q2_index]
        q3 = column[q3_index]

        return q1, q2, q3
    def coefficient_of_range(self, column_name):
        max_value = max(self.data[column_name])
        min_value = min(self.data[column_name])
        range_value = max_value - min_value
        coefficient_of_range_value = range_value / (max_value + min_value)
        return coefficient_of_range_value
    
    def coefficient_of_variation(self, column_name):
        return self.standard_deviation(column_name) / self.arithmetic_mean(column_name)
    
    def coefficient_of_standard_deviation(self, column_name):
        mean = sum(self.data[column_name]) / len(self.data[column_name])
        std_dev = (sum((x - mean) ** 2 for x in self.data[column_name]) / len(self.data[column_name])) ** 0.5
        max_value = max(self.data[column_name])
        min_value = min(self.data[column_name])
        range_value = max_value - min_value
        coefficient_of_std_dev_value = std_dev / range_value
        return coefficient_of_std_dev_value


In [26]:
data = pd.read_csv(r'G:\Iris.csv')
statistics = DataSetStatistics(data)

statistics.arithmetic_mean('SepalWidthCm')

3.0540000000000007

In [27]:
statistics.geometric_mean('SepalWidthCm')

3.0235822036025914

In [28]:
statistics.harmonic_mean('SepalWidthCm')

2.9931367940540596

In [29]:
statistics.median('SepalWidthCm')

3.0

In [31]:
statistics.variance('SepalWidthCm')

0.1867506666666667

In [32]:
statistics.mode('SepalWidthCm')

3.0

In [33]:
statistics.standard_deviation('SepalWidthCm')

0.4321465800705435

In [34]:
statistics.max_vlaue('SepalWidthCm')

4.4

In [35]:
statistics.min_vlaue('SepalWidthCm')

2.0

In [36]:
statistics.range_value('SepalWidthCm')

2.4000000000000004

In [37]:
statistics.iqr('SepalWidthCm')

0.5

In [38]:
statistics.quartiles('SepalWidthCm')

(3.1, 3.0, 3.0)

In [39]:
statistics.coefficient_of_range('SepalWidthCm')

0.37500000000000006

In [40]:
statistics.coefficient_of_variation('SepalWidthCm')

0.14150182713508297

In [41]:
statistics.coefficient_of_standard_deviation('SepalWidthCm')

0.18006107502939311