In [7]:
df = pd.read_csv(r"C:\Users\ARUN KUMAR\Desktop\261  & 264\package 261 & 264\Iris.csv")
df

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,5.1,3.5,1.4,0.2,Iris-setosa
1,2,4.9,3.0,1.4,0.2,Iris-setosa
2,3,4.7,3.2,1.3,0.2,Iris-setosa
3,4,4.6,3.1,1.5,0.2,Iris-setosa
4,5,5.0,3.6,1.4,0.2,Iris-setosa
...,...,...,...,...,...,...
145,146,6.7,3.0,5.2,2.3,Iris-virginica
146,147,6.3,2.5,5.0,1.9,Iris-virginica
147,148,6.5,3.0,5.2,2.0,Iris-virginica
148,149,6.2,3.4,5.4,2.3,Iris-virginica


In [60]:
import pandas as pd
import numpy as np

class DataSetStatistics:
    def __init__(self, data):
        """
        Initialize the DataSetStatistics object.

        Parameters:
        - data (DataFrame): The input DataFrame containing numeric data.
        """
        self.data = data.select_dtypes(include='number')
        self.length = len(self.data)

    def arithmetic_mean(self, column_name):
        """
        Calculate the arithmetic mean of a given column.

        Parameters:
        - column_name (str): The name of the column.

        Returns:
        - float: The arithmetic mean.
        """
        if column_name in self.data.columns:
            column_data = self.data[column_name]
            numeric_data = pd.to_numeric(column_data, errors='coerce')
            valid_numeric_data = numeric_data.dropna()

            return np.mean(valid_numeric_data)
        else:
            return None 

    def geometric_mean(self, column_name):
        """
        Calculate the geometric mean of a given column.

        Parameters:
        - column_name (str): The name of the column.

        Returns:
        - float: The geometric mean.
        """
        if column_name in self.data.columns:
            column_data = self.data[column_name]
            numeric_data = pd.to_numeric(column_data, errors='coerce')
            valid_numeric_data = numeric_data.dropna()
            product = np.prod(numeric_data)
            return (product ** (1 / len(numeric_data))) if len(numeric_data) > 0 else 0  
        else:
            return None

    def harmonic_mean(self, column_name):
        """
        Calculate the harmonic mean of a given column.

        Parameters:
        - column_name (str): The name of the column.

        Returns:
        - float: The harmonic mean.
        """
        if column_name in self.data.columns:
            column_data = self.data[column_name]
            numeric_data = pd.to_numeric(column_data, errors='coerce')
            valid_numeric_data = numeric_data.dropna()

            if 0 in numeric_data.values:
                return 0  
            return len(numeric_data) / sum(1 / num for num in numeric_data) if len(numeric_data) > 0 else 0
        else:
            return None

    def median(self, column_name):
        """
        Calculate the median of a given column.

        Parameters:
        - column_name (str): The name of the column.

        Returns:
        - float: The median.
        """
        if column_name in self.data.columns:
            column_data = sorted(self.data[column_name])
            n = len(column_data)
            if n % 2 == 0:
                median_value = (column_data[n // 2 - 1] + column_data[n // 2]) / 2
            else:
                median_value = column_data[n // 2]
            return median_value
        else:
            return None

    def variance(self, column_name):
        """
        Calculate the variance of a given column.

        Parameters:
        - column_name (str): The name of the column.

        Returns:
        - float: The variance.
        """
        if column_name in self.data.columns:
            column_data = self.data[column_name]
            numeric_data = pd.to_numeric(column_data, errors='coerce')
            valid_numeric_data = numeric_data.dropna()
        
            mean = self.arithmetic_mean(column_name)  
            sum_squared_diff = sum((x - mean) ** 2 for x in numeric_data)  

            return sum_squared_diff / len(numeric_data) if len(numeric_data) > 0 else 0  
        else:
            return None

    def mode(self, column_name):
        """
        Calculate the mode of a given column.

        Parameters:
        - column_name (str): The name of the column.

        Returns:
        - float: The mode.
        """
        freq = {}
        for num in self.data[column_name]:
            if num not in freq:
                freq[num] = 1
            else:
                freq[num] += 1
        mode_value = max(freq, key=freq.get)
        return mode_value

    def standard_deviation(self, column_name):
        """
        Calculate the standard deviation of a given column.

        Parameters:
        - column_name (str): The name of the column.

        Returns:
        - float: The standard deviation.
        """
        return self.variance(column_name) ** 0.5

    def max_value(self, column_name):
        """
        Find the maximum value in a given column.

        Parameters:
        - column_name (str): The name of the column.

        Returns:
        - float: The maximum value.
        """
        return max(self.data[column_name])

    def min_value(self, column_name):
        """
        Find the minimum value in a given column.

        Parameters:
        - column_name (str): The name of the column.

        Returns:
        - float: The minimum value.
        """
        return min(self.data[column_name])

    def range_value(self, column_name):
        """
        Calculate the range of a given column.

        Parameters:
        - column_name (str): The name of the column.

        Returns:
        - float: The range value.
        """
        max_value = max(self.data[column_name])
        min_value = min(self.data[column_name])

        range_value = max_value - min_value
        return range_value

    def iqr(self, column_name):
        """
        Calculate the interquartile range (IQR) of a given column.

        Parameters:
        - column_name (str): The name of the column.

        Returns:
        - float: The interquartile range (IQR).
        """
        values = list(self.data[column_name])
        n = len(values)
        for i in range(n):
            for j in range(0, n - i - 1):
                if values[j] > values[j + 1]:
                    values[j], values[j + 1] = values[j + 1], values[j]

        q1_index = n // 4
        q3_index = n * 3 // 4

        q1 = values[q1_index]
        q3 = values[q3_index]

        return q3 - q1

    def quartiles(self, column_name):
        """
        Calculate the quartiles (Q1, Q2, Q3) of a given column.

        Parameters:
        - column_name (str): The name of the column.

        Returns:
        - tuple: Quartiles (Q1, Q2, Q3).
        """
        column = self.data[column_name]
        q1_index = len(column) // 4
        q2_index = len(column) // 2
        q3_index = len(column) * 3 // 4
        
        q1 = column[q1_index]
        q2 = column[q2_index]
        q3 = column[q3_index]

        return q1, q2, q3

    def coefficient_of_range(self, column_name):
        """
        Calculate the coefficient of range of a given column.

        Parameters:
        - column_name (str): The name of the column.

        Returns:
        - float: The coefficient of range.
        """
        max_value = max(self.data[column_name])
        min_value = min(self.data[column_name])
        range_value = max_value - min_value
        coefficient_of_range_value = range_value / (max_value + min_value)
        return coefficient_of_range_value

    def coefficient_of_variation(self, column_name):
        """
        Calculate the coefficient of variation of a given column.

        Parameters:
        - column_name (str): The name of the column.

        Returns:
        - float: The coefficient of variation.
        """
        return self.standard_deviation(column_name) / self.arithmetic_mean(column_name)

    def coefficient_of_standard_deviation(self, column_name):
        """
        Calculate the coefficient of standard deviation of a given column.

        Parameters:
        - column_name (str): The name of the column.

        Returns:
        - float: The coefficient of standard deviation.
        """
        mean = sum(self.data[column_name]) / len(self.data[column_name])
        std_dev = (sum((x - mean) ** 2 for x in self.data[column_name]) / len(self.data[column_name])) ** 0.5
        max_value = max(self.data[column_name])
        min_value = min(self.data[column_name])
        range_value = max_value - min_value
        coefficient_of_std_dev_value = std_dev / range_value
        return coefficient_of_std_dev_value

    def covariance(self, column1, column2):
        """
        Calculate the covariance between two columns.

        Parameters:
        - column1 (str): The name of the first column.
        - column2 (str): The name of the second column.
        
        Returns:
        - float: The covariance between the two columns.
        """
        if column1 in self.data.columns and column2 in self.data.columns:
            return np.cov(self.data[column1], self.data[column2], ddof=0)[0, 1]
        else:
            return None

    def correlation(self, column1, column2):
        """
        Calculate the correlation coefficient between two columns.

        Parameters:
        - column1 (str): The name of the first column.
        - column2 (str): The name of the second column.

        Returns:
        - float: The correlation coefficient between the two columns.
        """
        if column1 in self.data.columns and column2 in self.data.columns:
            return np.corrcoef(self.data[column1], self.data[column2])[0, 1]
        else:
            return None

statistics = DataSetStatistics(df)


In [43]:

statistics.arithmetic_mean('SepalWidthCm')

3.0540000000000003

In [44]:
statistics.geometric_mean('SepalWidthCm')


3.0235822036025914

In [45]:
statistics.harmonic_mean('SepalWidthCm')


2.9931367940540596

In [46]:
statistics.median('SepalWidthCm')

3.0

In [47]:
statistics.variance('SepalWidthCm')


0.1867506666666668

In [48]:
statistics.mode('SepalWidthCm')


3.0

In [49]:
statistics.standard_deviation('SepalWidthCm')


0.43214658007054363

In [59]:
statistics.max_value('SepalWidthCm')


4.4

In [61]:
statistics.min_value('SepalWidthCm')

2.0

In [52]:
statistics.range_value('SepalWidthCm')


2.4000000000000004

In [53]:
statistics.iqr('SepalWidthCm')


0.5

In [54]:
statistics.quartiles('SepalWidthCm')


(3.1, 3.0, 3.0)

In [55]:
statistics.coefficient_of_range('SepalWidthCm')


0.37500000000000006

In [56]:
statistics.coefficient_of_variation('SepalWidthCm')


0.14150182713508302

In [57]:
statistics.coefficient_of_standard_deviation('SepalWidthCm')


0.18006107502939311