In [201]:
import math
import pandas as pd 
import numpy as np 
from scipy import stats
from collections import Counter

In [202]:
class StatisticalMeasures: # A class is a blueprint or a template for creating objects.
    def __init__(self, file_path):
        self.data = self.load_data(file_path)

    def load_data(self, file_path):
        data = []
        with open(file_path, 'r') as file:
            # Skip the header row
            next(file)#Python’s next() function returns the next item of an iterator.

            for line in file:
                # Assuming each line contains numerical data separated by commas
                row = line.strip().split(',')
                try:
                    # Try converting values to floats
                    row = [float(value) for value in row[:-1]]  # Exclude the last column (non-numeric)
                    data.append(row)
                except ValueError:
                    # Handle non-numeric values (you can skip or handle them differently)
                    print(f"Skipping row with non-numeric values: {row}")
        return data

    def calculate_arithmetic_mean(self, column_index):
        ''' 
        By using this 'calculate_arithmetic_mean'  function we can find the arthametic mean,
        
        by giving the file location firstly and allocating it the variable we have to write it as file.function_name and 
        
        then we have to allocate it with the column_idex in the file on which you want to function the functionality of the function 
        
        in the above function we have give the function name as 'calculate_arithmetic_mean' and the column_index  then we will gwt the output of arthametic mean for the selected inex'''
        
        if not self.data or column_index >= len(self.data[0]):
            return None  # Invalid column index

        column_sum = 0
        column_count = 0
        for row in self.data:
            if column_index < len(row):
                column_sum += row[column_index]
                column_count += 1

        return column_sum / column_count if column_count > 0 else None #AM formula sumof observations divided by number of observations
    
    def calculate_geometric_mean(self, column_index):
        if not self.is_valid_column_index(column_index):
            return None

        column_values = [float(row[column_index]) for row in self.data[1:]]
        product_of_values = math.prod(column_values)
        geometric_mean = product_of_values ** (1 / len(column_values))
        return geometric_mean
    
    def calculate_harmonic_mean(self, column_index):
        if not self.is_valid_column_index(column_index):
            return None

        column_values = [float(row[column_index]) for row in self.data[1:]]
        reciprocal_values = [1 / value for value in column_values]
        harmonic_mean = len(column_values) / sum(reciprocal_values)
        return harmonic_mean
    def is_valid_column_index(self, column_index):
        return 0 <= column_index < len(self.data[0])
    def calculate_mode(self, column_index):
        if not self.is_valid_column_index(column_index):
            return None

        column_values = [float(row[column_index]) for row in self.data[1:]]
        count_values = Counter(column_values)
        mode_values = count_values.most_common()
        if len(mode_values) > 1 and mode_values[0][1] == mode_values[1][1]:
            return None  # no unique mode
        mode = mode_values[0][0]
        return mode
    def calculate_median(self, column_index):
        if not self.is_valid_column_index(column_index):
            return None

        column_values = sorted([float(row[column_index]) for row in self.data[1:]])
        n = len(column_values)
        if n % 2 == 0:
            median = (column_values[n // 2 - 1] + column_values[n // 2]) / 2
        else:
            median = column_values[n // 2]
        return median
    def describe(self, column_index):
        if not self.is_valid_column_index(column_index):
            return None

        arithmetic_mean = self.calculate_arithmetic_mean(column_index)
        geometric_mean = self.calculate_geometric_mean(column_index)
        harmonic_mean = self.calculate_harmonic_mean(column_index)
        mode = self.calculate_mode(column_index)
        median = self.calculate_median(column_index)

        description = {
            "Arithmetic Mean": arithmetic_mean,
            "Geometric Mean": geometric_mean,
            "Harmonic Mean": harmonic_mean,
            "Mode": mode,
            "Median": median,
        }

        return description
    
    def calculate_variance(self, column_index):
        if not self.is_valid_column_index(column_index):
            return None

        column_values = [float(row[column_index]) for row in self.data[1:]]
        mean = self.calculate_arithmetic_mean(column_index)
        variance = sum((value - mean) ** 2 for value in column_values) / (len(column_values) - 1)
        return variance
    def calculate_standard_deviation(self, column_index):
        variance = self.calculate_variance(column_index)
        standard_deviation = math.sqrt(variance)
        return standard_deviation
    def find_maximum_value(self, column_index):
        if column_index < len(self.data[0]):# Check if column_index is within the range of columns
            max_value = float(self.data[1][column_index])

            for row in self.data[2:]:
                current_value = float(row[column_index])
                if current_value > max_value:
                    max_value = current_value

            return max_value
        else:
            return None
        
    def find_minimum_value(self, column_index):
        if column_index < len(self.data[0]):
            min_value = float(self.data[1][column_index])

            for row in self.data[2:]:
                current_value = float(row[column_index])
                if current_value < min_value:
                    min_value = current_value

            return min_value
        else:
            return None
    def calculate_range(self, column_index):
        if column_index < len(self.data[0]):
            column_values = [float(row[column_index]) for row in self.data[1:]]
            column_range = max(column_values) - min(column_values)
            return column_range
        else:
            print("Invalid column index")
            return None   
        
   

In [203]:
file = StatisticalMeasures( r'C:\Users\Sam\Downloads\Iris.csv')

In [189]:
arithmetic_mean = file.calculate_arithmetic_mean(2)#will be giving the output for the second  index i.e SepalWidthCm AM
arithmetic_mean

3.0540000000000007

In [190]:
geometric_mean = file.calculate_geometric_mean(2)#will be giving the output for the second  index i.e SepalWidthCm GM
geometric_mean

3.0206144489293694

In [191]:
harmonic_mean = file.calculate_harmonic_mean(2)#will be giving the output for the second  index i.e SepalWidthCm HM
harmonic_mean

2.990230489523978

In [192]:
mode=file.calculate_mode(2)
mode

3.0

In [193]:
median=file.calculate_median(2)
median

3.0

In [194]:
describe_data=file.describe(2)
describe_data

{'Arithmetic Mean': 3.0540000000000007,
 'Geometric Mean': 3.0206144489293694,
 'Harmonic Mean': 2.990230489523978,
 'Mode': 3.0,
 'Median': 3.0}

In [195]:
variance= file.calculate_variance(2)
variance

0.18793029729729732

In [196]:
sd=file.calculate_standard_deviation(2)
sd

0.43350928167375763

In [197]:
maximum=file.find_maximum_value(2)
maximum

4.4

In [198]:
minimum=file.find_minimum_value(2)
minimum

2.0

In [200]:
range1=file.calculate_range(2)
range1

2.4000000000000004