# In [21]:
import pandas as pd
import numpy as np
from scipy import stats

class DataCleaner:
    """Apply common cleaning steps to a private copy of a DataFrame.

    The frame passed to the constructor is copied, so the caller's object is
    never modified. Each cleaning method updates ``self.df`` and also returns
    it, so steps can be run one after another and the result read at any time.
    """

    # Accepted values for the ``method`` argument of each cleaning step.
    _FILL_METHODS = ('mean', 'median', 'mode')
    _OUTLIER_METHODS = ('zscore', 'iqr')

    def __init__(self, df: pd.DataFrame) -> None:
        """Store a copy of *df* as the working frame."""
        self.df = df.copy()

    def handle_missing_values(self, method: str = 'mean') -> pd.DataFrame:
        """Fill missing values column by column.

        Numeric (non-boolean) columns are filled with the column mean, median
        or mode according to *method*. All other columns are always filled
        with their mode. Columns that contain no non-null value at all are
        left untouched because there is nothing to derive a fill value from.

        Args:
            method: One of ``'mean'``, ``'median'`` or ``'mode'``.

        Returns:
            The working frame with missing values filled.

        Raises:
            ValueError: If *method* is not one of the supported names.
        """
        if method not in self._FILL_METHODS:
            raise ValueError(
                f"Unknown method {method!r}; expected one of {self._FILL_METHODS}"
            )

        for column in self.df.columns:
            series = self.df[column]
            if not series.isnull().any():
                continue
            if not series.notnull().any():
                # Entirely empty column: mean/median are NaN and mode() is empty.
                continue

            dtype = series.dtype
            # Check by dtype family rather than by name so float32, int32 and
            # nullable numeric dtypes get numeric imputation too.
            is_numeric = (pd.api.types.is_numeric_dtype(dtype)
                          and not pd.api.types.is_bool_dtype(dtype))

            if is_numeric and method != 'mode':
                fill_value = series.mean() if method == 'mean' else series.median()
                if (pd.api.types.is_integer_dtype(dtype)
                        and not float(fill_value).is_integer()):
                    # A nullable integer column cannot hold a fractional fill
                    # value; widen it to nullable float first.
                    series = series.astype('Float64')
            else:
                fill_value = series.mode()[0]

            # Assign the result back instead of calling fillna(inplace=True) on
            # the selected column: that chained form is deprecated and does
            # not update the frame under pandas copy-on-write.
            self.df[column] = series.fillna(fill_value)
        return self.df

    def remove_duplicates(self) -> pd.DataFrame:
        """Drop fully duplicated rows, keeping the first occurrence."""
        self.df = self.df.drop_duplicates()
        return self.df

    def detect_and_handle_outliers(self, method: str = 'zscore',
                                   threshold: float = 3) -> pd.DataFrame:
        """Remove rows that hold an outlier in any numeric column.

        Args:
            method: ``'zscore'`` drops rows where the absolute z-score
                (population standard deviation) of any numeric column is
                greater than or equal to *threshold*. ``'iqr'`` drops rows
                with a value outside ``[Q1 - 1.5 * IQR, Q3 + 1.5 * IQR]``.
            threshold: Z-score cut-off; only used by ``'zscore'``.

        Returns:
            The working frame without the outlier rows. The original index
            labels of the surviving rows are preserved.

        Raises:
            ValueError: If *method* is not one of the supported names.
        """
        if method not in self._OUTLIER_METHODS:
            raise ValueError(
                f"Unknown method {method!r}; expected one of {self._OUTLIER_METHODS}"
            )

        numeric_cols = self.df.select_dtypes(include=[np.number]).columns
        if len(numeric_cols) == 0:
            # Nothing to score; every row is kept.
            return self.df
        numeric = self.df[numeric_cols]

        if method == 'zscore':
            # Computed with pandas so missing values are skipped per column.
            # Constant columns give 0/0 = NaN; NaN never satisfies ">=", so
            # neither missing values nor constant columns flag a row.
            z_scores = (numeric - numeric.mean()) / numeric.std(ddof=0)
            flagged = (z_scores.abs() >= threshold).fillna(False)
        else:
            q1 = numeric.quantile(0.25)
            q3 = numeric.quantile(0.75)
            iqr = q3 - q1
            flagged = (numeric < (q1 - 1.5 * iqr)) | (numeric > (q3 + 1.5 * iqr))

        self.df = self.df[~flagged.any(axis=1)]
        return self.df

    def get_cleaned_data(self) -> pd.DataFrame:
        """Return the working frame in its current state."""
        return self.df


# Example usage: clean the CSV at `dataset_path` and write the result to
# "cleaned_dataset.csv" in the current working directory.
if __name__ == "__main__":  # run only when executed directly, not on import
    dataset_path = "iris.csv"  # input path, relative to the working directory
    df = pd.read_csv(dataset_path)
    cleaner = DataCleaner(df)
    # The three steps run in this order; each one updates the cleaner's frame.
    cleaner.handle_missing_values(method='mean')
    cleaner.remove_duplicates()
    # Uses the method's default z-score threshold of 3.
    cleaner.detect_and_handle_outliers(method='zscore')
    cleaned_df = cleaner.get_cleaned_data()
    # index=False keeps the DataFrame index out of the output file.
    cleaned_df.to_csv("cleaned_dataset.csv", index=False)
    print("Data cleaning complete. Cleaned file saved as 'cleaned_dataset.csv'.")

# Output: Data cleaning complete. Cleaned file saved as 'cleaned_dataset.csv'.
