<a href="https://colab.research.google.com/github/Ishan130803/AIMS-Task-1/blob/main/SimpleImputer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd

In [54]:
class SimpleImputer(object):
    """Fill missing entries of selected columns with a per-column statistic.

    Parameters
    ----------
    strategy : {"mean", "median", "mode", "constant"}
        How the replacement value is derived for each column.
    missing_values : scalar, default ``np.nan``
        Marker that identifies a missing entry.  NaN-like markers (``np.nan``,
        ``None``) are detected with ``pd.isna``; any other scalar is matched
        with ``==``.
    constant_value : int, float, str or NumPy number, optional
        Replacement value; required when ``strategy == "constant"``.

    Raises
    ------
    ValueError
        If ``strategy`` is unknown, or if ``strategy == "constant"`` and
        ``constant_value`` is not a number or a string.
    """

    _STRATEGIES = ("mean", "median", "mode", "constant")

    def __init__(
            self,
            strategy,
            missing_values=np.nan,
            constant_value=None,
    ):
        if strategy not in self._STRATEGIES:
            raise ValueError("Invalid Strategy for imputation")
        if strategy == "constant" and not isinstance(
                constant_value, (int, float, str, np.number)):
            # Built from the local argument: no attribute has been assigned
            # yet at this point, so ``self.strategy`` cannot be used here.
            raise ValueError(
                "Invalid constant_value for the 'constant' strategy: "
                f"{constant_value!r}"
            )
        self.strategy = strategy
        self.missing_values = missing_values
        self.constant_value = constant_value

    def _missing_mask(self, data):
        """Return a boolean mask, shaped like ``data``, of missing entries."""
        # NaN never compares equal to itself, so NaN-like markers need
        # ``pd.isna`` rather than ``==``.
        if pd.isna(self.missing_values):
            return pd.isna(data)
        return data == self.missing_values

    def impute_column(self, data):
        """Return a copy of the pandas Series ``data`` with missing entries filled.

        Raises
        ------
        ValueError
            With the "mode" strategy when the column has no observed value.
        """
        missing = self._missing_mask(data)

        if self.strategy == "constant":
            fill_value = self.constant_value
        else:
            observed = data[~missing]
            if self.strategy == "mean":
                fill_value = observed.mean()
            elif self.strategy == "median":
                fill_value = observed.median()
            else:
                # ``Series.mode`` returns the modes sorted, so ties resolve
                # to the smallest value.
                modes = observed.mode()
                if modes.empty:
                    raise ValueError("Can't find mode with only NaN values")
                fill_value = modes.iloc[0]

        return data.where(~missing, fill_value)

    def impute_array(self, data):
        """Return a new NumPy array equal to ``data`` with missing entries filled.

        Raises
        ------
        ValueError
            With the "mode" strategy when the array has no observed value.
        """
        missing = self._missing_mask(data)

        if self.strategy == "constant":
            fill_value = self.constant_value
        else:
            observed = data[~missing]
            if self.strategy == "mean":
                fill_value = np.nanmean(observed)
            elif self.strategy == "median":
                fill_value = np.nanmedian(observed)
            else:
                # Drop stray NaNs too, so NaN can never win the vote when a
                # custom ``missing_values`` marker is in use.
                observed = observed[~pd.isna(observed)]
                if observed.size == 0:
                    raise ValueError("Can't find mode with only NaN values")
                # ``np.unique`` sorts its output and ``argmax`` returns the
                # first maximum, so ties resolve to the smallest value.
                values, counts = np.unique(observed, return_counts=True)
                fill_value = values[np.argmax(counts)]

        return np.where(missing, fill_value, data)

    def fit_transform(self, data, columns=None):
        """Impute the requested columns of ``data`` and return the result.

        Parameters
        ----------
        data : pandas.DataFrame or 2-D numpy.ndarray
            Table to impute.  Any other type is returned unchanged.
        columns : iterable, optional
            Column labels (DataFrame) or integer column indices (ndarray) to
            impute.  When omitted, no column is imputed.

        Returns
        -------
        pandas.DataFrame or numpy.ndarray
            For a DataFrame, an imputed copy (the input is left untouched).
            For an ndarray, the same array object, modified in place; filled
            values are cast to the array's dtype on assignment.

        Raises
        ------
        TypeError
            If ``data`` is an ndarray that is not two-dimensional.
        """
        if columns is None:
            columns = ()

        if isinstance(data, pd.DataFrame):
            # Work on a copy so the caller's frame is never modified.
            data = data.copy()
            for col in columns:
                data[col] = self.impute_column(data[col])
        elif isinstance(data, np.ndarray):
            if data.ndim != 2:
                raise TypeError("Invalid Shape of given array")
            for col in columns:
                data[:, col] = self.impute_array(data[:, col])

        return data

In [64]:
# Sample 3x5 float matrix; column 0 is missing in the first and last rows.
a = np.array(
    [
        [np.nan, 2.0, 3.0, 5.0, 2.0],
        [1.0, 3.0, 4.0, 66.0, 7.0],
        [np.nan, 5.0, 6.0, 8.0, 9.0],
    ],
    dtype=np.float64,
)