# In [None]:
import os
import numpy as np
import pandas as pd

from sklearn.impute import KNNImputer
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer
from sklearn.ensemble import RandomForestRegressor

from scipy.interpolate import UnivariateSpline


class AdvancedImputationSystem:
    """Impute flagged and missing values of one column in a CSV-backed dataset.

    The dataset is read from ``df_final_path`` on construction. Every
    successful imputation writes the updated frame back to that same path and
    returns it. Rows selected by ``mask`` (e.g. detected outliers) are treated
    exactly like values that are already missing.
    """

    def __init__(self, df_final_path: str, column: str, mask: pd.Series, detecting_technique: str = None):
        """Load the dataset and remember which column/rows to impute.

        Args:
            df_final_path: Path of the CSV file to read and later overwrite.
            column: Name of the column to impute.
            mask: Boolean flags marking rows whose value must be re-imputed.
                A Series is aligned on the frame's index (labels absent from
                the mask count as not flagged); any other array-like must have
                one entry per row; ``None`` means "only existing NaNs".
            detecting_technique: Stored as-is; not used by this class.

        Raises:
            FileNotFoundError: If ``df_final_path`` does not exist.
        """
        self.df_final_path = df_final_path
        self.column = column
        self.mask = mask
        self.detecting_technique = detecting_technique

        if not os.path.exists(self.df_final_path):
            raise FileNotFoundError("Final dataset not found")
        self.df_final = pd.read_csv(self.df_final_path)

    def method(self, tool_name: str = None, config: dict = None):
        """Run the imputation named by ``tool_name`` and persist the result.

        Args:
            tool_name: One of ``"KNN Imputation"``, ``"Iterative Imputer"``,
                ``"Random Forest Imputation"``, ``"GAN Imputation"``,
                ``"Linear Interpolation"``, ``"Spline Interpolation"``.
                ``None`` selects the simple fallback (mean/median/zero).
            config: Options forwarded to the selected technique; defaults to
                an empty dict.

        Returns:
            The updated DataFrame, or the string
            ``"Invalid imputation method"`` / ``"Invalid fallback strategy"``
            when the request is not recognised (nothing is written to disk in
            that case).
        """
        if config is None:
            config = {}

        if tool_name is None:
            return self._fallback(config)

        handlers = {
            "KNN Imputation": self._knn,
            "Iterative Imputer": self._mice,
            "Random Forest Imputation": self._missforest,
            "GAN Imputation": self._gain,
            "Linear Interpolation": self._linear_interp,
            "Spline Interpolation": self._spline_interp,
        }
        handler = handlers.get(tool_name)
        if handler is None:
            return "Invalid imputation method"

        handler(config)
        self.df_final.to_csv(self.df_final_path, index=False)
        return self.df_final

    def _prepare_column(self):
        """Blank out (set to NaN) every flagged value of the target column."""
        col_data = self.df_final[self.column]
        flagged = col_data.isna()

        if self.mask is not None:
            if isinstance(self.mask, pd.Series):
                # Align on index labels so a mask built on a subset or a
                # re-indexed frame still selects the intended rows.
                detected = self.mask.reindex(col_data.index, fill_value=False).astype(bool)
            else:
                detected = pd.Series(np.asarray(self.mask, dtype=bool), index=col_data.index)
            flagged = flagged | detected

        # Series.mask upcasts integer columns to float when needed, unlike an
        # in-place ``col[mask] = np.nan`` which is an incompatible-dtype
        # assignment on integer data.
        self.df_final[self.column] = col_data.mask(flagged)

    def _fallback(self, config: dict):
        """Fill flagged values with the column mean, median, or zero.

        ``config["strategy"]`` selects ``"mean"`` (default), ``"median"`` or
        ``"zero"``. The result is written to disk and returned; an unknown
        strategy returns ``"Invalid fallback strategy"`` and changes nothing.
        """
        strategy = config.get("strategy", "mean")
        # Validate before touching the data so a bad request leaves the
        # in-memory frame exactly as it was.
        if strategy not in ("mean", "median", "zero"):
            return "Invalid fallback strategy"

        self._prepare_column()
        column = self.df_final[self.column]

        if strategy == "mean":
            value = column.mean()
        elif strategy == "median":
            value = column.median()
        else:
            value = 0

        self.df_final[self.column] = column.fillna(value)
        self.df_final.to_csv(self.df_final_path, index=False)
        return self.df_final

    def _impute_multivariate(self, imputer):
        """Fit ``imputer`` on the target plus numeric helper columns.

        KNN and iterative imputers learn a missing value from the *other*
        features of the same row. Given the target column alone they have
        nothing to learn from and fall back to the column mean, so the target
        is fitted together with every other usable numeric column and only
        the target is written back.

        Args:
            imputer: Any object exposing ``fit_transform(X)`` that returns an
                array whose first column corresponds to the first column of
                ``X``.

        Raises:
            ValueError: If the target column has no observed value left.
        """
        self._prepare_column()

        target = self.df_final[[self.column]]
        if not target[self.column].notna().any():
            raise ValueError(
                f"Column {self.column!r} has no observed values to impute from"
            )

        helpers = self.df_final.select_dtypes(include="number").drop(
            columns=[self.column], errors="ignore"
        )
        # Infinite values would make scikit-learn reject the input, and
        # all-NaN columns would be dropped by the imputer; neither carries
        # usable information, so leave them out of the helper features.
        helpers = helpers.replace([np.inf, -np.inf], np.nan).dropna(axis=1, how="all")

        # Target goes first so its position in the output is always 0, even
        # if the imputer appends indicator columns at the end.
        features = pd.concat([target, helpers], axis=1)
        imputed = np.asarray(imputer.fit_transform(features))
        self.df_final[self.column] = imputed[:, 0]

    def _knn(self, config: dict):
        """Impute with ``KNNImputer``; ``config`` is passed to its constructor."""
        self._impute_multivariate(KNNImputer(**config))

    def _mice(self, config: dict):
        """Impute with ``IterativeImputer``; ``config`` is passed to its constructor."""
        self._impute_multivariate(IterativeImputer(**config))

    def _missforest(self, config: dict):
        """Impute with an ``IterativeImputer`` driven by a random forest.

        ``config["rf_params"]`` configures the ``RandomForestRegressor`` and
        ``config["imputer_params"]`` the ``IterativeImputer``.
        """
        rf_config = config.get("rf_params", {})
        imputer_config = config.get("imputer_params", {})

        estimator = RandomForestRegressor(**rf_config)
        self._impute_multivariate(IterativeImputer(estimator=estimator, **imputer_config))

    def _gain(self, config: dict):
        """Placeholder for GAN-based imputation (no network is trained).

        Missing values are filled with the column mean plus Gaussian jitter
        whose spread is ``learning_rate * sqrt(epochs)``. Observed values are
        left untouched. ``config`` keys: ``epochs`` (default 100) and
        ``learning_rate`` (default 0.001); ``batch_size`` is accepted but has
        no effect.
        """
        self._prepare_column()

        epochs = config.get("epochs", 100)
        learning_rate = config.get("learning_rate", 0.001)

        col = self.df_final[self.column]
        missing = col.isna()
        filled = col.fillna(col.mean())

        n_missing = int(missing.sum())
        if n_missing and epochs > 0:
            # The sum of `epochs` independent N(0, lr) draws is distributed as
            # N(0, lr * sqrt(epochs)), so a single draw replaces the loop.
            jitter = np.random.normal(0, learning_rate * np.sqrt(epochs), size=n_missing)
            # Only imputed rows are perturbed; real measurements must never
            # be altered by the imputation step.
            filled.loc[missing] = filled.loc[missing].to_numpy() + jitter

        self.df_final[self.column] = filled

    def _linear_interp(self, config: dict):
        """Fill missing values by linear interpolation over row order.

        ``config["limit_direction"]`` (default ``"both"``) is forwarded to
        ``Series.interpolate``.
        """
        self._prepare_column()

        limit_direction = config.get("limit_direction", "both")
        self.df_final[self.column] = self.df_final[self.column].interpolate(
            method="linear",
            limit_direction=limit_direction,
        )

    def _spline_interp(self, config: dict):
        """Fill missing values from a spline fitted to the observed ones.

        The spline is fitted against the row position. ``config["order"]``
        (default 3) is the spline degree and ``config["s"]`` (default
        ``None``) the smoothing factor, both forwarded to
        ``UnivariateSpline``. Observed values are kept as they are.

        Raises:
            ValueError: If there are not more observed points than the
                spline degree.
        """
        self._prepare_column()

        order = config.get("order", 3)
        smoothing = config.get("s", None)

        y = self.df_final[self.column]
        missing = y.isna().to_numpy()
        if not missing.any():
            return

        observed = ~missing
        n_observed = int(observed.sum())
        if n_observed <= order:
            raise ValueError(
                f"Spline of order {order} needs more than {order} observed "
                f"values, got {n_observed}"
            )

        x = np.arange(len(y))
        values = y.to_numpy(dtype=float, copy=True)
        spline = UnivariateSpline(x[observed], values[observed], k=order, s=smoothing)

        # Evaluate the spline only where data is missing so that smoothing
        # never rewrites genuine observations.
        values[missing] = spline(x[missing])
        self.df_final[self.column] = values