# In [None]:
import pandas as pd
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, MinMaxScaler

class Prep:
    """Collection of preprocessing helpers for pandas DataFrames.

    Methods:
        fill_missing_values(df, strategy="mean", *, fill_value=None)
        ret_standard_scaling(df)
        ret_minmax_scailing(df)
        cut_iqr(X_y_df=(None, None), coefficient=1.5, exclude=None)
        cut_iqr_each_class(X_y_df=(None, None), coefficient=1.5, exclude=None)

    None of the methods modify their inputs; each returns new objects.
    """

    # ---- Missing-value imputation ----
    def fill_missing_values(self, df, strategy="mean", *, fill_value=None):
        """Return a copy of ``df`` with missing values imputed.

        Args:
            df: DataFrame to impute.
            strategy: ``SimpleImputer`` strategy name. Ignored when
                ``fill_value`` is given.
            fill_value: When not ``None``, forces ``strategy="constant"`` and
                fills every missing entry with this value.

        Returns:
            A new DataFrame with the same column labels as ``df`` and a fresh
            default integer index (the original index is not carried over).
        """
        if fill_value is not None:
            strategy = "constant"

        imputer = SimpleImputer(strategy=strategy, fill_value=fill_value)
        # NOTE(review): per the scikit-learn docs, columns that are entirely
        # missing may be dropped for non-constant strategies, which would make
        # the column count disagree with ``df.columns`` below -- confirm
        # against the scikit-learn version in use.
        filled = imputer.fit_transform(df)

        return pd.DataFrame(filled, columns=df.columns)

    # ---- Scaling ----
    def ret_standard_scaling(self, df):
        """Return ``df`` transformed by a ``StandardScaler`` fitted on ``df``.

        The result keeps the column labels of ``df`` and has a fresh default
        integer index.
        """
        scaled = StandardScaler().fit_transform(df)
        return pd.DataFrame(scaled, columns=df.columns)

    def ret_minmax_scailing(self, df):
        """Return ``df`` transformed by a ``MinMaxScaler`` fitted on ``df``.

        The result keeps the column labels of ``df`` and has a fresh default
        integer index. (The method name keeps its historical spelling so
        existing callers continue to work.)
        """
        scaled = MinMaxScaler().fit_transform(df)
        return pd.DataFrame(scaled, columns=df.columns)

    # ---- Outlier removal ----
    def cut_iqr(self, X_y_df=(None, None), coefficient=1.5, exclude=None):
        """Drop rows whose feature values fall outside per-column bounds.

        Despite the name, the bounds are ``mean - coefficient * std`` and
        ``mean + coefficient * std`` of each checked column (not
        quartile-based fences). A row is kept only if every checked column
        lies strictly inside its own bounds; rows with NaN in a checked
        column are therefore dropped.

        Args:
            X_y_df: Pair ``(X, y)`` sharing the same index. ``X`` is the
                feature DataFrame, ``y`` the matching targets.
            coefficient: Multiplier applied to the standard deviation.
            exclude: Column labels of ``X`` that must not be checked. They
                are also left out of the mean/std computation, so they may
                be non-numeric.

        Returns:
            Tuple ``(X_kept, y_kept)`` with the original index labels of the
            surviving rows.

        Raises:
            ValueError: If ``X_y_df`` does not contain exactly two items.
        """
        if len(X_y_df) != 2:
            raise ValueError(f"X_y_df의 길이는 2 이어야 합니다. (현재 {len(X_y_df)})")
        X_df, y_df = X_y_df

        skipped = () if exclude is None else exclude
        columns = [column for column in X_df.columns if column not in skipped]
        if not columns:
            # Nothing to check: every row survives.
            return (X_df.copy(), y_df.copy())

        checked = X_df[columns]
        center = checked.mean()
        spread = coefficient * checked.std()

        # Bounds come from the unfiltered data, and each column is compared
        # against its own bounds; one combined mask then filters X and y
        # together so they stay aligned.
        in_range = (
            checked.gt(center - spread, axis="columns")
            & checked.lt(center + spread, axis="columns")
        ).all(axis="columns")

        return (X_df[in_range], y_df[in_range])

    def cut_iqr_each_class(self, X_y_df=(None, None), coefficient=1.5, exclude=None):
        """Apply :meth:`cut_iqr` separately to the rows of each class.

        For classification data: bounds are computed per class, so a value
        that is normal for one class is not judged against another class.

        Args:
            X_y_df: Pair ``(X, y)`` sharing the same index. Class labels are
                read from the first column of ``y`` (or from ``y`` itself if
                it is one-dimensional). Rows with a missing label are dropped.
            coefficient: Forwarded to :meth:`cut_iqr`.
            exclude: Forwarded to :meth:`cut_iqr`.

        Returns:
            Tuple ``(X_kept, y_kept)``: the surviving rows of every class,
            concatenated in sorted label order, with a fresh default integer
            index.

        Raises:
            ValueError: If ``X_y_df`` does not contain exactly two items.
        """
        if len(X_y_df) != 2:
            raise ValueError(f"X_y_df의 길이는 2 이어야 합니다. (현재 {len(X_y_df)})")
        X_df, y_df = X_y_df

        labels = y_df if y_df.ndim == 1 else y_df.iloc[:, 0]

        kept_X = []
        kept_y = []
        # Classes are taken from the data itself; sorting keeps the output
        # grouped in ascending label order.
        for label in sorted(labels.dropna().unique()):
            in_class = labels == label
            cut_X, cut_y = self.cut_iqr(
                X_y_df=(X_df[in_class], y_df[in_class]),
                coefficient=coefficient,
                exclude=exclude,
            )
            kept_X.append(cut_X)
            kept_y.append(cut_y)

        if not kept_X:
            # pd.concat rejects an empty list; return empty slices instead.
            return (
                X_df.iloc[:0].reset_index(drop=True),
                y_df.iloc[:0].reset_index(drop=True),
            )

        return (
            pd.concat(kept_X, ignore_index=True),
            pd.concat(kept_y, ignore_index=True),
        )