In [None]:
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from imblearn.pipeline import Pipeline as ImPipeline
from sklearn.base import TransformerMixin, BaseEstimator
from sklearn.decomposition import PCA
from sklearn.ensemble import BaggingRegressor
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import RandomForestRegressor
from sklearn.feature_selection import RFECV
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import GridSearchCV, TimeSeriesSplit, StratifiedKFold
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.tree import DecisionTreeRegressor
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.seasonal import STL
from statsmodels.tsa.stattools import adfuller, kpss
from tensorflow import keras
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import L1, L2
# xgboost's scikit-learn regressor is named XGBRegressor.
from xgboost import XGBRegressor
%matplotlib inline

In [None]:
class DataFrameInterpolator(BaseEstimator, TransformerMixin):
    """Scikit-learn style transformer that fills missing values column by column.

    Parameters
    ----------
    method_map : dict or None, default=None
        Maps a column name to the strategy used to fill that column's gaps.
        Supported strategies are ``'ffill'`` (carry the last valid value
        forward) and ``'linear'`` (linear interpolation). Columns that are not
        listed are returned unchanged.
    """

    def __init__(self, method_map=None):
        # Stored as given; validation is deferred to transform().
        self.method_map = method_map

    def fit(self, X, y=None):
        """Do nothing and return ``self``; the transformer learns no state."""
        return self

    def transform(self, X):
        """Return a copy of ``X`` with the mapped columns filled.

        Parameters
        ----------
        X : pandas.DataFrame
            Frame containing every column named in ``method_map``.

        Returns
        -------
        pandas.DataFrame
            A copy of ``X``; the input frame itself is not modified.

        Raises
        ------
        ValueError
            If ``X`` is not a DataFrame, if ``method_map`` is ``None``, or if
            a mapped strategy is neither ``'ffill'`` nor ``'linear'``.
        """
        if not isinstance(X, pd.DataFrame):
            raise ValueError("Input must be a pandas DataFrame")

        if self.method_map is None:
            raise ValueError("You must provide a method map for interpolation.")

        # Work on a copy so the caller's frame is left untouched.
        X_interpolated = X.copy()

        for column, method in self.method_map.items():
            if method == 'ffill':
                # Series.ffill() is the supported spelling; the equivalent
                # fillna(method='ffill') form is deprecated in recent pandas.
                X_interpolated[column] = X_interpolated[column].ffill()
            elif method == 'linear':
                X_interpolated[column] = X_interpolated[column].interpolate(method='linear')
            else:
                raise ValueError("Invalid interpolation method. Choose 'ffill' or 'linear'.")

        return X_interpolated

# Example usage
# -------------
# Associate every column name with the strategy used to fill its gaps.
method_map = dict(
    column1='linear',
    column2='ffill',
    column3='linear',
)

# Build the transformer from that mapping.
interpolator = DataFrameInterpolator(method_map=method_map)

# Given a DataFrame `df` that contains the columns above:
# transformed_df = interpolator.transform(df)
# `transformed_df` can then be passed on to a scikit-learn pipeline.


In [None]:
# Column name -> fill strategy ('linear' or 'ffill') for DataFrameInterpolator.
method_map = {
    'column1': 'linear',
    'column2': 'ffill',
    'column3': 'linear'
}

In [None]:
# Chain gap-filling and the classifier into a single estimator.
# DataFrameInterpolator is configured through `method_map` (a dict of
# column -> 'ffill' | 'linear'); its __init__ has no `method` keyword.
pipeline = Pipeline([
    ('interpolate', DataFrameInterpolator(method_map=method_map)),
    ('clf', RandomForestClassifier())
])
