In [1]:
import pandas as pd

# I. Import Raw Data

In [2]:
# Read the exported CSV, rename the "Unnamed: 0" column to "index",
# and parse the "date" column into pandas timestamps.
df_raw = (
    pd.read_csv("./2024-07-11T19-39_export.csv")
    .rename(columns={"Unnamed: 0": "index"})
    .assign(date=lambda frame: pd.to_datetime(frame["date"]))
)
#df_raw

# II. Filling Methods

## 1. Mean of the Preceding 15 Days

In [3]:
# Deep copy of the raw data: the filler below writes imputed values into the frame it is given.
df_Mean_15_day_ = df_raw.copy(deep=True)

class mean_15_days_filling:
    """Replace ``-1`` cells with the mean of the same column over the previous 15 days.

    The frame passed to the constructor is modified in place; the same object
    is exposed as ``df_Mean_15_day_output`` (``None`` if filling failed).
    """

    def __init__(self, df_Mean_15_day_input_):
        self.df_Mean_15_day_input = df_Mean_15_day_input_
        self.df_Mean_15_day_output = self.process_filling()

    def process_filling(self):
        """Walk the frame row by row and overwrite every cell equal to ``-1``.

        Returns:
            The (mutated) input frame, or ``None`` after printing the error
            message if anything raised.
        """
        try:
            frame = self.df_Mean_15_day_input
            for label, record in frame.iterrows():
                for column in frame.columns:
                    if record[column] == -1:
                        # Values filled on earlier rows are already in the frame,
                        # so they take part in this look-back mean.
                        replacement = self.get_mean_15_days(record['date'], column)
                        frame.at[label, column] = replacement
            return frame
        except Exception as e:
            print(f"An error occurred: {e}")
            return None

    def get_mean_15_days(self, current_date, col_name):
        """Mean of ``col_name`` over rows dated in ``[current_date - 15 days, current_date)``.

        Cells still holding ``-1`` are excluded from the mean.
        """
        frame = self.df_Mean_15_day_input
        window_start = current_date - pd.Timedelta(days=15)
        dates = frame['date']
        in_window = (dates >= window_start) & (dates < current_date)
        candidates = frame.loc[in_window, col_name]
        return candidates.replace(-1, pd.NA).mean()

# Run the 15-day mean fill on the working copy and keep the resulting frame.
df_Mean_15_day_output_ = (
    mean_15_days_filling(df_Mean_15_day_)
    .df_Mean_15_day_output
)
#df_Mean_15_day_output_.style

In [4]:
df_Mean_15_day_output_.dtypes

index                int64
date        datetime64[ns]
stock_id            object
open               float64
high               float64
low                float64
close              float64
volume             float64
dtype: object

## 2.Backward Filling

In [5]:
# Independent deep copy of the raw data for the backward-fill run.
backward_filling_ = df_raw.copy(deep=True)

class backward_filling:
    """Replace ``-1`` placeholders with the next available value further down each column.

    The filled frame is exposed as ``backward_filling_output`` (``None`` if
    filling failed); ``backward_filling_input`` ends up holding the frame with
    the placeholders turned into NA.
    """

    def __init__(self, backward_filling_input_):
        self.backward_filling_input = backward_filling_input_
        self.backward_filling_output = self.process_filling()

    def process_filling(self):
        """Back-fill the frame; return it, or ``None`` after printing the error."""
        price_columns = ['open', 'high', 'low', 'close', 'volume']
        try:
            # -1 marks a missing value; turn it into NA so bfill() treats it as a gap.
            without_placeholders = self.backward_filling_input.replace(-1, pd.NA)
            self.backward_filling_input = without_placeholders

            filled = without_placeholders.bfill()
            self.backward_filling_output = filled

            # Bring the price/volume columns back to a numeric dtype.
            as_numbers = filled[price_columns].apply(pd.to_numeric, errors='coerce')
            filled[price_columns] = as_numbers
            return filled
        except Exception as e:
            print(f"An error occurred: {e}")
            return None

# Run the backward fill on the working copy and keep the resulting frame.
backward_filling_output_ = (
    backward_filling(backward_filling_)
    .backward_filling_output
)

#backward_filling_output_.style

In [6]:
backward_filling_output_.dtypes

index                int64
date        datetime64[ns]
stock_id            object
open               float64
high               float64
low                float64
close              float64
volume             float64
dtype: object

## 3.Forward Filling

In [7]:
# Independent deep copy of the raw data for the forward-fill run.
forward_filling_ = df_raw.copy(deep=True)

class forward_filling:
    """Replace ``-1`` placeholders with the last available value further up each column.

    The filled frame is exposed as ``forward_filling_output`` (``None`` if
    filling failed); ``forward_filling_input`` ends up holding the frame with
    the placeholders turned into NA.
    """

    def __init__(self, forward_filling_input_):
        self.forward_filling_input = forward_filling_input_
        self.forward_filling_output = self.process_filling()

    def process_filling(self):
        """Forward-fill the frame; return it, or ``None`` after printing the error."""
        numeric_names = ['open', 'high', 'low', 'close', 'volume']
        try:
            # -1 marks a missing value; turn it into NA so ffill() treats it as a gap.
            gaps_marked = self.forward_filling_input.replace(-1, pd.NA)
            self.forward_filling_input = gaps_marked

            carried_forward = gaps_marked.ffill()
            self.forward_filling_output = carried_forward

            # Bring the price/volume columns back to a numeric dtype.
            converted = carried_forward[numeric_names].apply(pd.to_numeric, errors='coerce')
            carried_forward[numeric_names] = converted
            return carried_forward
        except Exception as e:
            print(f"An error occurred: {e}")
            return None

# Run the forward fill on the working copy and keep the resulting frame.
forward_filling_output_ = (
    forward_filling(forward_filling_)
    .forward_filling_output
)

#forward_filling_output_.style

In [8]:
forward_filling_output_.dtypes

index                int64
date        datetime64[ns]
stock_id            object
open               float64
high               float64
low                float64
close              float64
volume             float64
dtype: object

## 4.Linear Interpolation

In [None]:
# Independent deep copy of the raw data for the linear-interpolation run.
linear_interpolation_ = df_raw.copy(deep=True)

class linear_interpolation:
    """Fill ``-1`` placeholders in the price/volume columns by linear interpolation.

    Attributes:
        linear_interpolation_input: the frame with placeholders replaced by NaN
            in the price/volume columns (the frame passed in is not modified).
        linear_interpolation_ouput: the interpolated frame, or ``None`` if
            filling failed. The misspelled name is kept for existing callers.
        linear_interpolation_output: correctly spelled alias of the above.
    """

    def __init__(self, linear_interpolation_input_):
        self.linear_interpolation_input = linear_interpolation_input_
        self.linear_interpolation_ouput = self.process_filling()
        # Correctly spelled alias; the misspelled attribute stays for backward compatibility.
        self.linear_interpolation_output = self.linear_interpolation_ouput

    def process_filling(self):
        """Interpolate the numeric columns and return the filled frame.

        Only ``open``, ``high``, ``low``, ``close`` and ``volume`` are touched.
        Placeholders become NaN (not ``pd.NA``) so the columns stay float64;
        interpolate() does not fill object-dtype columns.

        Returns:
            The filled frame, or ``None`` after printing the error message.
        """
        try:
            columns_to_convert = ['open', 'high', 'low', 'close', 'volume']
            source = self.linear_interpolation_input

            numeric = source[columns_to_convert].apply(pd.to_numeric, errors='coerce').astype('float64')
            numeric = numeric.mask(numeric.eq(-1))  # -1 -> NaN, dtype stays float64

            cleaned = source.copy()
            cleaned[columns_to_convert] = numeric
            self.linear_interpolation_input = cleaned

            filled = cleaned.copy()
            filled[columns_to_convert] = numeric.interpolate(method='linear')
            self.linear_interpolation_ouput = filled
            return filled
        except Exception as e:
            print(f"An error occurred: {e}")
            return None

# Run the linear interpolation on the working copy and keep the resulting frame.
# The attribute name is spelled "ouput" on the class.
linear_interpolation_output_ = (
    linear_interpolation(linear_interpolation_)
    .linear_interpolation_ouput
)

#linear_interpolation_output_.style

In [None]:
linear_interpolation_output_.dtypes

## 5.Polynomial Interpolation

In [None]:
# Independent deep copy of the raw data for the polynomial-interpolation run.
polynomial_interpolation_ = df_raw.copy(deep=True)

class polynomial_interpolation:
    """Fill ``-1`` placeholders in the price/volume columns by polynomial interpolation.

    Args:
        polynomial_interpolation_input_: frame containing the ``open``, ``high``,
            ``low``, ``close`` and ``volume`` columns; it is not modified.
        order_input_: polynomial order passed to ``interpolate``. Defaults to 2
            so the class can be built without an explicit order.

    Attributes:
        polynomial_interpolation_input: the frame with placeholders replaced by
            NaN in the price/volume columns.
        polynomial_interpolation_output: the interpolated frame, or ``None`` if
            filling failed.
    """

    def __init__(self, polynomial_interpolation_input_, order_input_=2):
        self.polynomial_interpolation_input = polynomial_interpolation_input_
        self.order = order_input_
        self.polynomial_interpolation_output = self.process_filling()

    def process_filling(self):
        """Interpolate the numeric columns and return the filled frame.

        Placeholders become NaN (not ``pd.NA``) so the columns stay float64;
        interpolate() does not fill object-dtype columns. pandas delegates the
        polynomial method to SciPy, so SciPy must be installed.

        Returns:
            The filled frame, or ``None`` after printing the error message.
        """
        try:
            columns_to_convert = ['open', 'high', 'low', 'close', 'volume']
            source = self.polynomial_interpolation_input

            numeric = source[columns_to_convert].apply(pd.to_numeric, errors='coerce').astype('float64')
            numeric = numeric.mask(numeric.eq(-1))  # -1 -> NaN, dtype stays float64

            cleaned = source.copy()
            cleaned[columns_to_convert] = numeric
            self.polynomial_interpolation_input = cleaned

            filled = cleaned.copy()
            filled[columns_to_convert] = numeric.interpolate(method='polynomial', order=self.order)
            self.polynomial_interpolation_output = filled
            return filled
        except Exception as e:
            print(f"An error occurred: {e}")
            return None

# The constructor takes the polynomial order as its second argument; the original
# call omitted it. Order 2 is a starting point -- tune as needed.
polynomial_interpolation_output_ = (
    polynomial_interpolation(polynomial_interpolation_, 2)
    .polynomial_interpolation_output
)

#polynomial_interpolation_output_.style

In [None]:
polynomial_interpolation_output_.dtypes

## 6.Time Based Interpolation

In [None]:
# Independent deep copy of the raw data for the time-based interpolation run.
time_based_interpolation_ = df_raw.copy(deep=True)

class time_based_interpolation:
    """Fill ``-1`` placeholders in the price/volume columns by time-weighted interpolation.

    Attributes:
        time_based_interpolation_input: the frame with placeholders replaced by
            NaN in the price/volume columns (the frame passed in is not modified).
        time_based_interpolation_output: the interpolated frame, or ``None`` if
            filling failed.
    """

    def __init__(self, time_based_interpolation_input_):
        self.time_based_interpolation_input = time_based_interpolation_input_
        self.time_based_interpolation_output = self.process_filling()

    def process_filling(self):
        """Interpolate the numeric columns against the ``date`` column.

        ``interpolate(method='time')`` only works on a DatetimeIndex, so the
        numeric columns are temporarily indexed by ``date`` in chronological
        order, interpolated, and then put back in their original row order
        under the original index.

        Returns:
            The filled frame, or ``None`` after printing the error message.
        """
        try:
            columns_to_convert = ['open', 'high', 'low', 'close', 'volume']
            source = self.time_based_interpolation_input

            numeric = source[columns_to_convert].apply(pd.to_numeric, errors='coerce').astype('float64')
            numeric = numeric.mask(numeric.eq(-1))  # -1 -> NaN, dtype stays float64

            cleaned = source.copy()
            cleaned[columns_to_convert] = numeric
            self.time_based_interpolation_input = cleaned

            # Row positions in chronological order (stable, so ties keep their row order).
            timeline = pd.to_datetime(source['date']).reset_index(drop=True)
            positions = timeline.sort_values(kind='mergesort').index.to_numpy()

            chronological = numeric.iloc[positions]
            chronological.index = pd.DatetimeIndex(timeline.iloc[positions])
            interpolated = chronological.interpolate(method='time')

            # Undo the sort by position, then restore the caller's index labels.
            interpolated.index = positions
            interpolated = interpolated.sort_index()
            interpolated.index = source.index

            filled = cleaned.copy()
            filled[columns_to_convert] = interpolated
            self.time_based_interpolation_output = filled
            return filled
        except Exception as e:
            print(f"An error occurred: {e}")
            return None

# Run the time-based interpolation on the working copy and keep the resulting frame.
time_based_interpolation_output_ = (
    time_based_interpolation(time_based_interpolation_)
    .time_based_interpolation_output
)

#time_based_interpolation_output_.style

In [None]:
time_based_interpolation_output_.dtypes

## 7.Moving Average

In [None]:
# Independent deep copy of the raw data for the moving-average run.
moving_average_ = df_raw.copy(deep=True)

class moving_average:
    """Fill ``-1`` placeholders in the price/volume columns with a trailing rolling mean.

    Args:
        moving_average_input_: frame containing the ``open``, ``high``, ``low``,
            ``close`` and ``volume`` columns; it is not modified.
        windows_input: rolling window size (number of rows, current row included).
        min_period_input: minimum number of observed values a window needs to
            produce a mean; gaps whose window has fewer stay NaN.

    Attributes:
        moving_average_input: the frame with placeholders replaced by NaN in
            the price/volume columns.
        moving_average_output: the filled frame, or ``None`` if filling failed.
    """

    def __init__(self, moving_average_input_, windows_input, min_period_input):
        self.moving_average_input = moving_average_input_
        self.windows = windows_input
        self.min_period = min_period_input
        self.moving_average_output = self.process_filling()

    def process_filling(self):
        """Fill gaps from the rolling mean of the observed values.

        The rolling mean is computed on the numeric columns only: rolling over
        the whole frame would also hit the date and string columns, and
        ``pd.NA`` placeholders would turn the price columns into object dtype.

        Returns:
            The filled frame, or ``None`` after printing the error message.
        """
        try:
            columns_to_convert = ['open', 'high', 'low', 'close', 'volume']
            source = self.moving_average_input

            numeric = source[columns_to_convert].apply(pd.to_numeric, errors='coerce').astype('float64')
            numeric = numeric.mask(numeric.eq(-1))  # -1 -> NaN, dtype stays float64

            cleaned = source.copy()
            cleaned[columns_to_convert] = numeric
            self.moving_average_input = cleaned

            # NaN cells are skipped by the mean, so a gap is filled from the
            # observed values inside its own trailing window.
            trailing_mean = numeric.rolling(window=self.windows, min_periods=self.min_period).mean()

            filled = cleaned.copy()
            filled[columns_to_convert] = numeric.fillna(trailing_mean)
            self.moving_average_output = filled
            return filled
        except Exception as e:
            print(f"An error occurred: {e}")
            return None

# Run the moving-average fill: 3-row window, at least 1 observed value per window.
moving_average_output_ = (
    moving_average(moving_average_, windows_input=3, min_period_input=1)
    .moving_average_output
)

#moving_average_output_.style

In [None]:
moving_average_output_.dtypes

## 8.KNN Imputation

In [None]:
# Independent deep copy of the raw data for the KNN-imputation run.
knn_imputation_ = df_raw.copy(deep=True)

class knn_imputation:
    """Fill ``-1`` placeholders in the price/volume columns with scikit-learn's KNNImputer.

    Args:
        knn_imputation_input_: frame containing the ``open``, ``high``, ``low``,
            ``close`` and ``volume`` columns; it is not modified.
        n_neighbors: number of neighbours handed to ``KNNImputer``.

    Attributes:
        knn_imputation_input: the frame with placeholders replaced by NaN in
            the price/volume columns.
        knn_imputation_output: the imputed frame, or ``None`` if imputation
            failed (including when scikit-learn is not installed).
    """

    def __init__(self, knn_imputation_input_, n_neighbors=5):
        self.knn_imputation_input = knn_imputation_input_
        self.n_neighbors = n_neighbors
        self.knn_imputation_output = self.process_filling()

    def process_filling(self):
        """Impute the numeric columns and return the filled frame.

        Only the numeric price/volume columns are given to the imputer; the
        date and string columns are copied through unchanged.

        Returns:
            The imputed frame, or ``None`` after printing the error message.
        """
        try:
            # Imported here because the notebook's import cell does not provide KNNImputer.
            from sklearn.impute import KNNImputer

            columns_to_convert = ['open', 'high', 'low', 'close', 'volume']
            source = self.knn_imputation_input

            numeric = source[columns_to_convert].apply(pd.to_numeric, errors='coerce').astype('float64')
            numeric = numeric.mask(numeric.eq(-1))  # -1 -> NaN, dtype stays float64

            cleaned = source.copy()
            cleaned[columns_to_convert] = numeric
            self.knn_imputation_input = cleaned

            filled = cleaned.copy()
            # Columns with no observed value are left out of the fit so the
            # result always has one column per fitted column; they stay NaN.
            usable = [name for name in columns_to_convert if numeric[name].notna().any()]
            if usable:
                imputer = KNNImputer(n_neighbors=self.n_neighbors)
                imputed = imputer.fit_transform(numeric[usable])
                filled[usable] = pd.DataFrame(imputed, index=numeric.index, columns=usable)

            self.knn_imputation_output = filled
            return filled
        except Exception as e:
            print(f"An error occurred: {e}")
            return None
            
# The class defined above is named knn_imputation; KNNImputation does not exist.
knn_imputation_output_ = (
    knn_imputation(knn_imputation_, n_neighbors=3)
    .knn_imputation_output
)
#knn_imputation_output_.style

In [None]:
knn_imputation_output_.dtypes

## 9.ARIMA Imputation

In [None]:
# Independent deep copy of the raw data for the ARIMA-imputation run.
arima_imputation_ = df_raw.copy(deep=True)

class arima_imputation:
    """Fill ``-1`` placeholders in the price/volume columns from a fitted ARIMA model.

    Args:
        arima_imputation_input_: frame containing the ``open``, ``high``,
            ``low``, ``close`` and ``volume`` columns; it is not modified.
        order: ``(p, d, q)`` order handed to statsmodels' ``ARIMA``.

    Attributes:
        arima_imputation_input: the frame with placeholders replaced by NaN in
            the price/volume columns.
        arima_imputation_output: the imputed frame, or ``None`` if imputation
            failed (including when statsmodels is not installed).
    """

    def __init__(self, arima_imputation_input_, order=(1, 1, 1)):
        self.arima_imputation_input = arima_imputation_input_
        self.order = order
        self.arima_imputation_output = self.process_filling()

    def process_filling(self):
        """Fit one ARIMA model per column and fill its gaps with the in-sample predictions.

        Observed values are never overwritten. Columns with no gaps, or with no
        observed values at all, are left as they are.

        NOTE(review): predictions for gaps at the very start of a series come
        from the model's initial state and may be poor -- verify on real data.

        Returns:
            The imputed frame, or ``None`` after printing the error message.
        """
        try:
            # Imported here because the notebook's import cell does not provide ARIMA.
            from statsmodels.tsa.arima.model import ARIMA

            columns_to_impute = ['open', 'high', 'low', 'close', 'volume']
            source = self.arima_imputation_input

            numeric = source[columns_to_impute].apply(pd.to_numeric, errors='coerce').astype('float64')
            numeric = numeric.mask(numeric.eq(-1))  # -1 -> NaN, dtype stays float64

            cleaned = source.copy()
            cleaned[columns_to_impute] = numeric
            self.arima_imputation_input = cleaned

            filled = cleaned.copy()
            for column in columns_to_impute:
                series = numeric[column]
                missing = series.isna().to_numpy()
                if not missing.any() or missing.all():
                    continue  # nothing to fill, or nothing to fit on

                # A plain float array keeps the predictions positional, so the
                # fill does not depend on the frame's index labels.
                values = series.to_numpy(copy=True)
                model_fit = ARIMA(values, order=self.order).fit()
                predictions = pd.Series(model_fit.predict(start=0, end=len(values) - 1)).to_numpy()
                values[missing] = predictions[missing]
                filled[column] = values

            self.arima_imputation_output = filled
            return filled
        except Exception as e:
            print(f"An error occurred: {e}")
            return None

# The class defined above is named arima_imputation; ARIMAImputation does not exist.
# Use the working copy made at the top of this cell instead of copying df_raw again.
arima_imputation_output_ = (
    arima_imputation(arima_imputation_, order=(1, 1, 1))
    .arima_imputation_output
)
#arima_imputation_output_.style

In [None]:
arima_imputation_output_.dtypes