In [64]:
import pandas as pd
import os

class DataTransformer:
    """
    Transforms data into overlapped sequences for deep learning models.

    A window of ``overlap`` consecutive rows is flattened into one wide row:
    the first row of the window keeps its column names and the j-th following
    row gets the suffix ``_j`` (``_1``, ``_2``, ...).
    """

    # Columns retained by keep_cols(). Some players have extra columns from
    # older data (the original note says "before 219", presumably 2019 --
    # TODO confirm). These will not be used.
    _KEEP_COLS = (
        "assists",
        "bonus",
        "bps",
        "clean_sheets",
        "creativity",
        "goals_conceded",
        "goals_scored",
        "ict_index",
        "influence",
        "kickoff_time",
        "minutes",
        "own_goals",
        "penalties_missed",
        "penalties_saved",
        "red_cards",
        "saves",
        "team_a_score",
        "team_h_score",
        "threat",
        "value",
        "was_home",
        "yellow_cards",
        "starts",
    )

    def __init__(self, filepath, overlap):
        """
        Initializes the preprocessor and loads the CSV at ``filepath``.

        Args:
            filepath: Path of the CSV file to read.
            overlap: Number of consecutive rows combined into one output row.
        """
        self.filepath = filepath
        self.overlap = overlap
        self.data = None
        self.load_data()

    def load_data(self):
        """
        Loads data from the CSV file at ``self.filepath`` into ``self.data``.
        """
        self.data = pd.read_csv(self.filepath)

    def keep_cols(self):
        """
        Keep relevant columns.

        Returns:
            The reduced DataFrame, which is also stored in ``self.data``.

        Raises:
            KeyError: If one of the expected columns is missing.
        """
        self.data = self.data[list(self._KEEP_COLS)]

        return self.data

    def merge_dataframes_side_by_side(self):
        """
        Flattens every window of ``overlap`` consecutive rows into one row.

        Row ``k`` of the result holds the columns of input row ``k`` followed
        by the columns of rows ``k + 1`` ... ``k + overlap - 1``, the latter
        renamed with the suffixes ``_1`` ... ``_{overlap - 1}``. The result
        gets a fresh 0..n-1 index and replaces ``self.data``. If the data has
        fewer than ``overlap`` rows the result is empty but still carries the
        full set of column names.

        Returns:
            The merged DataFrame, which is also stored in ``self.data``.

        Raises:
            ValueError: If ``overlap`` is smaller than 1.
        """
        if self.overlap < 1:
            raise ValueError(f"overlap must be at least 1, got {self.overlap}")

        # The "+ 1" keeps the window that ends on the last input row.
        n_windows = max(len(self.data) - self.overlap + 1, 0)

        # One shifted slice per window position; joining them column-wise
        # once avoids re-copying the accumulated result for every row.
        shifted = []
        for j in range(self.overlap):
            part = self.data.iloc[j:j + n_windows].reset_index(drop=True)
            if j:
                part = part.add_suffix(f"_{j}")
            shifted.append(part)

        self.data = pd.concat(shifted, axis=1)

        return self.data

    def append_df_to_csv(self, csv_file_path):
        """
        Creates a CSV file from ``self.data`` or appends to it if it exists.

        The header row is written only when the target file is missing or
        empty, so repeated calls build one table with a single header.

        Args:
            csv_file_path: Path of the CSV file to create or extend.
        """
        # An existing zero-byte file still needs its header.
        needs_header = (
            not os.path.isfile(csv_file_path)
            or os.path.getsize(csv_file_path) == 0
        )

        # Write or append to the CSV file
        self.data.to_csv(csv_file_path, mode='a', header=needs_header, index=False)

if __name__ == "__main__":
    # Script entry point: read one player's CSV, trim it to the relevant
    # columns, flatten it into 3-row windows and append the result to the
    # shared raw CSV.
    source_csv = '../data/latest/Kevin_De Bruyne.csv'
    target_csv = "../data/raw.csv"

    transformer = DataTransformer(
        filepath=source_csv,
        overlap=3,
    )
    transformer.keep_cols()
    transformer.merge_dataframes_side_by_side()
    transformer.append_df_to_csv(target_csv)


In [66]:
import pandas as pd
import os

class DataTransformer:
    """
    Transforms data into overlapped sequences for deep learning models.

    A window of ``overlap`` consecutive rows is flattened into one wide row:
    the first row of the window keeps its column names and the j-th following
    row gets the suffix ``_j`` (``_1``, ``_2``, ...).
    """

    # Columns retained by keep_cols().
    _KEEP_COLS = (
        "assists", "bonus", "bps", "clean_sheets", "creativity",
        "goals_conceded", "goals_scored", "ict_index", "influence",
        "kickoff_time", "minutes", "own_goals", "penalties_missed",
        "penalties_saved", "red_cards", "saves", "team_a_score",
        "team_h_score", "threat", "value", "was_home", "yellow_cards", "starts",
    )

    def __init__(self, filepath: str, overlap: int):
        """
        Initializes the preprocessor and loads the CSV at ``filepath``.

        Args:
            filepath: Path of the CSV file to read.
            overlap: Number of consecutive rows combined into one output row.
        """
        self.filepath = filepath
        self.overlap = overlap
        self.data = self.load_data()

    def load_data(self) -> pd.DataFrame:
        """
        Loads data from the CSV file at ``self.filepath``.

        Returns:
            The parsed DataFrame.
        """
        return pd.read_csv(self.filepath)

    def keep_cols(self) -> None:
        """
        Keep relevant columns, replacing ``self.data`` with the reduced frame.

        Raises:
            KeyError: If one of the expected columns is missing.
        """
        self.data = self.data[list(self._KEEP_COLS)]

    def merge_dataframes_side_by_side(self) -> None:
        """
        Flattens every window of ``overlap`` consecutive rows into one row.

        Row ``k`` of the result holds the columns of input row ``k`` followed
        by the columns of rows ``k + 1`` ... ``k + overlap - 1``, the latter
        renamed with the suffixes ``_1`` ... ``_{overlap - 1}``. The result
        gets a fresh 0..n-1 index and replaces ``self.data``. If the data has
        fewer than ``overlap`` rows the result is empty but still carries the
        full set of column names, so a CSV header can still be written.

        Raises:
            ValueError: If ``overlap`` is smaller than 1.
        """
        if self.overlap < 1:
            raise ValueError(f"overlap must be at least 1, got {self.overlap}")

        # Clamp at zero so data shorter than one window yields empty slices.
        n_windows = max(len(self.data) - self.overlap + 1, 0)

        # One shifted slice per window position; joining them column-wise
        # once avoids re-copying the accumulated result for every row.
        shifted = []
        for j in range(self.overlap):
            part = self.data.iloc[j:j + n_windows].reset_index(drop=True)
            if j:
                part = part.add_suffix(f"_{j}")
            shifted.append(part)

        self.data = pd.concat(shifted, axis=1)

    def append_df_to_csv(self, csv_file_path: str) -> None:
        """
        Appends the DataFrame to a CSV file, creating the file if it doesn't exist.

        The header row is written only when the target file is missing or
        empty, so repeated calls build one table with a single header.

        Args:
            csv_file_path: Path of the CSV file to create or extend.
        """
        # An existing zero-byte file still needs its header.
        needs_header = (
            not os.path.isfile(csv_file_path)
            or os.path.getsize(csv_file_path) == 0
        )
        self.data.to_csv(csv_file_path, mode='a', header=needs_header, index=False)

if __name__ == "__main__":
    # Script entry point: read one player's CSV, trim it to the relevant
    # columns, flatten it into 3-row windows and append the result to the
    # shared raw CSV.
    source_csv = "../data/latest/Kevin_De Bruyne.csv"
    target_csv = "../data/raw.csv"

    transformer = DataTransformer(filepath=source_csv, overlap=3)
    transformer.keep_cols()
    transformer.merge_dataframes_side_by_side()
    transformer.append_df_to_csv(target_csv)
