In [7]:
import pandas as pd
import os

# Input folder containing the CSV files for each season
input_folder = r"C:\Users\ALESSANDRO\Documents\GitHub\dream-team-fpl-predeiction\data\fpl"

# Output folder to save the merged dataset
output_folder = r"C:\Users\ALESSANDRO\Documents\GitHub\dream-team-fpl-predeiction\data"


def season_label(csv_file):
    """Return the "20XX-20YY" season label encoded in a season CSV filename.

    The season code is the last underscore-separated token of the filename,
    up to its first dot (e.g. "players_1617.csv" -> "1617" -> "2016-2017").

    Args:
        csv_file: File name (not a full path) of one season's CSV.

    Returns:
        The season formatted as "20XX-20YY".

    Raises:
        ValueError: If the extracted token is not exactly four ASCII digits.
            Without this check a stray file would silently yield a
            nonsensical label such as "20da-20ta".
    """
    token = csv_file.split("_")[-1].split(".")[0]
    if len(token) != 4 or not all(ch in "0123456789" for ch in token):
        raise ValueError(
            f"Cannot extract a 4-digit season code from filename "
            f"{csv_file!r} (got {token!r})"
        )
    return f"20{token[:2]}-20{token[2:]}"


def combine_seasons(season_frames):
    """Merge per-season dataframes into one player-level dataframe.

    Args:
        season_frames: List of dataframes, each holding at least the columns
            "first_name", "second_name" and "Season"; "now_cost" and
            "element_type" must be present in the combined result.

    Returns:
        A new dataframe with "Player" and "Season" as the leftmost columns,
        the name/cost/position columns removed, and a fresh 0..n-1 index.
        The input dataframes are not modified.
    """
    # ignore_index avoids repeated row labels (each season file starts at 0),
    # which would make label-based selection on the result ambiguous.
    merged = pd.concat(season_frames, ignore_index=True)

    # Create the "Player" variable by merging the "first_name" and "second_name"
    merged["Player"] = merged["first_name"] + " " + merged["second_name"]

    # Delete the "first_name", "second_name", "now_cost", and "element_type" columns
    merged = merged.drop(
        columns=["first_name", "second_name", "now_cost", "element_type"]
    )

    # Reorder columns to have "Player" and "Season" as the leftmost columns
    leading = ["Player", "Season"]
    remaining = [col for col in merged.columns if col not in leading]
    return merged[leading + remaining]


if __name__ == "__main__":
    # List all CSV files in the input folder. os.listdir returns entries in
    # arbitrary order, so sort them to make the merged output reproducible.
    csv_files = sorted(
        file for file in os.listdir(input_folder) if file.endswith(".csv")
    )
    if not csv_files:
        raise ValueError(f"No CSV files found in {input_folder!r}")

    # Read each season file and tag its rows with the season label
    season_dfs = []
    for csv_file in csv_files:
        # Derive the label first so a badly named file fails before any read
        season = season_label(csv_file)
        csv_path = os.path.join(input_folder, csv_file)
        df = pd.read_csv(csv_path, encoding="latin1")
        df["Season"] = season
        season_dfs.append(df)

    merged_df = combine_seasons(season_dfs)

    # Save the merged dataframe to a CSV file
    output_path = os.path.join(output_folder, "fpl_data.csv")
    merged_df.to_csv(output_path, index=False)

    print("Merged dataset saved as 'fpl_data.csv'.")


Merged dataset saved as 'fpl_data.csv'.
