### Check whether all 13 CSV files exist

In [8]:
# Import necessary libraries
import pandas as pd

# One match-stats CSV is expected per World Cup edition; the paths are
# derived from the tournament years instead of being spelled out one by one.
world_cup_years = (
    1975, 1979, 1983, 1987, 1992, 1996, 1999,
    2003, 2007, 2011, 2015, 2019, 2023,
)
file_paths = [
    f"../data/WorldCup_Stats/{year}_Match_Stats.csv" for year in world_cup_years
]

# Only attempt the load when exactly 13 paths are listed
if len(file_paths) == 13:
    # Collects one DataFrame per file that could be read
    dataframes = []

    for file_path in file_paths:
        # A failure on one file is reported and skipped so that every
        # problematic path shows up in a single run.
        try:
            df = pd.read_csv(file_path)
        except FileNotFoundError:
            print(f"Error: File not found -> {file_path}")
        except pd.errors.EmptyDataError:
            print(f"Error: File is empty -> {file_path}")
        except Exception as e:
            print(f"An error occurred while processing {file_path}: {e}")
        else:
            dataframes.append(df)

    # Report whether every file made it into the list
    loaded_count = len(dataframes)
    if loaded_count != 13:
        print(
            f"Warning: Only {loaded_count} out of 13 files were successfully loaded.")
    else:
        # NOTE(review): the message says "combined", but this cell only loads
        # the files; nothing is concatenated and no DataFrame info is printed.
        print("Successfully loaded and combined all 13 files. Combined DataFrame info:")
else:
    print("Error: Please specify exactly 13 CSV file paths.")

Successfully loaded and combined all 13 files. Combined DataFrame info:


### Merge 13 CSV files

In [9]:
import glob
from pathlib import Path

import pandas as pd

# Glob pattern matching every CSV file in the World Cup stats folder
folder_path = "../data/WorldCup_Stats/*.csv"

# glob.glob returns matches in arbitrary, filesystem-dependent order; sorting
# makes the row order of the combined file reproducible across machines.
file_paths = sorted(glob.glob(folder_path))

if not file_paths:
    print("No CSV files found in the directory. Please check the folder path.")
else:
    # Read each CSV file into its own DataFrame
    data_frames = [pd.read_csv(file) for file in file_paths]

    # Stack all DataFrames row-wise, renumbering the index from 0
    combined_df = pd.concat(data_frames, ignore_index=True)
    # NOTE(review): the preview shows "Unnamed: 0" / "Unnamed: 0.1" columns,
    # which look like index columns saved into the source CSVs. Confirm
    # whether they should be dropped before further analysis.

    # Save the combined DataFrame, creating the output folder if it is missing
    # so that to_csv does not fail on a fresh checkout.
    output_file_name = "../data/out/combined_world_cup_stats.csv"
    Path(output_file_name).parent.mkdir(parents=True, exist_ok=True)
    combined_df.to_csv(output_file_name, index=False)

    print(f"Combined CSV file has been saved as '{output_file_name}'.")

    # The preview lives inside this branch: combined_df only exists when files
    # were found, so printing it unconditionally raised NameError on a fresh
    # kernel (or showed a stale frame left over from an earlier run).
    print(combined_df.head())

Combined CSV file has been saved as '../data/out/combined_world_cup_stats.csv'.
   Unnamed: 0.1  Unnamed: 0        date        venue match_category team_1  \
0             0          11         NaN   Nottingham   League-Match    PAK   
1             1           5         NaN        Leeds   League-Match    EAf   
2             2          12  1975-06-18        Leeds     Semi-Final    ENG   
3             3           8  1975-06-14   Birmingham   League-Match    ENG   
4             4          13         NaN     The Oval     Semi-Final     NZ   

  team_2  team_1_runs  team_1_wickets  team_2_runs  team_2_wickets  \
0     SL        330.0             6.0        138.0             0.0   
1    IND        120.0             0.0        123.0             0.0   
2    AUS         93.0             0.0         94.0             6.0   
3    EAf        290.0             5.0         94.0             0.0   
4     WI        158.0             0.0        159.0             5.0   

                              