In [1]:
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [2]:
import os
import glob
import pandas as pd

def combine_csv_files(input_folder, prefix, output_csv):
    """
    Searches for all CSV files in `input_folder` whose filenames start with `prefix`.
    Concatenates them into a single DataFrame and writes the result to `output_csv`.

    :param input_folder: Path to the folder containing CSV files
    :param prefix: Common filename prefix to look for (e.g., "train_metadata")
    :param output_csv: Output filename for the combined CSV
    """
    # Create a list of all CSV files in the input folder that match the prefix
    csv_pattern = os.path.join(input_folder, f"{prefix}*.csv")
    csv_files = glob.glob(csv_pattern)

    if not csv_files:
        print(f"No files found for prefix '{prefix}' in folder: {input_folder}")
        return

    df_list = []
    for file in csv_files:
        print(f"Processing file: {file}")
        df = pd.read_csv(file)
        df_list.append(df)

    # Concatenate all DataFrames into one
    combined_df = pd.concat(df_list, ignore_index=True)

    # Save the combined DataFrame to a CSV file
    combined_df.to_csv(output_csv, index=False)
    print(f"Combined CSV saved to: {output_csv}")


if __name__ == "__main__":
    # 1) Update this to the folder path in your Google Drive where the CSV files are located.
    #    If running in Google Colab, you might mount your drive and set this to something like
    #    "/content/drive/MyDrive/APS360_Project" (adjust as needed).
    input_folder = "/content/drive/MyDrive/APS360_Project"  # <-- CHANGE THIS PATH

    # 2) Combine all train metadata CSV files
    #    Example: train_metadata_1.csv, train_metadata_2.csv, etc.
    combine_csv_files(
        input_folder=input_folder,
        prefix="train_metadata",
        output_csv=os.path.join(input_folder, "combined_train_metadata.csv")
    )

    # 3) Combine all validation metadata CSV files
    #    Example: val_metadata_1.csv, val_metadata_2.csv, etc.
    combine_csv_files(
        input_folder=input_folder,
        prefix="val_metadata",
        output_csv=os.path.join(input_folder, "combined_val_metadata.csv")
    )

    # 4) Combine all test metadata CSV files
    #    Example: test_metadata_1.csv, test_metadata_2.csv, etc.
    combine_csv_files(
        input_folder=input_folder,
        prefix="test_metadata",
        output_csv=os.path.join(input_folder, "combined_test_metadata.csv")
    )


Processing file: /content/drive/MyDrive/APS360_Project/train_metadata2.csv
Processing file: /content/drive/MyDrive/APS360_Project/train_metadata.csv
Processing file: /content/drive/MyDrive/APS360_Project/train_metadata3.csv
Processing file: /content/drive/MyDrive/APS360_Project/train_metadata4.csv
Combined CSV saved to: /content/drive/MyDrive/APS360_Project/combined_train_metadata.csv
Processing file: /content/drive/MyDrive/APS360_Project/val_metadata.csv
Processing file: /content/drive/MyDrive/APS360_Project/val_metadata2.csv
Processing file: /content/drive/MyDrive/APS360_Project/val_metadata3.csv
Processing file: /content/drive/MyDrive/APS360_Project/val_metadata4.csv
Combined CSV saved to: /content/drive/MyDrive/APS360_Project/combined_val_metadata.csv
Processing file: /content/drive/MyDrive/APS360_Project/test_metadata.csv
Processing file: /content/drive/MyDrive/APS360_Project/test_metadata2.csv
Processing file: /content/drive/MyDrive/APS360_Project/test_metadata3.csv
Processing fi