In [2]:
import pandas as pd
import os

In [3]:
def merge_csv_files(file_paths, output_path='merged_data.csv'):
    """
    Merge multiple CSV files with the same format into a single CSV file

    Files that do not exist or cannot be parsed are reported on stdout and
    skipped; the merge proceeds with whatever files could be read.

    Parameters:
    file_paths (list): List (or any iterable) of file paths to the CSV files
        to merge. A single path given as a str or os.PathLike is accepted
        and treated as a one-element list.
    output_path (str): Path where the merged CSV file will be saved

    Returns:
    pandas.DataFrame: The merged dataframe, with a fresh 0..n-1 index

    Raises:
    ValueError: If no paths are provided, or if none of the provided
        files could be read.
    """
    # A lone path must be wrapped: iterating a str would yield its characters,
    # each of which would then be reported as a missing file.
    if isinstance(file_paths, (str, os.PathLike)):
        file_paths = [file_paths]
    elif file_paths:
        # Materialize generators / other lazy iterables so the emptiness
        # check below is meaningful (a generator object is always truthy).
        file_paths = list(file_paths)

    # Check if file_paths is empty
    if not file_paths:
        raise ValueError("No files provided for merging")

    # List to store individual dataframes
    dataframes = []
    # Column set of the first file read successfully; later files are
    # compared against it to detect format mismatches.
    reference_columns = None

    # Read each CSV file and append to the list
    for file in file_paths:
        if not os.path.exists(file):
            print(f"Warning: File {file} does not exist and will be skipped")
            continue

        # Best-effort read: one unreadable file must not abort the whole merge.
        try:
            df = pd.read_csv(file)
        except Exception as e:
            print(f"Error reading {file}: {str(e)}")
            continue

        dataframes.append(df)
        print(f"Successfully read {file} with {df.shape[0]} rows and {df.shape[1]} columns")

        # pd.concat takes the union of columns, so a mismatch would otherwise
        # pass silently and only show up as unexpected NaN values.
        if reference_columns is None:
            reference_columns = set(df.columns)
        elif set(df.columns) != reference_columns:
            print(f"Warning: Columns in {file} differ from the first file; missing values will be filled with NaN")

    # Check if any dataframes were successfully loaded
    if not dataframes:
        raise ValueError("No valid CSV files could be read")

    # Concatenate all dataframes, discarding each file's own row index
    merged_df = pd.concat(dataframes, ignore_index=True)

    # Save the merged dataframe to a CSV file
    merged_df.to_csv(output_path, index=False)
    print(f"Merged data saved to {output_path}")

    return merged_df

In [4]:
# Example usage: combine the RIASEC survey CSV files into a single dataset file.

# CSV files to combine (edit this list to match your own files)
files_to_merge = [
    "riasec_survey_data_2025-04-17.csv",
    "riasec_survey_data_2025-04-18.csv",
    "riasec_survey_data_2025-04-18 2.csv",
]

# Read every listed file, stack the rows, and write the result to dataset.csv
merged_data = merge_csv_files(files_to_merge, output_path="dataset.csv")

# Show the top rows of the combined frame
print("\nPreview of merged data:")
print(merged_data.head())

# Report the overall size of the combined frame
row_count, column_count = merged_data.shape
print(f"\nTotal number of rows in merged data: {row_count}")
print(f"Total number of columns in merged data: {column_count}")

Successfully read riasec_survey_data_2025-04-17.csv with 128 rows and 34 columns
Successfully read riasec_survey_data_2025-04-18.csv with 172 rows and 34 columns
Successfully read riasec_survey_data_2025-04-18 2.csv with 23 rows and 34 columns
Merged data saved to dataset.csv

Preview of merged data:
                name student_id                 timestamp  \
0   Aytaç Eren Cirit      321.0  2025-04-17T15:34:42.836Z   
1      Hüseyin Demir      273.0  2025-04-17T15:37:15.063Z   
2  Erdem Ruhi Baysal      286.0  2025-04-17T15:37:53.382Z   
3          Mert Yuva      300.0  2025-04-17T15:39:48.807Z   
4       Zeren Çalgın     1309.0  2025-04-17T15:40:19.259Z   

                     raw_response  q1  q2  q3  q4  q5  q6  ...  q21  q22  q23  \
0  221121122122222121211111211111   1   1   0   0   1   0  ...    0    0    0   
1  121121221111211122212112212111   0   1   0   0   1   0  ...    1    0    0   
2  221221222211211122212212222122   1   1   0   1   1   0  ...    1    1    0   
3  2211