In [1]:
import pandas as pd
import os

In [2]:
os.chdir('..')

In [3]:
base_folder = 'data'

In [4]:
# Get a list of all subfolders in the base folder
subfolders = [f.path for f in os.scandir(base_folder) if f.is_dir()]

In [9]:
all_merged_datas = []  # List to store merged dataframes

In [10]:
# Iterate through each subfolder
for subfolder in subfolders:
    try:
        # Construct the file paths for each CSV within the subfolder
        totals_path = os.path.join(subfolder, 'Totals.csv')
        chart_path = os.path.join(subfolder, 'Chart data.csv')
        table_path = os.path.join(subfolder, 'Table data.csv')

        # Read CSV files with error handling
        try:
            totals_data = pd.read_csv(totals_path)
        except FileNotFoundError:
            totals_data = pd.DataFrame({'Date': [], 'Views': []})

        try:
            chart_data = pd.read_csv(chart_path)
        except FileNotFoundError:
            chart_data = pd.DataFrame({'Date': [], 'Cities': [], 'City name': [], 'Views': []})

        try:
            table_data = pd.read_csv(table_path)
        except FileNotFoundError:
            table_data = pd.DataFrame({'Cities': [], 'City name': [], 'Geography': [], 'Views': [],
                                       'Watch time (hours)': [], 'Average view duration': []})

        # Find common columns dynamically
        common_columns = set(chart_data.columns) & set(table_data.columns)

        # Check if there are common columns
        if common_columns:
            # Merge based on the common columns
            merged_data = pd.merge(chart_data, table_data, on=list(common_columns), how='left')

            # Merge the result with the "Totals" data based on the "Date" column
            final_data = pd.merge(merged_data, totals_data, on='Date', how='left')

            # Append the merged dataframe to the list
            all_merged_datas.append(final_data)

            # Display the final merged data for the current subfolder
            print(f'\nMerged Data for {subfolder}:')
            print(final_data)

            # Save the merged data to a new CSV file within each subfolder
            output_path = os.path.join(subfolder, 'merged_data.csv')
            final_data.to_csv(output_path, index=False)
        else:
            print(f"Skipping {subfolder}: No common columns found between chart_data and table_data.")

    except Exception as e:
        print(f"Error processing {subfolder}: {str(e)}")

print("\nProcessing complete.")


Merged Data for data/Sharing service:
             Date             Sharing service  Shares_x  Shares_y
0      2020-06-28                    Facebook         0         0
1      2020-06-29                    Facebook         0         1
2      2020-06-30                    Facebook         0         0
3      2020-07-01                    Facebook         0         0
4      2020-07-02                    Facebook         0         0
...           ...                         ...       ...       ...
11506  2023-12-24  Share to WhatsApp Business         0         0
11507  2023-12-25  Share to WhatsApp Business         0         0
11508  2023-12-26  Share to WhatsApp Business         0         0
11509  2023-12-27  Share to WhatsApp Business         0         0
11510  2023-12-28  Share to WhatsApp Business         0         0

[11511 rows x 4 columns]
Skipping data/Viewer age: No common columns found between chart_data and table_data.

Merged Data for data/Cities:
            Date            

In [11]:
# Concatenate all merged datas into a single dataframe
if all_merged_datas:
    all_merged_data = pd.concat(all_merged_datas, ignore_index=True)

    # Display the final concatenated dataframe
    print("\nConcatenated Data:")
    print(all_merged_data)

    # Save the concatenated dataframe to a new CSV file
    all_output_path = os.path.join("data", 'all_merged_data.csv')
    all_merged_data.to_csv(all_output_path, index=False)
else:
    print("No common columns found in any subfolder.")


Concatenated Data:
              Date Sharing service  Shares_x  Shares_y Cities City name  \
0       2020-06-28        Facebook       0.0       0.0    NaN       NaN   
1       2020-06-29        Facebook       0.0       1.0    NaN       NaN   
2       2020-06-30        Facebook       0.0       0.0    NaN       NaN   
3       2020-07-01        Facebook       0.0       0.0    NaN       NaN   
4       2020-07-02        Facebook       0.0       0.0    NaN       NaN   
...            ...             ...       ...       ...    ...       ...   
121500  2023-12-24             NaN       NaN       NaN    NaN       NaN   
121501  2023-12-25             NaN       NaN       NaN    NaN       NaN   
121502  2023-12-26             NaN       NaN       NaN    NaN       NaN   
121503  2023-12-27             NaN       NaN       NaN    NaN       NaN   
121504  2023-12-28             NaN       NaN       NaN    NaN       NaN   

        Views_x Geography Geography.1  Watch time (hours)  ...  \
0           N

In [13]:
all_merged_data

Unnamed: 0,Date,Sharing service,Shares_x,Shares_y,Cities,City name,Views_x,Geography,Geography.1,Watch time (hours),...,New and returning viewers,Subscription source,Subscribers_x,Subscribers gained,Subscribers lost,Subscribers_y,Operating system,Traffic source,Impressions,Impressions click-through rate (%)
0,2020-06-28,Facebook,0.0,0.0,,,,,,,...,,,,,,,,,,
1,2020-06-29,Facebook,0.0,1.0,,,,,,,...,,,,,,,,,,
2,2020-06-30,Facebook,0.0,0.0,,,,,,,...,,,,,,,,,,
3,2020-07-01,Facebook,0.0,0.0,,,,,,,...,,,,,,,,,,
4,2020-07-02,Facebook,0.0,0.0,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
121500,2023-12-24,,,,,,1.0,,,,...,,,,,,,,Notifications,,
121501,2023-12-25,,,,,,2.0,,,,...,,,,,,,,Notifications,,
121502,2023-12-26,,,,,,4.0,,,,...,,,,,,,,Notifications,,
121503,2023-12-27,,,,,,0.0,,,,...,,,,,,,,Notifications,,
