In [44]:
import pandas as pd
from datetime import datetime
import os

In [45]:
# Schedule of influence weights. Each entry covers an inclusive date range
# (YYYY-MM-DD) and maps column names to percentage weights that
# calculate_influence() divides by 100 before applying.
# NOTE(review): the weights of the 2024-01-04..2024-06-26 period add up to
# 87.22, whereas every other period adds up to 100 — confirm this is intended.
influence_periods = [
    {"start_date": period_start, "end_date": period_end, "influences": weights}
    for period_start, period_end, weights in (
        ("2022-05-26", "2023-01-25",
         {"th_vp": 48.32, "ch_vp_r2": 51.68}),
        ("2023-01-26", "2023-06-07",
         {"th_vp": 41.95, "ch_vp_r2": 44.88, "gc_vp_s3": 13.17}),
        ("2023-06-08", "2023-10-13",
         {"th_vp": 41.95, "ch_vp_r2": 44.88, "gc_vp_s4": 13.17}),
        ("2023-10-14", "2024-01-03",
         {"th_vp": 41.95, "ch_vp_r3": 44.88, "gc_vp_s4": 13.17}),
        ("2024-01-04", "2024-06-26",
         {"th_vp": 32.33, "ch_vp_r3": 34.59, "gc_vp_s5": 10.15,
          "gc_vp_mm_s5": 2.82, "coc_vp_s5": 4.32, "dab_vp_s5": 3.01}),
        ("2024-06-27", "2024-12-31",
         {"th_vp": 48.32, "ch_vp_r4": 51.68}),
    )
]

In [46]:
def calculate_influence(row, influences):
    """Return the percentage-weighted sum of a row's values.

    Args:
        row: Mapping-like object exposing ``.get(key, default)`` (for example a
            DataFrame row passed by ``DataFrame.apply(axis=1)``).
        influences: Mapping of column name -> weight expressed in percent.

    Returns:
        The sum over all weighted columns of ``value * weight / 100``. A column
        absent from ``row`` contributes 0; with no weights the result is 0.
    """
    return sum(
        row.get(name, 0) * (percent / 100)
        for name, percent in influences.items()
    )

In [47]:
def add_influence_column(df, file_date_str, periods=None):
    """Add an 'influence' column to ``df`` using the weights in force on the file date.

    Args:
        df: DataFrame to augment; it is modified in place.
        file_date_str: Date the data refers to, in ``YYYY-MM-DD`` format.
        periods: Optional list of period dicts, each with "start_date" and
            "end_date" (inclusive, ``YYYY-MM-DD``) and "influences" (column
            name -> percentage weight). Defaults to the module-level
            ``influence_periods``.

    Returns:
        The same ``df`` object, now carrying the 'influence' column.

    Raises:
        ValueError: If ``file_date_str`` is not a valid ``YYYY-MM-DD`` date, or
            if no period covers that date.
    """
    if periods is None:
        periods = influence_periods

    file_date = datetime.strptime(file_date_str, "%Y-%m-%d")

    # The first period whose inclusive range contains the file date wins.
    for period in periods:
        start_date = datetime.strptime(period["start_date"], "%Y-%m-%d")
        end_date = datetime.strptime(period["end_date"], "%Y-%m-%d")
        if start_date <= file_date <= end_date:
            # Row-wise weighted sum of the columns listed for this period.
            df["influence"] = df.apply(
                calculate_influence, axis=1, influences=period["influences"]
            )
            return df

    # Fail loudly here: silently returning df without the column only defers
    # the failure to the first caller that reads df["influence"].
    raise ValueError(
        f"No influence period covers {file_date_str}; "
        "cannot compute the 'influence' column."
    )

In [50]:
def process_file(file_path, file_date_str):
    """Compute the HHI and CPI concentration figures for one CSV file.

    The file is loaded with pandas and given an 'influence' column via
    ``add_influence_column``. HHI is the sum of squared 'th_vp' values and CPI
    is the sum of squared 'influence' values, each rounded to two decimals.

    Args:
        file_path: Path of the CSV file to read.
        file_date_str: Date of the file (``YYYY-MM-DD``), used to pick the
            influence weights.

    Returns:
        Tuple ``(file_date_str, HHI, CPI)``.
    """
    frame = add_influence_column(pd.read_csv(file_path), file_date_str)

    # Sum of squares of each share column, rounded for reporting.
    HHI = round((frame['th_vp'] ** 2).sum(), 2)
    CPI = round((frame['influence'] ** 2).sum(), 2)

    # Progress line, one per processed file.
    print(f"File: {file_path} | Date: {file_date_str} | HHI: {HHI} | CPI: {CPI}")

    return file_date_str, HHI, CPI

In [51]:
def process_files_in_folder(folder_path, output_path):
    """Process every CSV file in a folder and save the HHI/CPI results as a CSV.

    Each input file must be named ``<date>.csv``; the part before ``.csv`` is
    passed to ``process_file`` as the file date. Files are processed in sorted
    name order so the result rows come out in a deterministic order.

    Args:
        folder_path: Directory containing the input CSV files.
        output_path: Path of the results CSV (columns: date, HHI, CPI). Its
            parent directory is created if needed; a bare file name writes to
            the current working directory.
    """
    # dirname() is '' for a bare file name, and os.makedirs('') raises, so only
    # create the directory when there is one to create.
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # os.listdir() returns entries in arbitrary order; sort for stable output.
    # Skip the results file itself in case it lives in the input folder, so a
    # re-run does not try to parse it as a dated input.
    output_abs = os.path.abspath(output_path)
    csv_files = sorted(
        f for f in os.listdir(folder_path)
        if f.endswith('.csv')
        and os.path.abspath(os.path.join(folder_path, f)) != output_abs
    )

    results = []
    for file_name in csv_files:
        file_path = os.path.join(folder_path, file_name)
        file_date_str = os.path.splitext(file_name)[0]  # "<date>.csv" -> "<date>"

        # Process the file and collect its date, HHI and CPI
        date, HHI, CPI = process_file(file_path, file_date_str)
        results.append([date, HHI, CPI])

    # Save the results to the output CSV file
    results_df = pd.DataFrame(results, columns=["date", "HHI", "CPI"])
    results_df.to_csv(output_path, index=False)
    print(f"Results saved to {output_path}")

In [52]:
# Run the pipeline: process every CSV in the input folder and write the
# per-file date/HHI/CPI table to the output path.
process_files_in_folder(
    folder_path="../Folder 4/",
    output_path="../Data_Source/output_hhi_cpi.csv",
)

File: ../Folder 4/2024-01-01.csv | Date: 2024-01-01 | HHI: 331.29 | CPI: 130.02
File: ../Folder 4/2024-01-02.csv | Date: 2024-01-02 | HHI: 333.43 | CPI: 130.46
File: ../Folder 4/2024-01-03.csv | Date: 2024-01-03 | HHI: 331.04 | CPI: 129.84
File: ../Folder 4/2024-01-04.csv | Date: 2024-01-04 | HHI: 331.64 | CPI: 86.55
File: ../Folder 4/2024-01-05.csv | Date: 2024-01-05 | HHI: 333.07 | CPI: 86.74
File: ../Folder 4/2024-01-06.csv | Date: 2024-01-06 | HHI: 332.95 | CPI: 86.7
File: ../Folder 4/2024-01-07.csv | Date: 2024-01-07 | HHI: 333.03 | CPI: 86.69
File: ../Folder 4/2024-01-08.csv | Date: 2024-01-08 | HHI: 333.38 | CPI: 86.75
File: ../Folder 4/2024-01-09.csv | Date: 2024-01-09 | HHI: 333.47 | CPI: 86.75
File: ../Folder 4/2024-01-10.csv | Date: 2024-01-10 | HHI: 333.22 | CPI: 86.69
File: ../Folder 4/2024-01-11.csv | Date: 2024-01-11 | HHI: 333.14 | CPI: 86.64
File: ../Folder 4/2024-01-12.csv | Date: 2024-01-12 | HHI: 333.1 | CPI: 86.59
File: ../Folder 4/2024-01-13.csv | Date: 2024-01-13