In [7]:
import os
import pandas as pd

In [8]:
# Folder that load_cleaned_files scans for "*_cleaned.csv" files.
FOLDER_PATH = "../data/single_datas"
# Destination CSV that save_merged_file writes the merged table to.
OUTPUT_FILE = "../data/merged_prices.csv"

In [10]:
def load_cleaned_files(folder_path, safety_index_file="../data/safety_index_by_city_cleaned.csv"):
    """Load every ``*_cleaned.csv`` file in a folder, plus the safety-index file.

    Each file is reduced to two columns: ``"City"`` and one value column.
    For the per-item files the value column is the first non-``"City"``
    column, renamed to the file name without its ``_cleaned.csv`` suffix.
    For the safety-index file it is the first non-``"City"`` column, renamed
    to ``"Safety Index"``. Rows with a repeated ``"City"`` are dropped,
    keeping the first occurrence.

    Files lacking a ``"City"`` column or a value column are reported with a
    printed message and skipped.

    Parameters
    ----------
    folder_path : str
        Directory scanned (non-recursively) for ``*_cleaned.csv`` files.
    safety_index_file : str, optional
        Path of the safety-index CSV appended after the per-item files.

    Returns
    -------
    tuple[list[pandas.DataFrame], list[str]]
        The two-column frames and, in the same order, their value-column names.
    """
    dfs = []
    column_names = []
    suffix = "_cleaned.csv"

    # os.listdir returns entries in arbitrary order; sorting keeps the
    # resulting column order reproducible between runs and machines.
    for file in sorted(os.listdir(folder_path)):
        if not file.endswith(suffix):
            continue

        file_path = os.path.join(folder_path, file)
        # Strip only the trailing suffix, not every occurrence of it.
        column_name = file[: -len(suffix)]

        df = pd.read_csv(file_path)

        if "City" not in df.columns:
            print(f"no column 'City' in {file_path}.")
            continue

        price_columns = [col for col in df.columns if col != "City"]
        if not price_columns:
            print(f"no column price {file_path}.")
            continue

        df = df.rename(columns={price_columns[0]: column_name})
        df = df.drop_duplicates(subset="City")

        dfs.append(df[["City", column_name]])
        column_names.append(column_name)

    # The safety-index path is resolved independently of the loop above, so
    # an empty folder no longer leaves it undefined.
    if not os.path.exists(safety_index_file):
        print(f"Safety index file not found: {safety_index_file}")
        return dfs, column_names

    df_safety = pd.read_csv(safety_index_file)

    if "City" not in df_safety.columns:
        print(f"No 'City' column in {safety_index_file}.")
        return dfs, column_names

    # Pick the first non-"City" column rather than position 1, so the file
    # still loads when "City" is not the leading column.
    safety_columns = [col for col in df_safety.columns if col != "City"]
    if not safety_columns:
        print(f"No safety index column in {safety_index_file}.")
        return dfs, column_names

    column_name = "Safety Index"
    df_safety = df_safety.rename(columns={safety_columns[0]: column_name})
    df_safety = df_safety.drop_duplicates(subset="City")
    dfs.append(df_safety[["City", column_name]])
    column_names.append(column_name)

    return dfs, column_names

In [11]:
def merge_dataframes(dfs):
    """Outer-join a list of frames on ``"City"`` and return the value columns.

    Parameters
    ----------
    dfs : list[pandas.DataFrame]
        Frames that each contain a ``"City"`` column.

    Returns
    -------
    pandas.DataFrame or None
        The combined frame with the ``"City"`` column removed, so rows are
        no longer labelled by city. ``None`` (after printing a message) when
        ``dfs`` is empty.
    """
    if not dfs:
        print("no file _cleaned.csv found")
        return None

    first, *remaining = dfs
    combined = first
    for frame in remaining:
        # An outer join keeps cities that appear in only one of the frames.
        combined = combined.merge(frame, on="City", how="outer")

    values_only = combined.drop(columns=["City"])
    return values_only

In [12]:
def save_merged_file(merged_df, output_path):
    """Write the merged frame to ``output_path`` as CSV without the index.

    The parent directory of ``output_path`` is created when it does not
    exist. When ``merged_df`` is ``None`` nothing is written and a message is
    printed instead.

    Parameters
    ----------
    merged_df : pandas.DataFrame or None
        Frame to save.
    output_path : str
        Destination CSV path; may be a bare file name.
    """
    if merged_df is None:
        print("no file to save")
        return

    # os.path.dirname returns "" for a bare file name, and os.makedirs("")
    # raises FileNotFoundError, so only create a directory when there is one.
    parent_dir = os.path.dirname(output_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    merged_df.to_csv(output_path, index=False)
    print(f"file saved : {output_path}")

In [13]:
# Run the pipeline: load the per-file frames, outer-merge them on "City",
# then write the result to OUTPUT_FILE.
# column_names is returned alongside the frames but is not used in this cell.
dfs, column_names = load_cleaned_files(FOLDER_PATH)
merged_df = merge_dataframes(dfs)
save_merged_file(merged_df, OUTPUT_FILE)

file saved : ../data/merged_prices.csv
