In [22]:
import os
import pandas as pd

In [23]:
# Directory (relative to the notebook's working directory) whose CSV files
# are listed and cleaned by the batch loop further down in this notebook.
folder_path = "../data/single_datas"

In [28]:
def clean_csv(file_path, column_type="Price"):
    """Read a CSV file and keep only rows whose value columns are in range.

    Processing steps:
      1. Drop rows that are entirely empty.
      2. Strip surrounding whitespace from the ``City`` values.
      3. Convert every column whose name contains ``column_type`` to numeric;
         entries that cannot be parsed become NaN.
      4. Keep only rows where *all* of those columns lie inside the accepted
         range. NaN fails every comparison, so rows with an unparseable or
         missing value are dropped too.

    Args:
        file_path: Path of the CSV file to read.
        column_type: ``"Price"`` (kept when ``0 < value < 1000``) or
            ``"Safety Index"`` (kept when ``0 <= value <= 100``).

    Returns:
        The cleaned ``DataFrame``, or ``None`` when ``column_type`` is not
        supported, the file has no ``City`` column, no column name contains
        ``column_type``, or any exception is raised while reading or
        processing the file. A message is printed in every case.
    """
    try:
        # Validate the argument before touching the file: a bad column_type is
        # a caller error and should be reported as such, not hidden behind a
        # file-related message or paid for with a needless read.
        if column_type == "Price":
            lower, upper, inclusive = 0, 1000, False
        elif column_type == "Safety Index":
            lower, upper, inclusive = 0, 100, True
        else:
            print(f"Invalid column type: {column_type}. Expected 'Price' or 'Safety Index'.")
            return None

        df = pd.read_csv(file_path)
        df.dropna(how="all", inplace=True)

        if "City" not in df.columns:
            print(f"No 'City' column in {file_path}.")
            return None

        df["City"] = df["City"].str.strip()

        # Substring match, so e.g. "Milk Price" and "Bread Price" are both
        # selected for column_type="Price".
        value_columns = [col for col in df.columns if column_type in col]

        if not value_columns:
            print(f"No {column_type} column in {file_path}.")
            return None

        # Accumulate one row mask across all value columns; a row survives
        # only if every value column is within bounds.
        keep = pd.Series(True, index=df.index, dtype=bool)
        for col in value_columns:
            values = pd.to_numeric(df[col], errors="coerce")
            df[col] = values
            if inclusive:
                keep &= (values >= lower) & (values <= upper)
            else:
                keep &= (values > lower) & (values < upper)

        df = df[keep]

        print(f"{file_path} cleaned (Column Type: {column_type})")
        return df

    except Exception as e:
        # Best-effort cleaning: report the problem and let the caller skip
        # this file instead of aborting a whole batch.
        print(f"Error in {file_path}: {e}")
        return None

In [None]:
# Clean every raw CSV in folder_path and write the result next to it as
# "<name>_cleaned.csv". os.listdir returns entries in arbitrary order, so the
# listing is sorted to make the processing (and log) order deterministic.
for file in sorted(os.listdir(folder_path)):
    # Skip non-CSV files and outputs produced by a previous run.
    if not file.endswith(".csv") or file.endswith("_cleaned.csv"):
        continue

    file_path = os.path.join(folder_path, file)
    cleaned_df = clean_csv(file_path)

    # clean_csv returns None (after printing the reason) when a file cannot
    # be cleaned; such files are simply skipped.
    if cleaned_df is None:
        continue

    # Derive the output name from the stem only. str.replace(".csv", ...)
    # would rewrite every ".csv" occurrence inside the name, not just the
    # extension.
    cleaned_file_path = os.path.join(
        folder_path, os.path.splitext(file)[0] + "_cleaned.csv"
    )

    # to_csv opens the target in write mode, so an existing output file is
    # overwritten; no explicit removal is needed beforehand.
    cleaned_df.to_csv(cleaned_file_path, index=False)
    print(f"File saved : {cleaned_file_path}")

In [29]:
import pandas as pd

def clean_csv(file_path):
    """Clean a CSV whose second column is ``Safety Index``.

    Fully empty rows are dropped, ``City`` values are stripped of surrounding
    whitespace, ``Safety Index`` is converted to numeric (unparseable entries
    become NaN) and only rows with ``0 <= Safety Index <= 100`` are kept.

    Note: this definition reuses the name ``clean_csv``; once executed it
    replaces any function of that name already defined in the session,
    including the two-argument variant defined earlier in this notebook.

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        The cleaned ``DataFrame``, or ``None`` (after printing the reason)
        when the file has no ``City`` column, its second column is not
        ``Safety Index``, or any exception occurs along the way.
    """
    try:
        table = pd.read_csv(file_path)
        table.dropna(how="all", inplace=True)

        has_city = "City" in table.columns
        if not has_city:
            print(f"No 'City' column in {file_path}.")
            return None

        table["City"] = table["City"].str.strip()

        # Positional check: the index must sit in the second column.
        second_column = table.columns[1]
        if second_column != "Safety Index":
            print(f"The second column in {file_path} is not 'Safety Index'.")
            return None

        safety = pd.to_numeric(table["Safety Index"], errors="coerce")
        table["Safety Index"] = safety

        # between() is inclusive on both ends; NaN never qualifies.
        in_range = table[safety.between(0, 100)]

        print(f"{file_path} cleaned (Safety Index as second column)")
        return in_range

    except Exception as e:
        print(f"Error in {file_path}: {e}")
        return None


In [30]:
# Input file and the sibling path the cleaned copy is written to
# ("<name>_cleaned.csv" in the same directory).
file_path = os.path.join("../data/", "safety_index_by_city.csv")
file_name = os.path.basename(file_path)
cleaned_file_path = os.path.join(
    os.path.dirname(file_path),
    file_name.replace(".csv", "_cleaned.csv"),
)

# clean_csv prints the reason and returns None when the file cannot be
# cleaned; in that case nothing is written.
cleaned_df = clean_csv(file_path)
if cleaned_df is not None:
    cleaned_df.to_csv(cleaned_file_path, index=False)
    print(f"File saved: {cleaned_file_path}")

../data/safety_index_by_city.csv cleaned (Safety Index as second column)
File saved: ../data/safety_index_by_city_cleaned.csv
