In [1]:
import getpass
import re
from urllib.parse import quote

import numpy as np
import pandas as pd
from sqlalchemy import create_engine, text

# Name of the MySQL schema used when building the connection URL.
DB_NAME = "final_proj_new"

# Prompt for the root password at run time so it is not hard-coded in the notebook.
password = getpass.getpass("MySQL root password: ")

MySQL root password:  ········


In [3]:
# CSV files to load, as (csv_path, table_name) pairs, grouped by source folder.
datasets = (
    # Election CSVs from ../data/clean
    [
        ("../data/clean/election_2017_clean.csv", "election_2017"),
        ("../data/clean/election_2021_clean.csv", "election_2021"),
        ("../data/clean/election_2025_clean.csv", "election_2025"),
    ]
    # Rent growth CSVs from ../data/raw/Rent_price
    + [
        ("../data/raw/Rent_price/rent_growth_berlin_2016-17.csv", "rent_growth_2017"),
        ("../data/raw/Rent_price/rent_growth_berlin_2020-21.csv", "rent_growth_2021"),
        ("../data/raw/Rent_price/rent_growth_berlin_2024-25.csv", "rent_growth_2025"),
    ]
    # Economic indicator CSVs from ../data/raw/Economy
    + [
        ("../data/raw/Economy/Gini.csv", "Gini_Index"),
        ("../data/raw/Economy/poverty.csv", "Poverty_risk_rates"),
        ("../data/raw/Economy/unemployment.csv", "Unemployment"),
    ]
)

# Create connection engine.
# The password is percent-encoded before it is embedded in the URL: characters
# such as "@", ":", "/" or "%" would otherwise be read as URL delimiters and the
# connection string would be parsed incorrectly. safe="" makes quote() encode
# "/" as well, which it leaves untouched by default.
engine = create_engine(
    f"mysql+pymysql://root:{quote(password, safe='')}@localhost/{DB_NAME}"
)

def _clean_column_names(columns, max_len=50):
    """Return SQL-friendly, unique, lower-case names for ``columns``.

    Each header is stripped, every run of characters outside ``[0-9a-zA-Z_]``
    is collapsed to a single underscore, the result is cut to ``max_len``
    characters and lower-cased. A header that ends up empty becomes
    ``col_<position>`` (1-based).

    Different headers can collapse to the same name (e.g. ``"a b"`` and
    ``"a-b"``, or ``"Name"`` and ``"name"``). Later duplicates get a numeric
    suffix (``_2``, ``_3``, ...) so the table can still be created; the base
    name is shortened if needed so the result stays within ``max_len``.

    Args:
        columns: Iterable of original column headers.
        max_len: Maximum length of each returned name.

    Returns:
        list[str]: One cleaned name per input header, in the same order.
    """
    cleaned = []
    seen = set()
    for raw in columns:
        name = str(raw).strip().replace("\n", " ")
        name = re.sub(r"[^0-9a-zA-Z_]+", "_", name)[:max_len]
        if name == "":
            name = "col_" + str(len(cleaned) + 1)
        name = name.lower()

        # Resolve collisions with names already emitted for this table.
        candidate = name
        counter = 1
        while candidate in seen:
            counter += 1
            suffix = f"_{counter}"
            candidate = name[: max_len - len(suffix)] + suffix

        seen.add(candidate)
        cleaned.append(candidate)
    return cleaned


# Loop over all datasets and import them
for csv_path, table_name in datasets:
    print(f"Importing {csv_path} → {table_name}")
    # NOTE(review): latin1 never raises on decode; if a file is actually UTF-8,
    # non-ASCII characters will be silently garbled — confirm the encoding.
    df = pd.read_csv(csv_path, encoding="latin1")

    # Clean column names
    df.columns = _clean_column_names(df.columns)

    # Clean data values: replace +/-inf with NaN before writing
    df = df.replace([np.inf, -np.inf], np.nan)

    # The table is created under the lower-cased name and replaces any
    # existing table of that name.
    df.to_sql(table_name.lower(), con=engine, if_exists="replace", index=False)
    print(f"Table '{table_name}' created successfully!\n")

print("All datasets imported successfully")


Importing ../data/clean/election_2017_clean.csv → election_2017
Table 'election_2017' created successfully!

Importing ../data/clean/election_2021_clean.csv → election_2021
Table 'election_2021' created successfully!

Importing ../data/clean/election_2025_clean.csv → election_2025
Table 'election_2025' created successfully!

Importing ../data/raw/Rent_price/rent_growth_berlin_2016-17.csv → rent_growth_2017
Table 'rent_growth_2017' created successfully!

Importing ../data/raw/Rent_price/rent_growth_berlin_2020-21.csv → rent_growth_2021
Table 'rent_growth_2021' created successfully!

Importing ../data/raw/Rent_price/rent_growth_berlin_2024-25.csv → rent_growth_2025
Table 'rent_growth_2025' created successfully!

Importing ../data/raw/Economy/Gini.csv → Gini_Index
Table 'Gini_Index' created successfully!

Importing ../data/raw/Economy/poverty.csv → Poverty_risk_rates
Table 'Poverty_risk_rates' created successfully!

Importing ../data/raw/Economy/unemployment.csv → Unemployment
Table 'Unem