In [23]:
import pandas as pd
from sqlalchemy import create_engine
import os
from dotenv import load_dotenv

In [24]:
# Load settings via python-dotenv, then read the database connection URL from
# the process environment.
load_dotenv()
DATABASE_URL = os.getenv("PG_DATABASE_URL")
if not DATABASE_URL:
    # os.getenv returns None for a missing variable; stop here with an
    # actionable message instead of failing later inside create_engine().
    raise RuntimeError(
        "PG_DATABASE_URL is not set; define it in the environment or in a .env file."
    )

In [25]:
# Shared SQLAlchemy engine built from DATABASE_URL; fetch_data() passes it to
# pandas as the connection for every table read.
engine = create_engine(DATABASE_URL)

In [26]:
# Directory that receives the per-table CSV files written by save_raw_data().
# The path is relative, so where it lands depends on the working directory the
# notebook is run from.
RAW_DATA_DIR = "../../data/bronze"
# exist_ok=True keeps a re-run of this cell from failing once the directory exists.
os.makedirs(RAW_DATA_DIR, exist_ok=True)

In [27]:
# Names of the database tables to export; each one is read in full and written
# to "<name>.csv" by run_data_ingestion().
tables = [
    "User",
    "Designation",
    "Course",
    "EmployeeProgress",
    "Skill",
    "UserSkill",
    "DesignationSkill",
    "CourseSkill",
]

In [28]:
def fetch_data(table):
    """Load a database table into a pandas DataFrame.

    Args:
        table: Name of the table to read.

    Returns:
        The DataFrame produced by ``pd.read_sql_table`` for ``table``, read
        through the module-level ``engine``.
    """
    frame = pd.read_sql_table(table, con=engine)
    return frame

def save_raw_data(data, table_name):
    """Write ``data`` to ``<table_name>.csv`` inside ``RAW_DATA_DIR``.

    The DataFrame index is not written to the file. A confirmation line with
    the destination path is printed once the file has been saved.

    Args:
        data: DataFrame to export.
        table_name: Table name, used as the CSV file's base name and in the
            confirmation message.
    """
    csv_name = f'{table_name}.csv'
    file_path = os.path.join(RAW_DATA_DIR, csv_name)
    data.to_csv(file_path, index=False)
    print(f"Saved {table_name} to {file_path}")

def run_data_ingestion(tables):
    """Export every table named in ``tables`` to its own CSV file.

    Each table is read with ``fetch_data`` and immediately written with
    ``save_raw_data``, one at a time and in the order given. Nothing is caught
    here, so an error on one table propagates and the remaining tables are
    not processed. A completion message is printed after the last table.

    Args:
        tables: Iterable of table names to export.
    """
    for table_name in tables:
        save_raw_data(fetch_data(table_name), table_name)
    print("Raw data extraction complete!")

In [29]:
# Entry point: exports every table listed in `tables`. The guard lets the same
# code be imported as a module without triggering the export.
if __name__ == "__main__":
    run_data_ingestion(tables)

Saved User to ../../data/bronze\User.csv
Saved Designation to ../../data/bronze\Designation.csv
Saved Course to ../../data/bronze\Course.csv
Saved EmployeeProgress to ../../data/bronze\EmployeeProgress.csv
Saved Skill to ../../data/bronze\Skill.csv
Saved UserSkill to ../../data/bronze\UserSkill.csv
Saved DesignationSkill to ../../data/bronze\DesignationSkill.csv
Saved CourseSkill to ../../data/bronze\CourseSkill.csv
Raw data extraction complete!
