In [9]:
# pip install pandas sqlalchemy python-dotenv

In [10]:
import pandas as pd
from sqlalchemy import create_engine
import os
from dotenv import load_dotenv

Database and directory setup

In [11]:
# Pull variables from a local .env file (if one exists) into the process
# environment, so the connection URL never has to be written in the notebook.
load_dotenv()
DATABASE_URL = os.getenv("PG_DATABASE_URL")
if not DATABASE_URL:
    # os.getenv returns None for a missing variable; stop here with an
    # actionable message rather than failing later inside create_engine().
    raise RuntimeError(
        "PG_DATABASE_URL is not set. Define it in the environment or in a .env file."
    )

In [12]:
# SQLAlchemy engine built from the URL read from the environment;
# fetch_data() reads tables through this engine.
engine = create_engine(DATABASE_URL)

In [13]:
# Destination folder for the raw CSV extracts. The path is relative, so it
# resolves against the kernel's current working directory.
RAW_DATA_DIR = "../../data/bronze"
# Create the folder (and any missing parents) up front; exist_ok=True keeps
# the cell safe to re-run.
os.makedirs(RAW_DATA_DIR, exist_ok=True)

In [14]:
# Source tables to extract, one CSV per entry, processed in this order.
tables = [
    "User",
    "Designation",
    "Course",
    "EmployeeProgress",
    "Skill",
    "UserSkill",
    "DesignationSkill",
    "CourseSkill",
]

Collect and save data

In [15]:
def fetch_data(table):
    """Read a whole database table into a DataFrame.

    Args:
        table: Name of the table to read.

    Returns:
        The DataFrame produced by ``pd.read_sql_table`` for that table,
        read through the module-level ``engine``.
    """
    table_frame = pd.read_sql_table(table, con=engine)
    return table_frame

def save_raw_data(data, table_name):
    """Write a DataFrame to ``RAW_DATA_DIR`` as ``raw_<table_name>.csv``.

    Args:
        data: DataFrame to persist.
        table_name: Source table name; used to build the file name and the
            confirmation message.

    The DataFrame index is not written. A confirmation line is printed once
    the file has been saved.
    """
    csv_name = f'raw_{table_name}.csv'
    file_path = os.path.join(RAW_DATA_DIR, csv_name)
    data.to_csv(file_path, index=False)
    print(f"Saved {table_name} to {file_path}")

def run_data_ingestion(tables):
    """Extract each listed table and store it as a raw CSV file.

    Args:
        tables: Iterable of table names. Each one is fetched with
            ``fetch_data`` and written with ``save_raw_data``, in order.

    Prints a completion message after the last table has been saved.
    """
    for table_name in tables:
        save_raw_data(fetch_data(table_name), table_name)
    print("Raw data extraction complete!")

In [16]:
# Entry point: extract every table listed in `tables`. The guard keeps the
# extraction from running if this code is imported as a module instead of
# being executed directly.
if __name__ == "__main__":
    run_data_ingestion(tables)

Saved User to ../../data/bronze\raw_User.csv
Saved Designation to ../../data/bronze\raw_Designation.csv
Saved Course to ../../data/bronze\raw_Course.csv
Saved EmployeeProgress to ../../data/bronze\raw_EmployeeProgress.csv
Saved Skill to ../../data/bronze\raw_Skill.csv
Saved UserSkill to ../../data/bronze\raw_UserSkill.csv
Saved DesignationSkill to ../../data/bronze\raw_DesignationSkill.csv
Saved CourseSkill to ../../data/bronze\raw_CourseSkill.csv
Raw data extraction complete!
