 ### Step 0: Required Libraries

In [46]:
import os
from pathlib import Path

import pandas as pd
import requests
from sqlalchemy import create_engine
from sqlalchemy.engine import URL

from src.constants import SECRET_PATH
from src.utils.common import read_yaml

### Step 1: Load External Raw Data

In [47]:
# Resolve the external CSV relative to the current working directory, so the
# notebook must be launched from the project root for this path to exist.
new_data_csv = Path.cwd() / "data" / "external" / "new_housing_data.csv"

# NOTE(review): the preview shows an "Unnamed: 0" column, which looks like a
# DataFrame index that was written into the CSV. It is kept as-is here and will
# be uploaded with the rest of the columns -- confirm whether that is intended.
df = pd.read_csv(new_data_csv)
df.head()

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude,price
0,8.3252,41.0,6.984127,1.02381,322.0,2.555556,37.88,-122.23,4.526
1,8.3014,21.0,6.238137,0.97188,2401.0,2.109842,37.86,-122.22,3.585
2,7.2574,52.0,8.288136,1.073446,496.0,2.80226,37.85,-122.24,3.521
3,5.6431,52.0,5.817352,1.073059,558.0,2.547945,37.85,-122.25,3.413
4,3.8462,52.0,6.281853,1.081081,565.0,2.181467,37.85,-122.25,3.422


### Step 2: Connect to AWS PostgreSQL

In [48]:
# Load DB credentials from secrets.yaml so they never appear in the notebook.
secrets = read_yaml(SECRET_PATH)
db = secrets["PostGres_DB"]

# Build the connection URL from its components rather than an f-string:
# URL.create() escapes each part, so a password or user name containing
# characters such as '@', ':', '/' or '%' no longer yields a malformed or
# silently misparsed URL.
db_url = URL.create(
    drivername="postgresql",
    username=str(db["DB_USER"]),
    password=str(db["DB_PWD"]),  # str(): YAML may load an all-digit value as int
    host=str(db["DB_HOST"]),
    port=int(db["DB_PORT"]),  # accepts both 5432 and "5432" from the YAML file
    database=str(db["DB_NAME"]),
)

# Create SQLAlchemy engine (accepts a URL object as well as a string).
engine = create_engine(db_url)


[2025-08-08 23:15:08,083 : INFO : common : Yaml file <_io.TextIOWrapper name='config\\secrets.yaml' mode='r' encoding='UTF-8'> loaded successfully!]


### Step 4: Upload to PostgreSQL (new_housing_data table)

In [49]:
# Push the freshly loaded frame into PostgreSQL.
#   if_exists="replace" -> drop and recreate the table when it already exists
#   index=False         -> do not write the DataFrame index as an extra column
df.to_sql(
    name="new_housing_data",
    con=engine,
    if_exists="replace",
    index=False,
)
print("✅ Data uploaded to new_housing_data table")


✅ Data uploaded to new_housing_data table


### Step 5: Trigger FastAPI retraining

In [50]:
# Endpoint of the locally running FastAPI service that retrains the model.
RETRAIN_URL = "http://localhost:8000/retrain"
# (connect, read) timeouts in seconds. The recorded response message says the
# retraining "started and completed", which suggests the endpoint replies only
# after training finishes, so the read timeout is generous -- tune as needed.
RETRAIN_TIMEOUT = (5, 600)

# Best-effort trigger: every failure is reported with a message that matches
# its actual cause instead of being labelled a connection problem, and the
# notebook keeps running either way.
try:
    response = requests.post(RETRAIN_URL, timeout=RETRAIN_TIMEOUT)
    # Surface 4xx/5xx replies as errors instead of printing them as a success.
    response.raise_for_status()
    print("🎯 Retrain Trigger Response:", response.json())
except requests.exceptions.ConnectionError as e:
    print("❌ Could not connect to FastAPI server:", e)
except requests.exceptions.Timeout as e:
    print("❌ Retrain request timed out:", e)
except requests.exceptions.HTTPError as e:
    print(
        f"❌ Retrain endpoint returned HTTP {e.response.status_code}:",
        e.response.text,
    )
except ValueError as e:
    # response.json() raises a ValueError subclass when the body is not JSON.
    print("❌ Retrain response was not valid JSON:", e)
except requests.exceptions.RequestException as e:
    print("❌ Retrain request failed:", e)


🎯 Retrain Trigger Response: {'message': 'Retraining started and completed successfully'}
