In [1]:
import os
import platform
import subprocess
import torch
import transformers
import fastapi
import sqlalchemy
from sqlalchemy import create_engine, text
from transformers import pipeline
from dotenv import load_dotenv
import requests

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
#  Load Environment Variables

load_dotenv()
print("Environment variables loaded successfully.\n")

# Basic metadata
print(f"Project: {os.getenv('PROJECT_NAME','RLHF_News_Summarization_System')}")
print(f"Model: {os.getenv('MODEL_ID','t5-small')}")
print(f"PostgreSQL User: {os.getenv('POSTGRES_USER','dhruv')}\n")

# System info
print(f"Hostname: {platform.node()}")
print(f"OS: {platform.system()} {platform.release()}")
print(f"Python: {platform.python_version()}\n")



Environment variables loaded successfully.

Project: RLHF_News_Summarization_System
Model: t5-small
PostgreSQL User: dhruv

Hostname: Dhruvs-MacBook-Pro-3.local
OS: Darwin 24.6.0
Python: 3.10.18



In [None]:
#  Library Versions
print(" Core Library Versions")
print(f"Torch: {torch.__version__} | CUDA Available: {torch.cuda.is_available()}")
print(f"Transformers: {transformers.__version__}")
print(f"FastAPI: {fastapi.__version__}")
print(f"SQLAlchemy: {sqlalchemy.__version__}\n")

üîç Core Library Versions
Torch: 2.9.0 | CUDA Available: False
Transformers: 4.57.1
FastAPI: 0.120.1
SQLAlchemy: 2.0.44



In [4]:
# PostgreSQL Connection Check
print("üîó Checking PostgreSQL connection\n")

POSTGRES_USER = os.getenv("POSTGRES_USER", "dhruv")
POSTGRES_PASSWORD = os.getenv("POSTGRES_PASSWORD", "Saidhruv19")
POSTGRES_DB = os.getenv("POSTGRES_DB", "summarization_db")
POSTGRES_PORT = os.getenv("POSTGRES_PORT", "5432")
POSTGRES_HOST = os.getenv("POSTGRES_HOST_LOCAL", "127.0.0.1")

DB_URL = f"postgresql+psycopg2://{POSTGRES_USER}:{POSTGRES_PASSWORD}@{POSTGRES_HOST}:{POSTGRES_PORT}/{POSTGRES_DB}"
print(f" DB URL: {DB_URL}\n")

try:
    engine = create_engine(DB_URL)
    with engine.connect() as conn:
        version = conn.execute(text("SELECT version();")).fetchone()
        user_db = conn.execute(text("SELECT current_user, current_database();")).fetchone()
        print(f" Connected to PostgreSQL!\n   Version: {version[0]}")
        print(f"   User: {user_db[0]} | Database: {user_db[1]}\n")
except Exception as e:
    print(f" Database connection failed:\n{e}\n")


üîó Checking PostgreSQL connection

 DB URL: postgresql+psycopg2://dhruv:Saidhruv19@127.0.0.1:5432/summarization_db

 Connected to PostgreSQL!
   Version: PostgreSQL 15.14 (Debian 15.14-1.pgdg13+1) on aarch64-unknown-linux-gnu, compiled by gcc (Debian 14.2.0-19) 14.2.0, 64-bit
   User: dhruv | Database: summarization_db



In [5]:
# Docker & Compose Check
print("Docker Environment Check\n")

def run_cmd(cmd):
    try:
        res = subprocess.run(cmd, shell=True, check=True, capture_output=True, text=True)
        return res.stdout.strip()
    except subprocess.CalledProcessError:
        return None

docker_version = run_cmd("docker --version")
compose_version = run_cmd("docker compose version")

print(f" {docker_version}" if docker_version else " Docker not found.")
print(f" {compose_version}\n" if compose_version else " Docker Compose not found.\n")


Docker Environment Check

 Docker version 28.5.1, build e180ab8
 Docker Compose version v2.40.0-desktop.1



In [6]:
#  Model Load Test
print("ü§ñ Loading Summarization Model and Tokenizer...\n")

try:
    summarizer = pipeline("summarization", model=os.getenv("MODEL_ID","t5-small"))
    text_sample = (
        "The United Nations held a summit on climate change, "
        "emphasizing the urgent need to reduce carbon emissions "
        "and invest in renewable energy technologies."
    )
    summary = summarizer(text_sample, max_length=50, min_length=25, do_sample=False)[0]["summary_text"]
    print("Model and Tokenizer loaded successfully!\n")
    print("Example Summary:")
    print(summary, "\n")
except Exception as e:
    print(f"Model load failed:\n{e}\n")


ü§ñ Loading Summarization Model and Tokenizer...



Device set to use mps:0
Your max_length is set to 50, but your input_length is only 31. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=15)
Both `max_new_tokens` (=256) and `max_length`(=50) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Model and Tokenizer loaded successfully!

Example Summary:
the united nations held a summit on climate change . the summit emphasized the need to reduce carbon emissions and invest in renewable energy technologies . 



In [9]:
print("Checking FastAPI Health Endpoint...\n")

API_URL = f"http://{os.getenv('API_HOST','0.0.0.0')}:{os.getenv('API_PORT','8000')}/health"
api_ok = False

try:
    response = requests.get(API_URL, timeout=5)
    if response.status_code == 200:
        print(f" API is live - {API_URL}")
        print(f"Response: {response.json()}\n")
        api_ok = True
    else:
        print(f"API returned unexpected status: {response.status_code}")
except Exception as e:
    print(f"API not reachable ‚Äî ensure FastAPI container is running.\n({e})\n")

Checking FastAPI Health Endpoint...

 API is live - http://0.0.0.0:8000/health
Response: {'status': 'ok', 'message': 'FastAPI backend is live'}

