In [0]:
import os
from datetime import datetime, timedelta, timezone

import requests
import urllib3

# Silence urllib3's InsecureRequestWarning, which would otherwise be emitted
# for requests made with certificate verification turned off.
urllib3.disable_warnings(category=urllib3.exceptions.InsecureRequestWarning)

# Step 1: Get today's date in IST (UTC+5:30)
# The clock is read directly in a fixed UTC+5:30 zone. A naive datetime.now()
# returns the host's *local* time, so adding the offset to it only yields IST
# when the host clock happens to be UTC.
ist_offset = timedelta(hours=5, minutes=30)
ist_today = datetime.now(timezone(ist_offset, "IST"))  # timezone-aware datetime
yyyymmdd = ist_today.strftime("%Y%m%d")  # compact date stamp, e.g. 20240131
print(f"Today's date in IST: {yyyymmdd}")

# Step 2: Assemble the bulletin's download URL from the date stamp.
# The path (including its doubled slash) is kept exactly as given.
pdf_url = "https://cpcb.nic.in//upload/Downloads/AQI_Bulletin_{}.pdf".format(yyyymmdd)
print("PDF URL: " + pdf_url)

# Step 3: Work out where the PDF will be stored and make sure the
# target directory exists (no error if it is already there).
workspace_dir = "files/aqi/raw"
pdf_filename = "AQI_Bulletin_" + yyyymmdd + ".pdf"
pdf_path = os.path.join(workspace_dir, pdf_filename)
os.makedirs(workspace_dir, exist_ok=True)

# Step 4: Download PDF and save to Workspace Files
# NOTE(review): verify=False turns off TLS certificate validation, so the
# server's identity is not checked. Kept as-is to preserve behavior; prefer
# passing verify=<path to CA bundle> if the host's certificate chain can be
# obtained.
try:
    # Only the network call lives in the try body, so the handler below deals
    # exclusively with request failures (connection errors, timeouts, HTTP errors).
    response = requests.get(pdf_url, timeout=30, verify=False)  # Disable SSL verification
    response.raise_for_status()  # 4xx/5xx responses become an HTTPError
except requests.exceptions.RequestException as e:
    print(f"❌ Failed to download PDF: {e}")
    print("Note: PDF might not be available yet for today. Try running tomorrow.")
else:
    # Reached only when the request succeeded. File-system errors raised here
    # are not caught and propagate, as they did before.
    with open(pdf_path, "wb") as f:
        f.write(response.content)
    print(f"✅ Successfully downloaded and saved: {pdf_path}")
    print(f"File size: {len(response.content)} bytes")

# Step 5: Verify file exists in Workspace Files
# The check is on today's file only; it also passes when the file was left
# behind by an earlier run.
if os.path.exists(pdf_path):
    print("📁 Raw directory contents:")
    # os.listdir() returns entries in arbitrary order, so sort before slicing
    # to make "the last 5" deterministic (name order).
    print(sorted(os.listdir(workspace_dir))[-5:])  # Show last 5 files
else:
    # Only today's file is known to be missing; the directory itself may
    # still contain other files.
    print(f"📁 {pdf_filename} not found in raw directory")
