#### Importing Libraries

In [None]:
import requests
import os
import xml.etree.ElementTree as ET
from datetime import datetime
from azure.storage.blob import BlobServiceClient, BlobClient

#### Defining Functions

In [None]:
# Upload function to 'adams' folder
def upload_pdf_to_adams(file_path, file_name):
    """Upload a local file to the 'adams/' prefix of the configured container.

    Args:
        file_path: Path of the local file to read (opened in binary mode).
        file_name: Blob name to use under the 'adams/' prefix.

    Returns:
        None. Any exception raised while connecting, opening the file or
        uploading is caught and reported with ``print`` (best-effort upload).
    """
    try:
        # Connect to Azure Blob Storage
        blob_service_client = BlobServiceClient.from_connection_string(AZURE_CONNECTION_STRING)

        # Create blob client with folder prefix
        blob_path = f"adams/{file_name}"
        blob_client = blob_service_client.get_blob_client(container=AZURE_CONTAINER_NAME, blob=blob_path)

        # Upload the file; an existing blob with the same name is replaced
        with open(file_path, "rb") as data:
            blob_client.upload_blob(data, overwrite=True)

        # Report the container actually used instead of a hard-coded name
        print(f"✅ Uploaded '{file_name}' to 'adams/' folder in '{AZURE_CONTAINER_NAME}' container.")
    except Exception as e:
        print(f"❌ Upload failed: {e}")
 
def urlResponse(searchTerm, timeout=30):
    """Query the NRC ADAMS advanced-search endpoint for a document type.

    Args:
        searchTerm: Value placed in the ``DocumentType ... starts`` filter of
            the query. It is interpolated verbatim.
            NOTE(review): a term containing a single quote would likely break
            the query syntax - confirm the escaping rules of the ADAMS API.
        timeout: Seconds to wait for the server before ``requests`` raises a
            timeout error. Without it a stalled connection blocks forever.

    Returns:
        The ``requests`` response object. The HTTP status is not checked
        here; callers should inspect it before parsing the body.
    """
    # Base API endpoint for NRC ADAMS advanced search
    base_url = "https://adams.nrc.gov/wba/services/search/advanced/nrc"

    # Search parameters dictionary: the query sets the 'added-this-month'
    # option and asks for results ordered by "$date" descending.
    params = {
        "q": (
            f"(mode:sections,sections:(filters:(public-library:!t),"
            f"options:(added-this-month:!t,within-folder:(enable:!f,insubfolder:!f,path:'')),"
            f"properties_search_all:!(!(DocumentType,starts,'{searchTerm}',''))))"
        ),
        "qn": "New",
        "tab": "advanced-search-pars",
        "s": "$date",
        "so": "DESC",
    }

    # Named differently from the function to avoid shadowing it locally
    search_response = requests.get(base_url, params=params, timeout=timeout)

    return search_response
 
 
def extractXMLProperties(urlResponse):
    """Extract accession numbers and published dates from a search response.

    Every ``<result>`` element found anywhere in the XML body is inspected.
    Results that lack an accession number or a published date, or whose date
    cannot be parsed, are skipped with a warning so that one bad record does
    not abort the whole extraction and the two returned lists stay aligned.

    Args:
        urlResponse: Object exposing the XML payload as ``.content``.

    Returns:
        A tuple ``(accessionNoList, publishedDateList)`` of equal length:
        accession numbers as strings and the matching ``datetime.date``
        values.

    Raises:
        xml.etree.ElementTree.ParseError: If the payload is not valid XML.
    """
    accessionNoList = []
    publishedDateList = []

    root = ET.fromstring(urlResponse.content)
    for result in root.findall(".//result"):
        accession = result.findtext("AccessionNumber")
        publishedDate = result.findtext("PublishDatePARS")

        # findtext returns None for a missing child; guard before .split()
        if not accession or not publishedDate:
            print(f"⚠️ Skipping result with missing fields "
                  f"(accession={accession!r}, published={publishedDate!r})")
            continue

        # Keep only the first three tokens, e.g. "04/01/2025 08:10 AM",
        # dropping anything that follows the AM/PM marker.
        cleaned = " ".join(publishedDate.split()[:3])
        try:
            dt = datetime.strptime(cleaned, "%m/%d/%Y %I:%M %p")
        except ValueError:
            print(f"⚠️ Skipping {accession}: unrecognised published date {publishedDate!r}")
            continue

        # Append both values together so indexes stay in lockstep
        accessionNoList.append(accession)
        publishedDateList.append(dt.date())

    return accessionNoList, publishedDateList
 
def uploadPDFsToAzure(aList, pubDateList, timeout=60):
    """Download each document by accession number and upload it to Azure.

    Each document is fetched into memory and written to the blob
    ``adams/<accession>_<published date>.pdf`` in ``AZURE_CONTAINER_NAME``,
    replacing any blob of the same name. Failures are reported with ``print``
    and handled per document, so one bad download or upload does not stop the
    remaining documents from being processed.

    Args:
        aList: Accession numbers to download.
        pubDateList: Published dates, positionally matching ``aList``. The
            two are paired with ``zip``, so extra items in the longer
            sequence are ignored.
        timeout: Seconds to wait for each download before giving up on it.

    Returns:
        None.
    """
    try:
        # Create Azure Blob Service Client once and reuse it for every upload
        blob_service_client = BlobServiceClient.from_connection_string(AZURE_CONNECTION_STRING)
    except Exception as e:
        print(f"❌ Error during upload: {e}")
        return

    for aNo, pubDate in zip(aList, pubDateList):
        try:
            # NOTE(review): this URL looks like a web-search page rather than
            # a direct file link - confirm the body returned is the PDF.
            downloadURL = f"https://adamswebsearch2.nrc.gov/webSearch2/main.jsp?AccessionNumber={aNo}"
            pdf_response = requests.get(downloadURL, timeout=timeout)

            if pdf_response.status_code != 200:
                print(f"⚠️ Failed to download {aNo} (HTTP {pdf_response.status_code})")
                continue

            file_name = f"{aNo}_{pubDate}.pdf"
            blob_path = f"adams/{file_name}"  # upload path in blob

            # Get blob client for the file
            blob_client = blob_service_client.get_blob_client(container=AZURE_CONTAINER_NAME, blob=blob_path)

            # Upload directly from memory
            blob_client.upload_blob(pdf_response.content, overwrite=True)

            print(f"✅ Uploaded '{file_name}' to Azure Blob Storage.")
        except Exception as e:
            # Keep going: a failure on one document must not drop the rest
            print(f"❌ Error during upload of {aNo}: {e}")
 
 
 

In [None]:
# Azure Blob Storage settings used by the upload functions.
# The connection string is a secret: take it from the environment variable
# AZURE_CONNECTION_STRING so it never has to be pasted into the notebook.
# When the variable is unset this falls back to the original empty placeholder.
AZURE_CONNECTION_STRING = os.environ.get("AZURE_CONNECTION_STRING", "")
# Container that receives the uploaded blobs.
AZURE_CONTAINER_NAME = "non-eci"

In [None]:
# Local folder created up front; nothing else in this cell reads or writes it.
SAVE_FOLDER = "nrc_adams_ler_pdfs"
os.makedirs(SAVE_FOLDER, exist_ok=True)

# Run the search for the chosen document type
searchTerm = 'Licensee Event Report (LER)'
response = urlResponse(searchTerm)

# Pull accession numbers and published dates out of the XML response
accessionNo_List, publishedDate_List = extractXMLProperties(response)

# Report how many documents were found, labelled with the current month/year
curDate = datetime.now()
curMonth, curYear = curDate.month, curDate.year
months = dict(enumerate(
    ('JAN', 'FEB', 'MAR', 'APR', 'MAY', 'JUN',
     'JUL', 'AUG', 'SEP', 'OCT', 'NOV', 'DEC'),
    start=1,
))
print(f'Total PDFs from {months[curMonth]},{curYear}: {len(accessionNo_List)}')

# Download every document found and push it to Azure Blob Storage
uploadPDFsToAzure(accessionNo_List, publishedDate_List)