In [1]:
!pip install watchdog




In [1]:
# Source of the helper module that the watcher cell imports.  The function is
# published as ``upload_to_s3`` (the name the watcher imports); the original
# name ``upload_csv_to_s3`` is kept as an alias so existing callers still work.
code = """
import os

import boto3

S3_PREFIX = "ner-output/"


def upload_to_s3(local_file_path="All_needed_entity.csv"):
    '''Upload ``local_file_path`` to S3 under the ``ner-output/`` prefix.

    Credentials are not stored in this file: boto3 resolves them through its
    default chain (AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY environment
    variables, the shared credentials file, an instance role, ...).
    The bucket is read from the S3_BUCKET_NAME environment variable and the
    region from AWS_DEFAULT_REGION (default "ap-south-1").

    Returns True when the upload succeeded, False otherwise.  Errors are
    printed rather than raised so a failed upload never stops the caller.
    '''
    s3 = boto3.client(
        "s3",
        region_name=os.environ.get("AWS_DEFAULT_REGION", "ap-south-1"),
    )

    bucket_name = os.environ.get("S3_BUCKET_NAME", "YOUR_BUCKET_NAME")
    # Only the file name goes into the key, so a local path such as
    # "out/data.csv" does not leak its directory into the bucket layout.
    s3_key = S3_PREFIX + os.path.basename(local_file_path)

    try:
        s3.upload_file(local_file_path, bucket_name, s3_key)
    except Exception as e:
        print("Error uploading file:", e)
        return False

    print("CSV successfully uploaded to S3 at:", s3_key)
    return True


# Backward-compatible alias for the original function name.
upload_csv_to_s3 = upload_to_s3
"""

with open("upload_csv_to_s3.py", "w", encoding="utf-8") as f:
    f.write(code)


In [None]:
import time
from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler
import os
import pandas as pd
from NER_custom_model import process_resume      
from upload_csv_to_s3 import upload_to_s3   

# Folder that the watchdog observer is scheduled on; new PDFs are dropped here.
WATCH_FOLDER = "pdf_files"
# CSV file that ResumeHandler appends one row to for every processed PDF.
CSV_FILE = "All_needed_entity.csv"

#Watchdog Handler
class ResumeHandler(FileSystemEventHandler):
    """Handle PDFs created in the watched folder.

    For every new ``.pdf`` file the handler calls ``process_resume`` on it,
    appends the returned row (plus a ``filename`` column) to ``CSV_FILE`` and
    then calls ``upload_to_s3``.  Any exception raised while handling a file
    is printed and swallowed so the watcher keeps running.
    """

    # A file is treated as completely written once its size is non-zero and
    # unchanged for SETTLE_CHECKS consecutive polls, SETTLE_INTERVAL seconds
    # apart (2 seconds in total by default).  SETTLE_TIMEOUT bounds the wait.
    SETTLE_INTERVAL = 1.0
    SETTLE_CHECKS = 2
    SETTLE_TIMEOUT = 60.0

    @classmethod
    def _wait_until_settled(cls, path):
        """Block until ``path`` stops growing; return False on timeout."""
        deadline = time.monotonic() + cls.SETTLE_TIMEOUT
        previous_size = -1
        stable_polls = 0

        while time.monotonic() < deadline:
            try:
                size = os.path.getsize(path)
            except OSError:
                # File not readable yet (or already gone); keep polling.
                size = -1

            if size > 0 and size == previous_size:
                stable_polls += 1
                if stable_polls >= cls.SETTLE_CHECKS:
                    return True
            else:
                stable_polls = 0

            previous_size = size
            time.sleep(cls.SETTLE_INTERVAL)

        return False

    def on_created(self, event):
        """Extract entities from a newly created PDF and publish the CSV.

        Args:
            event: the watchdog "created" event; only ``is_directory`` and
                ``src_path`` are read.
        """
        # Directories raise "created" events too; a folder whose name ends
        # in ".pdf" is not a resume.
        if event.is_directory:
            return

        # src_path may be bytes or str; normalise to str.
        pdf_path = os.fsdecode(event.src_path)
        if not pdf_path.lower().endswith(".pdf"):
            return

        # Wait so the file is completely saved before it is parsed.  On
        # timeout we still try: a broken file is reported by the except below.
        self._wait_until_settled(pdf_path)

        filename = os.path.basename(pdf_path)
        print(f"\nNew PDF detected: {filename}")

        try:
            # 1. Extract using NER model
            # NOTE(review): assumes process_resume returns a mutable mapping
            # of column name -> value - confirm against NER_custom_model.
            row = process_resume(pdf_path)

            # Add file name column
            row["filename"] = filename

            # 2. Append data to CSV.  Write the header when the file is new
            # or exists but is empty, otherwise the CSV would have no header.
            needs_header = (
                not os.path.exists(CSV_FILE) or os.path.getsize(CSV_FILE) == 0
            )
            pd.DataFrame([row]).to_csv(
                CSV_FILE,
                mode='a',
                index=False,
                header=needs_header,
            )

            print(f"Successfully extracted & saved to CSV: {filename}")

            # 3. Auto-upload updated CSV to S3
            upload_to_s3()
            print("CSV automatically uploaded to S3!")

        except Exception as e:
            # Best effort: one bad PDF must not stop the watcher.
            print(f"Error processing file {filename}: {e}")

# Publish a CSV left over from an earlier session before watching starts.
# Skip the call when no CSV exists yet, so success is not reported for an
# upload that had nothing to send.
if os.path.exists(CSV_FILE):
    upload_to_s3()
    print("Uploaded to S3!")
else:
    print(f"No {CSV_FILE} found yet - skipping initial upload.")



#Start Folder Watch 
def start_watching():
    """Watch ``WATCH_FOLDER`` for new PDFs until interrupted.

    Schedules a ``ResumeHandler`` on the folder (non-recursive), starts the
    observer and then idles in one-second sleeps.  A ``KeyboardInterrupt``
    (e.g. interrupting the kernel) ends the loop quietly; the observer is
    stopped and joined on every exit path, including unexpected errors.
    """
    # Make sure the folder exists so the observer has something to watch.
    os.makedirs(WATCH_FOLDER, exist_ok=True)

    observer = Observer()
    observer.schedule(ResumeHandler(), WATCH_FOLDER, recursive=False)
    observer.start()

    print(f"\nWatching folder: {WATCH_FOLDER}")
    print("Drop any PDF to auto-extract → update CSV → auto-upload to S3\n")

    try:
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        # Normal way to end the watch; nothing to report.
        pass
    finally:
        # Always shut the observer thread down, whatever ended the loop.
        observer.stop()
        observer.join()


# Runs the watch loop; this cell keeps executing until the kernel is interrupted.
start_watching()


Uploaded to S3 → ner-output/All_needed_entity_1765285158.csv
Uploaded to S3!

Watching folder: pdf_files
Drop any PDF to auto-extract → update CSV → auto-upload to S3


New PDF detected: Sejal_Chourey_resume.pdf (1).pdf
Successfully extracted & saved to CSV: Sejal_Chourey_resume.pdf (1).pdf
Uploaded to S3 → ner-output/All_needed_entity_1765285314.csv
CSV automatically uploaded to S3!

New PDF detected: SEJAL_CHOUREY_Zarthi.pdf.pdf
Successfully extracted & saved to CSV: SEJAL_CHOUREY_Zarthi.pdf.pdf
Uploaded to S3 → ner-output/All_needed_entity_1765285989.csv
CSV automatically uploaded to S3!

New PDF detected: SEJAL_CHOUREY_.pdf.pdf
Successfully extracted & saved to CSV: SEJAL_CHOUREY_.pdf.pdf
Uploaded to S3 → ner-output/All_needed_entity_1765300010.csv
CSV automatically uploaded to S3!

New PDF detected: SEJAL_CHOUREY_JoraEducation.pdf.pdf
Successfully extracted & saved to CSV: SEJAL_CHOUREY_JoraEducation.pdf.pdf
Uploaded to S3 → ner-output/All_needed_entity_1765303780.csv
CSV automati