In [None]:
# Install all necessary packages
!pip install psycopg2-binary spacy transformers torch sentencepiece
!pip install https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1.tar.gz

In [None]:
import os
import psycopg2
import spacy
from transformers import pipeline
import torch
from kaggle_secrets import UserSecretsClient

# Read the database connection string from Kaggle's secret manager so the
# credential never appears in the notebook source. The value is later passed
# unchanged to psycopg2.connect().
# NOTE(review): presumably the lookup fails if no secret labelled
# "RENDER_DATABASE_URL" is attached to this notebook — confirm in the Kaggle UI.
user_secrets = UserSecretsClient()
DATABASE_URL = user_secrets.get_secret("RENDER_DATABASE_URL")

In [None]:
!wget --no-check-certificate 'https://drive.google.com/uc?export=download&id=1lliKPLzQbkyg-gJpPMmTOOzwls9pJK2_' -O model-best.zip
!unzip model-best.zip -d ./model

In [None]:
# Load the spaCy NER model from ./model/model-best and the Hugging Face
# sentiment classifier. Both handles are module-level globals read by
# analyze_and_save().
print("Loading models... This may take a while.")
NER_MODEL_PATH = "./model/model-best"
SENTIMENT_MODEL_NAME = "KOlCi/distilbert-financial-sentiment"

# Both stay None if loading fails; analyze_and_save() checks for that and
# refuses to run instead of crashing.
nlp_ner = None
sentiment_pipeline = None

try:
    nlp_ner = spacy.load(NER_MODEL_PATH)
    # Use the first GPU when the session has one; otherwise fall back to the
    # CPU (-1) instead of failing on a hard-coded device=0.
    SENTIMENT_DEVICE = 0 if torch.cuda.is_available() else -1
    sentiment_pipeline = pipeline(
        "sentiment-analysis",
        model=SENTIMENT_MODEL_NAME,
        device=SENTIMENT_DEVICE,
    )
    print("Models loaded successfully.")
except Exception as e:
    # Deliberately non-fatal: the error is reported and the globals stay None.
    print(f"FATAL: Could not load models. Error: {e}")


In [None]:
def analyze_and_save(batch_size=100):
    """Run NER and sentiment analysis on unprocessed briefs and store the results.

    Selects up to ``batch_size`` rows from ``briefs`` whose ``sentiment`` is
    NULL, runs the global ``nlp_ner`` and ``sentiment_pipeline`` models on each
    row's ``content``, and writes ``subject_company``, ``sentiment``,
    ``confidence`` and ``processed_at`` back to the row identified by
    ``content_hash``.

    Each row is committed on its own, so a failure on one row only discards
    that row's update; rows that were already written stay written and the
    reported count matches what is actually in the database.

    Args:
        batch_size: Maximum number of briefs to fetch and process in one call.

    Returns:
        None. Progress and errors are reported with ``print``.
    """
    if nlp_ner is None or sentiment_pipeline is None:
        print("Models not loaded, cannot process.")
        return

    processed_count = 0
    conn = None
    try:
        # psycopg2's `with connection:` only ends the transaction and does NOT
        # close the connection, so the connection is closed explicitly in
        # `finally` below.
        conn = psycopg2.connect(DATABASE_URL)
        with conn.cursor() as cur:
            cur.execute(
                "SELECT content_hash, content FROM briefs WHERE sentiment IS NULL LIMIT %s",
                (batch_size,),
            )
            briefs_to_process = cur.fetchall()

            if not briefs_to_process:
                print("No new briefs to process.")
                return

            print(f"Found {len(briefs_to_process)} briefs to analyze.")
            for content_hash, text in briefs_to_process:
                try:
                    ner_doc = nlp_ner(text)
                    # Every entity the NER model found, comma-separated;
                    # NULL in the database when there are none.
                    companies = ", ".join(ent.text for ent in ner_doc.ents) or None

                    # truncation=True clips inputs longer than the model's
                    # maximum sequence length instead of raising on them.
                    sentiment_result = sentiment_pipeline(text, truncation=True)
                    sentiment = sentiment_result[0]['label'].upper()
                    score = f"{sentiment_result[0]['score']:.4f}"

                    cur.execute(
                        """
                        UPDATE briefs
                        SET subject_company = %s, sentiment = %s, confidence = %s, processed_at = NOW()
                        WHERE content_hash = %s
                        """,
                        (companies, sentiment, score, content_hash)
                    )
                    # Commit per row: a later rollback must not undo this update.
                    conn.commit()
                    processed_count += 1
                except Exception as e:
                    print(f"Error processing item {content_hash}: {e}")
                    # Only the failed row's work is pending at this point.
                    conn.rollback()

    except Exception as e:
        print(f"A database error occurred: {e}")
    finally:
        if conn is not None:
            conn.close()

    print(f"Processing complete. Analyzed {processed_count} brief(s).")

# Run one processing pass over the pending briefs. Re-run this cell to process
# further rows: each call only selects briefs whose sentiment is still NULL.
analyze_and_save()