In [2]:
# Install all necessary packages
!pip install psycopg2-binary spacy transformers torch sentencepiece
!pip install https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1.tar.gz

Collecting psycopg2-binary
  Downloading psycopg2_binary-2.9.10-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.9 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-n

In [5]:
import os
import psycopg2
import spacy
from transformers import pipeline
import torch
from kaggle_secrets import UserSecretsClient

# Read the database connection string from Kaggle's secret manager so the
# credential never appears in the notebook source. It is looked up under the
# secret label "RENDER_DATABASE_URL" and later handed to psycopg2.connect().
# NOTE(review): avoid printing DATABASE_URL — cell outputs are saved with the
# notebook and would leak the credential.
user_secrets = UserSecretsClient()
DATABASE_URL = user_secrets.get_secret("RENDER_DATABASE_URL")

2025-08-11 10:11:25.429126: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1754907085.638296      36 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1754907085.697470      36 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [6]:
print("Loading models... This may take a while.")
# Location of the custom spaCy NER pipeline (attached as a Kaggle input) and
# the Hugging Face Hub id of the sentiment classifier.
NER_MODEL_PATH = "/kaggle/input/spacy-ner/pytorch/default/1/model-best"
SENTIMENT_MODEL_NAME = "KOlCi/distilbert-financial-sentiment"

# Both handles stay None when loading fails; analyze_and_save() checks them
# and refuses to run instead of crashing halfway through a batch.
nlp_ner = None
sentiment_pipeline = None

try:
    nlp_ner = spacy.load(NER_MODEL_PATH)
    # Use the first CUDA device when the session has a GPU, otherwise fall back
    # to CPU (-1) so the notebook still works on a CPU-only accelerator setting
    # instead of failing on a hard-coded device index.
    sentiment_device = 0 if torch.cuda.is_available() else -1
    sentiment_pipeline = pipeline(
        "sentiment-analysis",
        model=SENTIMENT_MODEL_NAME,
        device=sentiment_device,
    )
    print("Models loaded successfully.")
except Exception as e:
    # Deliberately non-raising: the failure is reported here and the None
    # handles above signal downstream cells that the models are unavailable.
    print(f"FATAL: Could not load models. Error: {e}")


Loading models... This may take a while.




config.json:   0%|          | 0.00/727 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

Device set to use cuda:0


Models loaded successfully.


In [7]:
def analyze_and_save(batch_size=100):
    """Analyze unprocessed briefs and write the results back to the database.

    Selects up to ``batch_size`` rows from ``briefs`` whose ``sentiment`` is
    NULL, runs the NER model and the sentiment pipeline on each row's
    ``content``, and updates ``subject_company``, ``sentiment`` and
    ``processed_at`` for that row.

    Each row is committed on its own. A failure on one row is reported and
    rolled back without discarding the rows that were already written, so the
    count printed at the end matches what is actually stored.

    Args:
        batch_size: Maximum number of briefs to fetch in one call (default 100).

    Returns:
        None. Progress and errors are reported with ``print``.
    """
    if nlp_ner is None or sentiment_pipeline is None:
        print("Models not loaded, cannot process.")
        return

    processed_count = 0
    conn = None
    try:
        conn = psycopg2.connect(DATABASE_URL)
        with conn.cursor() as cur:
            cur.execute(
                "SELECT content_hash, content FROM briefs WHERE sentiment IS NULL LIMIT %s",
                (batch_size,),
            )
            briefs_to_process = cur.fetchall()

            if not briefs_to_process:
                print("No new briefs to process.")
                return

            print(f"Found {len(briefs_to_process)} briefs to analyze.")
            for content_hash, text in briefs_to_process:
                try:
                    ner_doc = nlp_ner(text)
                    # Every entity the NER model returns is stored (no label
                    # filter); an empty result is saved as NULL, not "".
                    companies = ", ".join(ent.text for ent in ner_doc.ents) or None
                    # truncation=True keeps texts longer than the model's
                    # maximum input length from raising, which would leave the
                    # row unprocessed and re-selected on every run.
                    sentiment_result = sentiment_pipeline(text, truncation=True)
                    sentiment = sentiment_result[0]['label'].upper()

                    cur.execute(
                        """
                        UPDATE briefs
                        SET subject_company = %s, sentiment = %s, processed_at = NOW()
                        WHERE content_hash = %s
                        """,
                        (companies, sentiment, content_hash)
                    )
                    # Commit per row so a later rollback cannot undo this update.
                    conn.commit()
                    processed_count += 1
                except Exception as e:
                    print(f"Error processing item {content_hash}: {e}")
                    # Only the failed row's work is pending at this point.
                    conn.rollback()

    except Exception as e:
        print(f"A database error occurred: {e}")
    finally:
        # psycopg2's connection context manager only ends the transaction;
        # the connection has to be closed explicitly.
        if conn is not None:
            conn.close()

    print(f"Processing complete. Analyzed {processed_count} brief(s).")

# Run one batch: fetch unprocessed briefs, analyze them and store the results.
# Re-run this cell to work through further batches.
analyze_and_save()

Found 100 briefs to analyze.


You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset


Processing complete. Analyzed 100 brief(s).
