# Pruebas de recomendación

Ejecuta los componentes de `src/recommendation/` (vectorizador, perfiles, matcher y generador de reportes) usando los artículos en `data_example/` para validar el flujo end-to-end antes de integrar con la app principal.


In [1]:
import json
import os
import sys
from pathlib import Path
from pprint import pprint

# Project checkout that contains `data_example/` (and, per the imports used
# later in the notebook, the `src` package). The REPORT_GENERATOR_ROOT
# environment variable can override the location so the notebook is not tied
# to one machine; when it is unset or empty the original path is used.
PROJECT_ROOT = Path(
    os.environ.get("REPORT_GENERATOR_ROOT")
    or "/home/ari/Collage/04-Forth_Year/Preimer_Semestre/PL/Final_Proj/Report_Generator"
)
DATA_DIR = PROJECT_ROOT / "data_example"

# Allow imports such as src.recommendation.*. The membership check keeps
# sys.path from gaining a duplicate entry every time this cell is re-run.
# NOTE(review): later cells also import bare modules (`vectorizer`,
# `user_profile`, `matcher`), which this entry alone does not make importable
# — presumably the notebook is run from the directory that holds them; confirm.
if str(PROJECT_ROOT) not in sys.path:
    sys.path.append(str(PROJECT_ROOT))

# Sorted so the same files are picked, in the same order, on every run.
article_paths = sorted(DATA_DIR.glob("article_*.json"))
print(f"Artículos detectados: {len(article_paths)}")


def load_article(path: Path) -> dict:
    """Read the UTF-8 encoded JSON file at ``path`` and return its decoded content."""
    raw_text = Path(path).read_text(encoding="utf-8")
    return json.loads(raw_text)


def load_articles(limit: int | None = None) -> list[dict]:
    """Load the articles listed in the module-level ``article_paths``.

    Args:
        limit: When given, only ``article_paths[:limit]`` is loaded; ``None``
            loads every path.

    Returns:
        One dict per loaded file: the decoded JSON content plus a
        ``"__path__"`` key holding the file's path.
    """
    if limit is None:
        chosen_paths = article_paths
    else:
        chosen_paths = article_paths[:limit]

    loaded = []
    for source in chosen_paths:
        content = load_article(source)
        # Keep the origin path with the article; later cells derive ids from it.
        loaded.append(content | {"__path__": source})
    return loaded

# Work with at most the first 10 article files (in sorted path order).
sample_size = 10
articles = load_articles(limit=sample_size)
loaded_count = len(articles)
print(f"Usando {loaded_count} artículos para las pruebas")


Artículos detectados: 22
Usando 10 artículos para las pruebas


In [2]:
from vectorizer import NewsVectorizer

# Text to vectorize for each article: the preprocessed "cleaned" text when it
# is present and non-empty, otherwise the raw "text" field ("" if missing).
prepared_texts = [
    doc.get("preprocessing", {}).get("cleaned") or doc.get("text", "")
    for doc in articles
]
# Article ids are the stems of the files the articles were loaded from.
article_ids = [doc["__path__"].stem for doc in articles]

news_vectorizer = NewsVectorizer(max_features=2000, ngram_range=(1, 2))
# NOTE(review): `fit_transform0` is not the conventional `fit_transform` name,
# and the recorded run printed an empty feature list — confirm this is the
# intended NewsVectorizer method.
article_matrix = news_vectorizer.fit_transform0(prepared_texts)
# Only the first 15 feature names are kept, as a sample for the printout below.
feature_names = news_vectorizer.get_feature_names()[:15]

# Map each article id to the matrix row at the same position
# (assumes rows follow the order of `prepared_texts` — TODO confirm).
article_vectors = {
    doc_id: article_matrix[row] for row, doc_id in enumerate(article_ids)
}

print("Matriz TF-IDF:", article_matrix.shape)
print("Features de ejemplo:", feature_names)


Matriz TF-IDF: (10, 2000)
Features de ejemplo: []


In [3]:
from vectorizer import UserProfileVectorizer
from user_profile import UserProfileManager

# Free-text description of the interests of the mock user.
profile_text = " ".join([
    "Me interesa seguir las crisis humanitarias en Medio Oriente,",
    "especialmente las operaciones militares que afectan a civiles y a la ayuda internacional.",
])

# The profile vectorizer is built on top of the news vectorizer from the
# previous cell, and the manager creates the profile through it.
profile_vectorizer = UserProfileVectorizer(news_vectorizer)
profile_manager = UserProfileManager(profile_vectorizer)
mock_profile = profile_manager.create_profile(profile_text)

# NOTE(review): the recorded run reports a profile vector of length 36 while
# the article matrix has 2000 columns, and the matcher then warns about
# differing dimensions — confirm both are meant to share one feature space.
detected_categories = mock_profile["categories"][:10]
print("Categorías detectadas:", detected_categories)
vector_length = len(mock_profile["vector"])
print("Dimensión del vector:", vector_length)


Categorías detectadas: []
Dimensión del vector: 36


In [4]:
from matcher import NewsMatcher
import numpy as np

# Build the plain-dict records handed to the matcher, one per article that
# has a vector from the vectorization cell.
articles_for_matching = []
for article in articles:
    article_id = article["__path__"].stem
    vector = article_vectors.get(article_id)
    if vector is not None:
        # Prefer the regex-annotated categories; fall back to the article tags.
        annotated = article.get("regex_annotations", {}).get("categories")
        categories = annotated or article.get("tags", [])
        record = {
            "id": article_id,
            "title": article.get("title"),
            "section": article.get("section"),
            "tags": article.get("tags", []),
            "categories": categories,
            "sentiment": article.get("sentiment"),
            "text": article.get("text", ""),
            "source_metadata": article.get("source_metadata", {}),
            "url": article.get("url"),
            "vector": vector.tolist()
        }
        articles_for_matching.append(record)

matcher = NewsMatcher()

# Score a single article first to exercise calculate_relevance on its own.
first_article = articles_for_matching[0]
relevance_inputs = {
    "user_vector": np.array(mock_profile["vector"]),
    "article_vector": np.array(first_article["vector"]),
    "article_categories": first_article.get("categories", []),
    "user_categories": mock_profile.get("categories", []),
    "article_sentiment": first_article.get("sentiment"),
    "article_section": first_article.get("section"),
}
relevance_score = matcher.calculate_relevance(**relevance_inputs)
print("Score artículo 1:", relevance_score)

# Then ask for the five best matches; each result is unpacked below as an
# (article, score, justification) triple.
matched = matcher.match_articles(mock_profile, articles_for_matching, top_k=5)
for article, score, justification in matched:
    shared = justification['matching_categories']
    print(f"{article['id']} -> score {score:.3f} | categorías comunes: {shared}")


Vectores de dimensiones diferentes, ajustando...
Vectores de dimensiones diferentes, ajustando...
Vectores de dimensiones diferentes, ajustando...
Vectores de dimensiones diferentes, ajustando...
Vectores de dimensiones diferentes, ajustando...
Vectores de dimensiones diferentes, ajustando...
Vectores de dimensiones diferentes, ajustando...
Vectores de dimensiones diferentes, ajustando...
Vectores de dimensiones diferentes, ajustando...
Vectores de dimensiones diferentes, ajustando...
Vectores de dimensiones diferentes, ajustando...


Score artículo 1: 0.4339538743836387
article_14 -> score 0.476 | categorías comunes: []
article_16 -> score 0.462 | categorías comunes: []
article_1 -> score 0.434 | categorías comunes: []
article_12 -> score 0.428 | categorías comunes: []
article_19 -> score 0.424 | categorías comunes: []


In [7]:
from src.summarization.summarizer import TextRankSummarizer, PersonalizedSummarizer
from src.recommendation.report_generator import ReportGenerator

# Wire the summarizers into the report generator: the TextRank summarizer is
# passed to the personalized one, which is passed to the generator.
text_rank = TextRankSummarizer(language="spanish")
personalized = PersonalizedSummarizer(text_rank)
report_generator = ReportGenerator(personalized)

# Generate a report from the matches of the previous cell (max_articles=3)
# and render it as text.
report = report_generator.generate_report(matched, mock_profile, max_articles=3)
report_text = report_generator.format_report_text(report)

report_size = report["articles_in_report"]
print("Artículos en el reporte:", report_size)
# Preview: the first 25 lines of the rendered report, printed as a list.
preview_lines = report_text.splitlines()[:25]
print(preview_lines)


Artículos en el reporte: 3
