# Ponderación de características (TO y TF-IDF)

Objetivo: a partir de las matrices BoW del Punto 3 (con y sin stemming), generar dos esquemas de ponderación: TO y TF-IDF. Guardamos artefactos y mostramos una verificación rápida.

4.1 Insumos del Punto 3

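Cargamos las cuatro matrices BoW del Punto 3 (bow_raw.npz, bow_stopwords.npz, bow_stem_only.npz, bow_stemmed_stopwords.npz) y sus vocabularios (vocab_raw.json, vocab_stopwords.json, vocab_stem_only.json, vocab_stemmed_stopwords.json). Chequeos de forma y consistencia.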

In [17]:
%pip install scipy scikit-learn


Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 25.0.1 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [18]:
# configuración de rutas

from pathlib import Path
import json
import numpy as np
import pandas as pd
from scipy import sparse
from IPython.display import display

# Base paths
BASE_P4 = Path.cwd()  # current working directory (expected to be this notebook's folder)
BASE_P3 = BASE_P4.parent / "3_caracteristicas_bow"
P3_MATS = BASE_P3 / "matrices"
P3_VOCS = BASE_P3 / "vocabularios"

# Output folder for this step (Punto 4); created on the spot if it is missing
ARTIF_DIR = BASE_P4 / "artifacts_p4"
ARTIF_DIR.mkdir(parents=True, exist_ok=True)
print("üìÇ Guardaremos salidas en:", ARTIF_DIR.resolve())


üìÇ Guardaremos salidas en: C:\Users\jpveg\2025-2S\PLN\practica1-noticias-falsas\corpus\4_ponderacion\artifacts_p4


4.2 Ponderación TO (frecuencia absoluta)

Publicamos las matrices BoW del punto 3 como TO (son equivalentes). Guardamos artefactos y generamos un preview chiquito en CSV para el informe.

In [19]:
# Load ALL the variants produced in Punto 3
# Exact file mapping: variant key -> (BoW matrix file, vocabulary file)
VARIANTS = {
    "none":              ("bow_raw.npz",                "vocab_raw.json"),              # no preprocessing technique
    "stopwords_only":    ("bow_stopwords.npz",          "vocab_stopwords.json"),        # stopword removal only
    "stemming_only":     ("bow_stem_only.npz",          "vocab_stem_only.json"),        # stemming only
    "both_stop+stem":    ("bow_stemmed_stopwords.npz",  "vocab_stemmed_stopwords.json") # both techniques
}

def assert_exists(path: Path, friendly: str):
    """Fail fast when a required input file is missing.

    Args:
        path: Filesystem location that must exist.
        friendly: Human-readable label for the file, used in the error message.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
    """
    if path.exists():
        return
    raise FileNotFoundError(f"No encontr√© {friendly}: {path}")

def load_vocab_dict(fp: Path) -> dict:
    """Read a UTF-8 JSON vocabulary file and return its top-level object.

    Args:
        fp: Path to the JSON file; expected to hold a ``{term: index}`` object.

    Returns:
        The parsed dictionary.

    Raises:
        ValueError: If the top-level JSON value is not an object (dict).
    """
    parsed = json.loads(fp.read_text(encoding="utf-8"))
    if isinstance(parsed, dict):
        return parsed
    raise ValueError(f"El vocabulario no es un dict: {fp}")

def vocab_to_term_array(vocab_dict: dict) -> np.ndarray:
    """Invert a ``{term: index}`` vocabulary into an index-ordered term array.

    Position ``i`` of the result holds the term whose index is ``i``, so the
    array can label the columns of a matrix built with that vocabulary.

    Args:
        vocab_dict: Mapping from term to its column index.

    Returns:
        1-D object-dtype array of terms, of length ``max(index) + 1``
        (empty when ``vocab_dict`` is empty).

    Raises:
        ValueError: If an index is negative, if two terms share the same
            index, or if the indices leave gaps (are not contiguous from 0).
    """
    size = max(vocab_dict.values()) + 1 if vocab_dict else 0
    terms = [None] * size
    for t, i in vocab_dict.items():
        # No upper-bound check is needed: size is max(index) + 1 by construction.
        if i < 0:
            raise ValueError(f"√çndice fuera de rango en vocab: {t} -> {i}")
        # A slot that is already filled means two terms claim the same column;
        # overwriting it would silently drop a term from the vocabulary.
        if terms[i] is not None:
            raise ValueError(
                f"Indice duplicado en vocab: {i} -> {terms[i]!r} y {t!r}"
            )
        terms[i] = t
    if any(x is None for x in terms):
        raise ValueError("Vocabulario con huecos en √≠ndices (no contiguos).")
    return np.array(terms, dtype=object)

# Per-variant containers: BoW matrices and their index-ordered term arrays.
Xs_bow = {}
terms_dict = {}

for key, (mat_name, voc_name) in VARIANTS.items():
    mat_path = P3_MATS / mat_name
    voc_path = P3_VOCS / voc_name
    assert_exists(mat_path, f"matriz {key}")
    assert_exists(voc_path, f"vocabulario {key}")
    X = sparse.load_npz(mat_path)
    vocab = load_vocab_dict(voc_path)
    terms = vocab_to_term_array(vocab)
    # Explicit raise rather than `assert`: assertions are removed under
    # `python -O`, and a matrix/vocabulary mismatch must never go unnoticed.
    if X.shape[1] != len(terms):
        raise ValueError(f"[{key}] columnas={X.shape[1]} ‚â† vocab={len(terms)}")
    Xs_bow[key] = X
    terms_dict[key] = terms

# Summary of what was loaded, one line per variant.
print("‚úÖ Insumos cargados (4 combinaciones):")
for k in VARIANTS:
    X = Xs_bow[k]
    print(f"- {k:15s}  shape={X.shape}  nnz={X.nnz}  vocab={len(terms_dict[k])}")


‚úÖ Insumos cargados (4 combinaciones):
- none             shape=(7200, 37565)  nnz=571101  vocab=37565
- stopwords_only   shape=(7200, 16102)  nnz=247283  vocab=16102
- stemming_only    shape=(7200, 33949)  nnz=600607  vocab=33949
- both_stop+stem   shape=(7200, 10528)  nnz=240597  vocab=10528


In [20]:
# Previews de TO (BoW) por variante (10x20)
def tiny_matrix_preview(X, terms, n_rows=10, n_cols=20, ndigits=None, title=""):
    """Display and return the top-left corner of a matrix as a DataFrame.

    Args:
        X: Matrix supporting 2-D slicing and ``.toarray()`` on the slice
            (presumably a SciPy sparse matrix; confirm against callers).
        terms: Sequence of column labels; its first ``n_cols`` entries are used.
        n_rows: Maximum number of rows to show (clamped to ``X.shape[0]``).
        n_cols: Maximum number of columns to show (clamped to ``X.shape[1]``).
        ndigits: If not ``None``, round the values to this many decimals.
        title: Optional heading printed before the table.

    Returns:
        The DataFrame that was displayed.
    """
    # Clamp the requested window to the actual matrix bounds.
    n_rows = min(n_rows, X.shape[0])
    n_cols = min(n_cols, X.shape[1])

    window = X[:n_rows, :n_cols].toarray()
    df = pd.DataFrame(window, columns=terms[:n_cols])
    if ndigits is not None:
        df = df.round(ndigits)

    if title:
        print(f"\nüñºÔ∏è {title}  (primeras {n_rows} filas √ó {n_cols} columnas)")
    display(df)
    return df

def top_terms_by_count(X, terms, k=15):
    """Return the ``k`` columns with the largest column sums, highest first.

    Args:
        X: Matrix whose ``sum(axis=0)`` gives one total per column.
        terms: Array of column labels; must support integer-array indexing
            (e.g. a NumPy array).
        k: Number of top columns to keep.

    Returns:
        DataFrame with columns ``termino`` (label) and ``frecuencia`` (column sum).
    """
    col_totals = np.asarray(X.sum(axis=0)).ravel()
    # Ascending argsort read backwards gives descending order; keep the first k.
    best = np.argsort(col_totals)[::-1][:k]
    table = pd.DataFrame({"termino": terms[best], "frecuencia": col_totals[best]})
    return table

# For every variant: show a 10x20 corner of the TO matrix and its most frequent terms.
for key in ("none", "stopwords_only", "stemming_only", "both_stop+stem"):
    X = Xs_bow[key]
    terms = terms_dict[key]
    print(f"\n=== TO Preview ¬∑ {key} ¬∑ shape={X.shape} ¬∑ nnz={X.nnz} ===")
    tiny_matrix_preview(
        X,
        terms,
        n_rows=10,
        n_cols=20,
        ndigits=None,
        title=f"TO ‚Äì {key}",
    )
    print("üîù Top 15 por frecuencia (TO):")
    display(top_terms_by_count(X, terms, k=15))



=== TO Preview ¬∑ none ¬∑ shape=(7200, 37565) ¬∑ nnz=571101 ===

üñºÔ∏è TO ‚Äì none  (primeras 10 filas √ó 20 columnas)


Unnamed: 0,de,la,el,que,en,del,los,las,por,un,para,se,con,una,no,al,su,ha,gobierno,es
0,7,5,3,2,5,1,3,3,0,0,0,1,1,1,0,0,0,0,0,0
1,3,6,2,2,1,1,1,0,2,0,0,1,0,0,1,1,0,0,0,0
2,8,2,1,0,1,1,4,1,1,0,0,0,0,0,0,0,0,0,0,0
3,5,4,1,1,0,1,0,0,1,1,0,0,0,0,0,0,1,0,0,0
4,3,3,3,5,3,0,1,1,1,0,1,0,2,0,1,0,1,3,0,1
5,8,4,5,0,3,0,2,1,1,0,1,0,1,0,0,2,0,1,0,0
6,3,2,0,0,1,1,0,0,0,1,0,1,1,0,1,0,0,3,0,0
7,4,3,2,0,1,1,2,0,0,0,1,0,2,0,0,0,0,0,0,0
8,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0
9,2,3,1,4,1,0,1,0,3,0,0,2,0,0,1,0,0,1,0,0


üîù Top 15 por frecuencia (TO):


Unnamed: 0,termino,frecuencia
0,de,35328
1,la,24532
2,el,19543
3,que,14374
4,en,14055
5,del,8449
6,los,8057
7,las,5599
8,de_la,5571
9,por,5155



=== TO Preview ¬∑ stopwords_only ¬∑ shape=(7200, 16102) ¬∑ nnz=247283 ===

üñºÔ∏è TO ‚Äì stopwords_only  (primeras 10 filas √ó 20 columnas)


Unnamed: 0,gobierno,mas,pp,catalunya,si,iniciativa,madrid,per,presidente,partido,vers,tras,equo,congreso,anos,ley,psoe,tambien,dos,ser
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
1,0,0,0,1,1,1,0,1,0,1,1,0,1,1,0,0,0,0,0,1
2,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,2,0
3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5,0,0,1,0,0,0,1,0,0,1,0,0,0,0,1,0,0,0,0,0
6,0,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0
7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
9,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


üîù Top 15 por frecuencia (TO):


Unnamed: 0,termino,frecuencia
0,gobierno,2347
1,mas,1638
2,pp,1303
3,catalunya,1198
4,si,1184
5,iniciativa,966
6,madrid,965
7,presidente,909
8,per,909
9,partido,882



=== TO Preview ¬∑ stemming_only ¬∑ shape=(7200, 33949) ¬∑ nnz=600607 ===

üñºÔ∏è TO ‚Äì stemming_only  (primeras 10 filas √ó 20 columnas)


Unnamed: 0,de,la,el,que,en,del,los,las,de_la,por,un,par,se,con,una,no,al,su,ha,en_el
0,7,5,3,2,5,1,3,3,2,0,0,0,1,1,1,0,0,0,0,0
1,3,6,2,2,1,1,1,0,0,2,0,0,1,0,0,1,1,0,0,1
2,8,2,1,0,1,1,4,1,1,1,0,0,0,0,0,0,0,0,0,0
3,5,4,1,1,0,1,0,0,1,1,1,0,0,0,0,0,0,1,0,0
4,3,3,3,5,3,0,1,1,0,1,0,1,0,2,0,1,0,1,3,0
5,8,4,5,0,3,0,2,1,0,1,0,1,0,1,0,0,2,0,1,0
6,3,2,0,0,1,1,0,0,1,0,1,0,1,1,0,1,0,0,3,0
7,4,3,2,0,1,1,2,0,1,0,0,1,0,2,0,0,0,0,0,0
8,1,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,1,0,0
9,2,3,1,4,1,0,1,0,0,3,0,0,2,0,0,1,0,0,1,0


üîù Top 15 por frecuencia (TO):


Unnamed: 0,termino,frecuencia
0,de,35328
1,la,24532
2,el,19543
3,que,14375
4,en,14055
5,del,8449
6,los,8062
7,las,5600
8,de_la,5571
9,por,5155



=== TO Preview ¬∑ both_stop+stem ¬∑ shape=(7200, 10528) ¬∑ nnz=240597 ===

üñºÔ∏è TO ‚Äì both_stop+stem  (primeras 10 filas √ó 20 columnas)


Unnamed: 0,gobiern,part,mas,president,pp,cataluny,si,public,inici,nuev,madr,polit,social,per,ser,cas,vers,unid,per_cataluny,vers_per
0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,0,1,0,0,0,1,1,0,1,0,0,0,0,1,2,0,1,0,1,1
2,1,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,0
5,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0
6,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0
7,0,0,0,0,0,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0
8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
9,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


üîù Top 15 por frecuencia (TO):


Unnamed: 0,termino,frecuencia
0,gobiern,2377
1,part,1712
2,mas,1646
3,president,1433
4,pp,1303
5,cataluny,1198
6,si,1184
7,public,1129
8,inici,1128
9,nuev,1103


In [21]:
# Calcular y exportar TF-IDF (4 variantes)
from sklearn.feature_extraction.text import TfidfTransformer

# TfidfTransformer settings shared by all variants.
tfidf_cfg = dict(use_idf=True, smooth_idf=True, sublinear_tf=False, norm="l2")
X_tfidf_all = {}  # kept in memory so later cells can reuse the matrices

for key, X in Xs_bow.items():
    terms = terms_dict[key]
    tfidf = TfidfTransformer(**tfidf_cfg)
    X_tfidf = tfidf.fit_transform(X)
    X_tfidf_all[key] = X_tfidf

    # TF-IDF matrix
    fn_mat = ARTIF_DIR / f"X_TFIDF_{key}.npz"
    sparse.save_npz(fn_mat, X_tfidf)

    # Save the IDF vector together with its vocabulary (handy for analysis).
    # The terms are stored as a unicode array rather than an object array, so
    # the archive is not pickled and can be read without allow_pickle=True.
    fn_idf = ARTIF_DIR / f"idf_{key}.npz"
    np.savez_compressed(fn_idf, terms=np.asarray(terms).astype(str), idf=tfidf.idf_)

    print(f"‚úÖ TF-IDF guardado -> {fn_mat.name} | {fn_idf.name} | shape={X_tfidf.shape} | nnz={X_tfidf.nnz}")


‚úÖ TF-IDF guardado -> X_TFIDF_none.npz | idf_none.npz | shape=(7200, 37565) | nnz=571101
‚úÖ TF-IDF guardado -> X_TFIDF_stopwords_only.npz | idf_stopwords_only.npz | shape=(7200, 16102) | nnz=247283
‚úÖ TF-IDF guardado -> X_TFIDF_stemming_only.npz | idf_stemming_only.npz | shape=(7200, 33949) | nnz=600607
‚úÖ TF-IDF guardado -> X_TFIDF_both_stop+stem.npz | idf_both_stop+stem.npz | shape=(7200, 10528) | nnz=240597


In [22]:
# TF-IDF previews (10x20 window) plus the highest-IDF terms for each variant
for key in ["none", "stopwords_only", "stemming_only", "both_stop+stem"]:
    X_tfidf = X_tfidf_all[key]
    terms   = terms_dict[key]
    print(f"\n=== TF-IDF Preview ¬∑ {key} ¬∑ shape={X_tfidf.shape} ¬∑ nnz={X_tfidf.nnz} ===")
    tiny_matrix_preview(X_tfidf, terms, n_rows=10, n_cols=20, ndigits=4, title=f"TF-IDF ‚Äì {key}")

    # Top 10 IDF.
    # np.load returns an NpzFile that keeps the archive open, so the arrays are
    # read inside `with` and the file handle is released right afterwards.
    # NOTE(review): allow_pickle=True unpickles object arrays; only load
    # artifacts written by this notebook, never untrusted files.
    with np.load(ARTIF_DIR / f"idf_{key}.npz", allow_pickle=True) as idf_pack:
        idf_df = (
            pd.DataFrame({"termino": idf_pack["terms"], "idf": idf_pack["idf"]})
            .sort_values("idf", ascending=False)
            .reset_index(drop=True)
        )
    print("üîé Top 10 t√©rminos con mayor IDF:")
    display(idf_df.head(10))



=== TF-IDF Preview ¬∑ none ¬∑ shape=(7200, 37565) ¬∑ nnz=571101 ===

üñºÔ∏è TF-IDF ‚Äì none  (primeras 10 filas √ó 20 columnas)


Unnamed: 0,de,la,el,que,en,del,los,las,por,un,para,se,con,una,no,al,su,ha,gobierno,es
0,0.1228,0.092,0.0569,0.0435,0.1086,0.0251,0.0803,0.0936,0.0,0.0,0.0,0.0328,0.0337,0.0353,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0512,0.1073,0.0368,0.0423,0.0211,0.0244,0.026,0.0,0.0619,0.0,0.0,0.0319,0.0,0.0,0.0358,0.034,0.0,0.0,0.0,0.0
2,0.1771,0.0464,0.0239,0.0,0.0274,0.0317,0.1351,0.0394,0.0402,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.1064,0.0892,0.023,0.0264,0.0,0.0305,0.0,0.0,0.0386,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.045,0.0,0.0,0.0
4,0.0455,0.0477,0.0491,0.094,0.0562,0.0,0.0231,0.0269,0.0275,0.0,0.0275,0.0,0.0582,0.0,0.0318,0.0,0.0321,0.0986,0.0,0.0415
5,0.136,0.0713,0.0918,0.0,0.0631,0.0,0.0519,0.0302,0.0308,0.0,0.0308,0.0,0.0326,0.0,0.0,0.0677,0.0,0.0369,0.0,0.0
6,0.066,0.0462,0.0,0.0,0.0272,0.0315,0.0,0.0,0.0,0.0414,0.0,0.0411,0.0423,0.0,0.0462,0.0,0.0,0.1433,0.0,0.0
7,0.0782,0.0615,0.0422,0.0,0.0242,0.028,0.0596,0.0,0.0,0.0,0.0355,0.0,0.0751,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,0.0325,0.034,0.0,0.0,0.0,0.0,0.0495,0.0,0.0,0.061,0.0,0.0,0.0,0.0,0.0,0.0646,0.0687,0.0,0.0,0.0
9,0.0421,0.0662,0.0227,0.1045,0.0261,0.0,0.0321,0.0,0.1146,0.0,0.0,0.0787,0.0,0.0,0.0442,0.0,0.0,0.0457,0.0,0.0


üîé Top 10 t√©rminos con mayor IDF:


Unnamed: 0,termino,idf
0,bicarbonato,9.188828
1,suaves,9.188828
2,del_gol,9.188828
3,es_causado,9.188828
4,militar_central,9.188828
5,en_paraguay,9.188828
6,que_tiktok,9.188828
7,escalera,9.188828
8,en_atletas,9.188828
9,conexion_migrante,9.188828



=== TF-IDF Preview ¬∑ stopwords_only ¬∑ shape=(7200, 16102) ¬∑ nnz=247283 ===

üñºÔ∏è TF-IDF ‚Äì stopwords_only  (primeras 10 filas √ó 20 columnas)


Unnamed: 0,gobierno,mas,pp,catalunya,si,iniciativa,madrid,per,presidente,partido,vers,tras,equo,congreso,anos,ley,psoe,tambien,dos,ser
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0869,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0729,0.0779,0.0783,0.0,0.08,0.0,0.0767,0.0817,0.0,0.0812,0.0793,0.0,0.0,0.0,0.0,0.0,0.0898
2,0.0,0.1052,0.1049,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.128,0.0,0.264,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0733,0.0,0.0,0.0,0.0822,0.0,0.0,0.0812,0.0,0.0,0.0,0.0,0.0927,0.0,0.0,0.0,0.0,0.0
6,0.0,0.0873,0.0,0.0,0.0,0.0,0.0977,0.0,0.0,0.0965,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,0.0,0.0,0.0883,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


üîé Top 10 t√©rminos con mayor IDF:


Unnamed: 0,termino,idf
0,damon_slayer,9.188828
1,montano,9.188828
2,dal_poggetto,9.188828
3,demostro_ser,9.188828
4,higuita,9.188828
5,yerri_estrada,9.188828
6,liverpool,9.188828
7,leporino,9.188828
8,vacunas_covid,9.188828
9,marketplace,9.188828



=== TF-IDF Preview ¬∑ stemming_only ¬∑ shape=(7200, 33949) ¬∑ nnz=600607 ===

üñºÔ∏è TF-IDF ‚Äì stemming_only  (primeras 10 filas √ó 20 columnas)


Unnamed: 0,de,la,el,que,en,del,los,las,de_la,por,un,par,se,con,una,no,al,su,ha,en_el
0,0.1187,0.0889,0.0549,0.042,0.1049,0.0243,0.0776,0.0904,0.0585,0.0,0.0,0.0,0.0317,0.0326,0.0341,0.0,0.0,0.0,0.0,0.0
1,0.0524,0.1098,0.0377,0.0433,0.0216,0.025,0.0266,0.0,0.0,0.0633,0.0,0.0,0.0326,0.0,0.0,0.0366,0.0348,0.0,0.0,0.0393
2,0.1838,0.0482,0.0248,0.0,0.0284,0.0329,0.1402,0.0408,0.0397,0.0417,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.1081,0.0907,0.0234,0.0268,0.0,0.031,0.0,0.0,0.0373,0.0392,0.0406,0.0,0.0,0.0,0.0,0.0,0.0,0.0458,0.0,0.0
4,0.0469,0.0491,0.0506,0.0969,0.058,0.0,0.0238,0.0278,0.0,0.0284,0.0,0.0282,0.0,0.06,0.0,0.0328,0.0,0.0331,0.1017,0.0
5,0.1319,0.0692,0.089,0.0,0.0612,0.0,0.0503,0.0293,0.0,0.0299,0.0,0.0298,0.0,0.0317,0.0,0.0,0.0657,0.0,0.0358,0.0
6,0.0661,0.0462,0.0,0.0,0.0273,0.0315,0.0,0.0,0.038,0.0,0.0414,0.0,0.0412,0.0423,0.0,0.0462,0.0,0.0,0.1434,0.0
7,0.0809,0.0636,0.0437,0.0,0.025,0.0289,0.0617,0.0,0.0349,0.0,0.0,0.0365,0.0,0.0776,0.0,0.0,0.0,0.0,0.0,0.0
8,0.0302,0.0317,0.0,0.0,0.0,0.0,0.046,0.0,0.0,0.0,0.0567,0.0,0.0,0.0,0.0,0.0,0.0601,0.0639,0.0,0.0
9,0.0444,0.0698,0.024,0.1101,0.0275,0.0,0.0338,0.0,0.0,0.1208,0.0,0.0,0.0829,0.0,0.0,0.0466,0.0,0.0,0.0481,0.0


üîé Top 10 t√©rminos con mayor IDF:


Unnamed: 0,termino,idf
0,hospital_milit,9.188828
1,por_inter,9.188828
2,perry,9.188828
3,yerri_estrad,9.188828
4,zanganeh,9.188828
5,que_comp,9.188828
6,que_morat,9.188828
7,que_tiktok,9.188828
8,que_saludcoop,9.188828
9,ran,9.188828



=== TF-IDF Preview ¬∑ both_stop+stem ¬∑ shape=(7200, 10528) ¬∑ nnz=240597 ===

üñºÔ∏è TF-IDF ‚Äì both_stop+stem  (primeras 10 filas √ó 20 columnas)


Unnamed: 0,gobiern,part,mas,president,pp,cataluny,si,public,inici,nuev,madr,polit,social,per,ser,cas,vers,unid,per_cataluny,vers_per
0,0.0,0.0804,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0668,0.0,0.0,0.0,0.0774,0.0828,0.0,0.0788,0.0,0.0,0.0,0.0,0.0844,0.168,0.0,0.0864,0.0,0.0867,0.0869
2,0.0973,0.0,0.1241,0.0,0.124,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1361,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1884,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0731,0.0,0.0,0.0756,0.0,0.0,0.0799,0.0,0.0,0.0,0.0
5,0.0,0.0725,0.0,0.0,0.0799,0.0,0.0,0.0,0.0,0.0,0.0888,0.0898,0.0,0.0,0.0,0.0927,0.0,0.0,0.0,0.0
6,0.0,0.0872,0.0962,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1068,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1572,0.0,0.0,0.0813,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,0.0,0.0,0.0,0.0,0.0973,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


üîé Top 10 t√©rminos con mayor IDF:


Unnamed: 0,termino,idf
0,nuev_medicament,9.188828
1,nostradamus,9.188828
2,nuev_empres,9.188828
3,ningun_papel,9.188828
4,nivel_asistencial,9.188828
5,omeg,9.188828
6,oncolog,9.188828
7,musgrav,9.188828
8,mean,9.188828
9,medi_oficial,9.188828
