In [1]:
%pip install pymongo requests pandas

Note: you may need to restart the kernel to use updated packages.


In [2]:
from pymongo import MongoClient
from bson.json_util import dumps
import json
import os
import pandas as pd

In [3]:
class MongoDBHandler:
    """Load the Drake-lyrics and Milei-news datasets into MongoDB and query them.

    The handler owns the connection settings and dataset locations, exposes
    loaders (``insert_drake`` / ``insert_milei``), text-index creation
    (``create_indexes``) and three ``$text`` searches that each return a
    ``pandas.DataFrame`` of the matching documents.

    Attributes:
        mongo_uri: Connection string passed to ``MongoClient``.
        db_name: Name of the database used for both collections.
        drake_path: Path of the JSON file holding the Drake lyrics documents.
        milei_path: Directory walked recursively for Milei news JSON files.
        db: Database handle, or ``None`` when creating the client failed.
    """

    def __init__(self):
        self.mongo_uri = (
            "mongodb://localhost:27017/"
        )
        self.db_name = "text_analysis"
        self.drake_path = "./drake_lyrics/drake_data.json"
        self.milei_path = "./milei_news"
        # Define the attribute up front so a failed connection leaves a
        # well-defined ``None`` instead of a missing attribute.
        self.db = None
        self.connect_mongo()

    def connect_mongo(self):
        """Create the client and bind ``self.db`` to the configured database.

        Any exception raised while building the client is printed rather than
        propagated; in that case ``self.db`` keeps its previous value.

        NOTE: PyMongo's ``MongoClient`` connects lazily, so an unreachable
        server is usually reported by the first real operation, not here.
        """
        try:
            client = MongoClient(self.mongo_uri)
            self.db = client[self.db_name]
            print(f"Conectado a la base de datos '{self.db_name}' en MongoDB.")
        except Exception as e:
            print(f"Error al conectar a MongoDB: {e}")

    def insert_drake(self):
        """Insert every document of the Drake JSON file into ``drake_lyrics``.

        The file is expected to contain a JSON array of documents. A missing
        file is reported and skipped, mirroring ``insert_milei``. Insert errors
        are printed, not raised; JSON parsing errors still propagate.
        """
        if not os.path.exists(self.drake_path):
            print("Drake data does not exist")
            return

        # Explicit UTF-8: the platform default encoding is not reliable for
        # JSON text and would garble non-ASCII characters on some systems.
        with open(self.drake_path, "r", encoding="utf-8") as file:
            js = json.load(file)  # the JSON is an array of documents

        try:
            self.db.drake_lyrics.insert_many(js)  # bulk insert into MongoDB
            print("Inserted drake lyrics data")
        except Exception as e:
            print("Error when inserting drake data", e)

    def insert_milei(self):
        """Insert each JSON file found under ``milei_path`` into ``milei_news``.

        The directory is walked recursively and every file is treated as one
        JSON document. A file that cannot be opened, parsed or inserted is
        reported with its running index and the walk continues.
        """
        if not os.path.exists(self.milei_path):
            print("Milei data does not exist")
            return

        ctr: int = 0
        # Visit every file below the root directory, recursively.
        for root, _, files in os.walk(self.milei_path):
            for file in files:
                path = os.path.join(root, file)
                try:
                    # open() is inside the try so one unreadable file does not
                    # abort the whole import; UTF-8 keeps accented text intact.
                    with open(path, "r", encoding="utf-8") as json_file:
                        doc = json.load(json_file)
                    self.db.milei_news.insert_one(doc)
                    print(f"Inserted document {ctr}")
                except Exception as e:
                    print(f"Error when inserting document {ctr}", e)
                ctr += 1

    def create_indexes(self):
        """Create the text indexes used by the search methods.

        ``drake_lyrics`` gets a text index on ``lyrics``; ``milei_news`` gets a
        single compound text index over ``title`` and ``summary``.
        """
        # A list of (field, type) pairs is accepted by every PyMongo version
        # and makes the key order of the compound index explicit.
        self.db.drake_lyrics.create_index([("lyrics", "text")])
        self.db.milei_news.create_index([("title", "text"), ("summary", "text")])

    def _text_search(self, collection, search):
        """Run a case-insensitive ``$text`` search and return the hits as a DataFrame."""
        query = {
            "$text": {
                "$search": search,
                "$caseSensitive": False
            }
        }
        # list(cursor) drains the cursor on any PyMongo version.
        return pd.DataFrame(list(collection.find(query)))

    def drake_love_money(self):
        """Return the Drake lyrics documents matching the text search ``love money``.

        Space-separated terms in a ``$text`` search are OR-ed, so a document
        matches when it contains either term.
        """
        return self._text_search(self.db.drake_lyrics, "love money")

    def milei_title_inflacion(self):
        """Return the Milei news documents matching ``inflacion`` / ``inflación``.

        NOTE(review): despite the method name, ``$text`` searches every field
        of the collection's text index (``title`` and ``summary`` as built by
        ``create_indexes``), so hits are not limited to the title.
        """
        return self._text_search(self.db.milei_news, "inflacion inflación")

    def milei_summary_milei_dolar_not_inflacion(self):
        """Return news matching ``milei`` or ``dolar``/``dólar`` but not ``inflación``.

        Terms prefixed with ``-`` are excluded from the results.

        NOTE(review): despite the method name, ``$text`` searches every field
        of the collection's text index (``title`` and ``summary`` as built by
        ``create_indexes``), so the search is not limited to the summary.
        """
        return self._text_search(
            self.db.milei_news, "milei dolar dólar -inflación -inflacion"
        )

In [4]:
handler = MongoDBHandler()

# One-time setup: uncomment to load the raw data into MongoDB.
# handler.insert_drake()
# handler.insert_milei()

# One-time setup: uncomment to create the indexes in MongoDB.
# handler.create_indexes()

# Each search returns a DataFrame; print its first three rows and the hit count.
love_money = handler.drake_love_money()
print("Búsqueda love-money:", love_money.iloc[:3])
print("Total:", love_money.shape[0])

milei_inflacion = handler.milei_title_inflacion()
print("Búsqueda inflación:", milei_inflacion.iloc[:3])
print("Total:", milei_inflacion.shape[0])

milei_dolar_not_inflacion = handler.milei_summary_milei_dolar_not_inflacion()
print("Búsqueda milei, dólar not inflación:", milei_dolar_not_inflacion.iloc[:3])
print("Total:", milei_dolar_not_inflacion.shape[0])



Conectado a la base de datos 'text_analysis' en MongoDB.
Búsqueda love-money:                         _id                 album  \
0  6865a851b64c705d23653f89      Unreleased Songs   
1  6865a851b64c705d23653e89  Dark Lane Demo Tapes   
2  6865a851b64c705d23653f7a      Unreleased Songs   

                               lyrics_title  \
0                     My Love (Demo) Lyrics   
1             From Florida With Love Lyrics   
2  From Florida with Love (Original) Lyrics   

                                          lyrics_url  \
0       https://genius.com/Drake-my-love-demo-lyrics   
1  https://genius.com/Drake-from-florida-with-lov...   
2  https://genius.com/Drake-from-florida-with-lov...   

                                              lyrics track_views  
0  [Verse 1]\nI had an idea in my head\nI picture...        None  
1  [Intro]\n(Plugg)\n(Bitch)\nAyy, yeah\n\n[Verse...      222.3K  
2  [Intro: Drake & Booggz]\n(Plug)\nI'm back, bab...       12.3K  
Total: 218
Búsqueda inflaci