### MongoDB

In [None]:
import os
from io import StringIO

from bson.json_util import dumps
from pymongo.mongo_client import MongoClient
from pymongo.server_api import ServerApi

def GetCollectionData(database: str, collection: str) -> str:
    """Return every document of a MongoDB collection as a JSON string.

    The connection string is read from the ``MONGODB_URI`` environment
    variable; if it is unset, ``None`` is handed to ``MongoClient`` as-is.

    Args:
        database: Name of the database to read from.
        collection: Name of the collection inside ``database``.

    Returns:
        A JSON array (serialized with ``bson.json_util.dumps``) holding all
        documents of the collection with their ``_id`` field left out.
    """
    uri = os.environ.get("MONGODB_URI")

    # Use the client as a context manager so it is closed even when the
    # query raises; previously each call left an open client behind.
    with MongoClient(uri, server_api=ServerApi('1')) as client:
        # The projection excludes ``_id`` on the server side, and the cursor
        # is fully materialized before the client is closed.
        documents = list(client[database][collection].find({}, {'_id': False}))

    return dumps(documents)

In [None]:
import pandas as pd


# Export the Stocks/Prices collection to data/stockPrices.csv.
stockPrices = GetCollectionData("Stocks", "Prices")
# Passing literal JSON text to read_json is deprecated (pandas >= 2.1), so the
# string is wrapped in a file-like object first.
stockPrices = pd.read_json(StringIO(stockPrices))
stockPrices.to_csv("data/stockPrices.csv", index=False)

In [None]:
# Export the Stocks/Info collection to data/stockInfo.csv.
stockInfo = GetCollectionData("Stocks", "Info")
# Passing literal JSON text to read_json is deprecated (pandas >= 2.1), so the
# string is wrapped in a file-like object first.
stockInfo = pd.read_json(StringIO(stockInfo))
stockInfo.to_csv("data/stockInfo.csv", index=False)

In [None]:
# Export each collection of the Indicators database to its own CSV file.
# Passing literal JSON text to read_json is deprecated (pandas >= 2.1), so each
# JSON string is wrapped in a file-like object before parsing.
dolarPrices = GetCollectionData("Indicators", "Dollar")
dolarPrices = pd.read_json(StringIO(dolarPrices))
dolarPrices.to_csv("data/dollar.csv", index=False)

ipcaPrices = GetCollectionData("Indicators", "IPCA")
ipcaPrices = pd.read_json(StringIO(ipcaPrices))
ipcaPrices.to_csv("data/ipca.csv", index=False)

selicPrices = GetCollectionData("Indicators", "SELIC")
selicPrices = pd.read_json(StringIO(selicPrices))
selicPrices.to_csv("data/selic.csv", index=False)

### LLM

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load the causal language model and its tokenizer from the same checkpoint.
model_name = "bigscience/bloom-560m"
model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Tokenize the prompt into PyTorch tensors and generate up to 100 tokens
# (max_length counts the prompt tokens as well).
prompt = "Quais são os benefícios de trabalhar com modelos de linguagem grandes?"
inputs = tokenizer(prompt, return_tensors="pt")
outputs = model.generate(max_length=100, **inputs)

# Decode the first (only) generated sequence back to text and show it.
decoded_text = tokenizer.decode(outputs[0])
print(decoded_text)



In [None]:
from transformers import pipeline, set_seed

# Settings
model_name = "gpt2"  # Model name
seed = 10  # For reproducible results

# Build the text-generation pipeline, then fix the seed
generator = pipeline("text-generation", model=model_name)
set_seed(seed)

# User input
prompt = "Quais são os benefícios de trabalhar com modelos de linguagem grandes?"

# Generate a single sequence, capped at 100 tokens
output = generator(
    prompt,
    truncation=True,
    max_length=100,
    num_return_sequences=1,
)

# Show each result: a numbered header line followed by the generated text
for i, text in enumerate(output):
    print(f"Texto gerado {i + 1}:", text["generated_text"], sep="\n")


In [None]:
# Lazily created text-generation pipeline shared by every gpt() call.
_gpt_generator = None


def gpt(message: str) -> list:
    """Generate a continuation of ``message`` with the ``gpt2-large`` model.

    The pipeline is built on the first call and reused afterwards, so the
    model is not reloaded for every message.

    Args:
        message: Prompt text handed to the text-generation pipeline.

    Returns:
        Whatever the pipeline returns for the prompt. For a single string
        this is a list of dicts, each with a ``"generated_text"`` key.
    """
    global _gpt_generator
    if _gpt_generator is None:
        _gpt_generator = pipeline("text-generation", model="gpt2-large")
    return _gpt_generator(message)


# Run the helper once on a sample prompt and show the raw pipeline output.
message = "What are the benefits of working with large language models?"
response = gpt(message=message)
print(response)