In [110]:
import json
import os
import random
import re
import time

import boto3
import botocore.exceptions
import pandas as pd
from dotenv import load_dotenv
from tenacity import retry, retry_if_exception, retry_if_exception_type, stop_after_attempt, wait_exponential

In [93]:
def load(path='../data/credit_risk_imputed.xlsx'):
    """Read the credit-risk workbook into a DataFrame and print its schema.

    Args:
        path: Location of the Excel file to read. Defaults to the imputed
            credit-risk dataset this notebook works on.

    Returns:
        pandas.DataFrame: The workbook contents as returned by
        ``pd.read_excel``.
    """
    data = pd.read_excel(path)

    # DataFrame.info() writes its report to stdout itself and returns None,
    # so wrapping it in print() only appended a stray "None" line.
    data.info()

    return data

In [94]:
# Number of applicant rows grouped into one request (matches the chunk_size
# value passed to main()).
BATCH_SIZE = 5
# Bedrock model identifier sent as `modelId` on every invoke_model call.
MODEL_ID = "anthropic.claude-3-sonnet-20240229-v1:0"
# AWS region the Bedrock runtime client is created in.
REGION = 'sa-east-1'

In [113]:
def construir_prompt_en(batch):
    """Compose the English prompt requesting one summary per applicant.

    Args:
        batch: DataFrame slice holding the applicants to describe. Each row
            must provide the columns 'Age', 'Sex', 'Job', 'Housing',
            'Saving accounts', 'Checking account', 'Credit amount',
            'Duration' and 'Purpose'.

    Returns:
        str: An instruction header followed by one numbered profile per row,
        numbered from 1 in row order.
    """
    encabezado = (
        "Below are profiles of credit applicants. For each profile, generate a brief, professional summary describing their credit situation. "
        f"Return the summaries numbered from 1 to {len(batch)}.\n\n"
    )

    def _perfil(numero, fila):
        # One applicant: its ordinal, then one "label: value" line per field,
        # closed by a blank line that separates it from the next profile.
        lineas = [
            f"{numero}.",
            f"Age: {fila['Age']} years old",
            f"Sex: {fila['Sex']}",
            f"Job level: {fila['Job']}",
            f"Housing: {fila['Housing']}",
            f"Savings account: {fila['Saving accounts']}",
            f"Checking account: {fila['Checking account']}",
            f"Credit amount: {fila['Credit amount']}",
            f"Loan duration: {fila['Duration']} months",
            f"Purpose: {fila['Purpose']}",
        ]
        return "\n".join(lineas) + "\n\n"

    perfiles = [
        _perfil(numero, fila)
        for numero, (_, fila) in enumerate(batch.iterrows(), start=1)
    ]
    return encabezado + "".join(perfiles)

In [112]:
# Create the Bedrock runtime client, bound to the configured AWS region.
bedrock = boto3.client("bedrock-runtime", region_name=REGION)

# Bedrock error codes that describe a transient condition. Anything else
# (access denied, validation errors, unknown model, ...) fails identically on
# every attempt, so retrying it only burns time.
_CODIGOS_REINTENTABLES = frozenset({
    "ThrottlingException",
    "ServiceUnavailableException",
    "InternalServerException",
    "ModelTimeoutException",
    "ModelNotReadyException",
})


def _es_error_reintentable(exc):
    """Return True when ``exc`` is a ClientError carrying a transient error code."""
    if not isinstance(exc, botocore.exceptions.ClientError):
        return False
    codigo = exc.response.get("Error", {}).get("Code", "")
    return codigo in _CODIGOS_REINTENTABLES


# Automatic retry with exponential backoff (2s to 10s, at most 6 attempts),
# limited to throttling-style failures. reraise=True surfaces the original
# ClientError once the attempts are exhausted instead of an opaque RetryError.
@retry(
    wait=wait_exponential(multiplier=1, min=2, max=10),
    stop=stop_after_attempt(6),
    retry=retry_if_exception(_es_error_reintentable),
    reraise=True,
)
def obtener_respuesta(prompt):
    """Send ``prompt`` to the configured Bedrock model and return its text reply.

    The prompt is sent as a single user message with ``max_tokens=800`` and
    ``temperature=0.7``.

    Args:
        prompt: Text of the user message.

    Returns:
        str: The ``text`` field of the first content block in the response.

    Raises:
        botocore.exceptions.ClientError: Immediately for non-transient
            errors, or after the final attempt for transient ones.
    """
    response = bedrock.invoke_model(
        modelId=MODEL_ID,
        contentType="application/json",
        accept="application/json",
        body=json.dumps({
            "messages": [
                {
                    "role": "user",
                    "content": prompt
                }
            ],
            "max_tokens": 800,
            "temperature": 0.7,
            "anthropic_version": "bedrock-2023-05-31"
        })
    )
    # The response body is a streaming object; read it once and decode the JSON.
    result = json.loads(response['body'].read())
    return result['content'][0]['text']

In [111]:
# Start-of-line ordinal such as "1.", "2)", "**3.**" or "### 4." that opens a
# summary in the model's reply.
_MARCADOR_RESUMEN = re.compile(
    r"^[ \t]*(?:\*\*|#+[ \t]*)?(\d+)[.)](?:\*\*)?[ \t]*", re.MULTILINE
)


def _extraer_resumenes(respuesta, esperado):
    """Split a numbered model reply into exactly ``esperado`` summaries.

    Markers are accepted only in the sequence 1, 2, 3, ... so stray numbers
    inside a summary are not mistaken for a new entry. Summaries that are
    missing or empty are filled with "ERROR" so the result always has one
    entry per input row.
    """
    marcas = []
    for marca in _MARCADOR_RESUMEN.finditer(respuesta):
        if len(marcas) == esperado:
            break
        if int(marca.group(1)) == len(marcas) + 1:
            marcas.append(marca)

    # A single-row batch may come back without any numbering at all.
    if not marcas and esperado == 1:
        texto = " ".join(respuesta.split())
        return [texto or "ERROR"]

    resumenes = []
    for pos, marca in enumerate(marcas):
        fin = marcas[pos + 1].start() if pos + 1 < len(marcas) else len(respuesta)
        # Collapse internal newlines so each summary is a single CSV-friendly line.
        texto = " ".join(respuesta[marca.end():fin].split())
        resumenes.append(texto or "ERROR")

    resumenes.extend(["ERROR"] * (esperado - len(resumenes)))
    return resumenes


def main(chunk_size=5):
    """Generate a description per applicant and export the enriched dataset.

    Rows are sent to the model in batches of ``chunk_size``. Every batch
    contributes exactly one description per row, with "ERROR" standing in for
    anything that failed or could not be parsed, so the final column always
    lines up with the DataFrame. The result is written to
    ``credit_data_with_descriptions.csv``.

    Args:
        chunk_size: Number of rows per request; must be positive.

    Raises:
        ValueError: If ``chunk_size`` is not positive.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")

    load_dotenv()
    df = load()
    descripciones = []
    total = len(df)

    for i in range(0, total, chunk_size):
        batch = df.iloc[i:i+chunk_size]
        prompt = construir_prompt_en(batch)
        print(f"Procesando filas {i + 1} a {i + len(batch)}...")

        try:
            respuesta = obtener_respuesta(prompt)
            descripciones_batch = _extraer_resumenes(respuesta, len(batch))
            if "ERROR" in descripciones_batch:
                print(f"Advertencia: respuesta incompleta en filas {i + 1} a {i + len(batch)}")
        except Exception as e:
            # Best effort: keep going and mark the whole batch as failed.
            print(f"Error en lote {i + 1}–{i + len(batch)}: {e}")
            descripciones_batch = ["ERROR"] * len(batch)

        descripciones.extend(descripciones_batch)

        # Pause between batches to avoid throttling; pointless after the last one.
        if i + chunk_size < total:
            time.sleep(2)

    df["Description"] = descripciones
    df.to_csv("credit_data_with_descriptions.csv", index=False)
    print("Archivo exportado TOTALMENTE")


In [114]:
# Run the full pipeline with the batch size declared in the configuration cell
# rather than a duplicated literal.
main(chunk_size=BATCH_SIZE)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 9 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   Age               1000 non-null   int64 
 1   Sex               1000 non-null   object
 2   Job               1000 non-null   int64 
 3   Housing           1000 non-null   object
 4   Saving accounts   1000 non-null   object
 5   Checking account  1000 non-null   object
 6   Credit amount     1000 non-null   int64 
 7   Duration          1000 non-null   int64 
 8   Purpose           1000 non-null   object
dtypes: int64(4), object(5)
memory usage: 70.4+ KB
None
Procesando filas 1 a 5...
Procesando filas 6 a 10...
Procesando filas 11 a 15...
Procesando filas 16 a 20...
Procesando filas 21 a 25...
Procesando filas 26 a 30...
Procesando filas 31 a 35...
Procesando filas 36 a 40...
Procesando filas 41 a 45...
Procesando filas 46 a 50...
Procesando filas 51 a 55...
Procesando filas 56 a 60...
