### Installing all necessary dependencies

In [None]:
%%capture
# Spanish spaCy pipeline loaded later with spacy.load("es_core_news_lg")
!python -m spacy download es_core_news_lg
# Supporting libraries installed quietly (-q); versions are not pinned
!pip install -q bitsandbytes datasets accelerate loralib
# transformers (main branch) and peft are installed straight from their GitHub repositories
!pip install -q git+https://github.com/huggingface/transformers.git@main git+https://github.com/huggingface/peft.git

### Import the dependencies

In [None]:
import re

import pandas as pd
import spacy
import torch
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

### NER preprocessing: extracting the location from the news text

In [None]:
# Load the Spanish spaCy pipeline (downloaded in the install cell); its entity
# annotations (doc.ents) are what extract_first_loc reads.
nlp = spacy.load("es_core_news_lg")

In [None]:
def extract_first_loc(text):
    """Return the text of the first ``LOC`` entity found in ``text``.

    The text is run through the module-level spaCy pipeline ``nlp`` and its
    entities are scanned in order.

    Args:
        text: The text to analyse.

    Returns:
        The surface text of the first entity labelled ``"LOC"``, or ``None``
        when the document contains no such entity.
    """
    for entity in nlp(text).ents:
        # Stop at the first location; later ones are ignored
        if entity.label_ == "LOC":
            return entity.text

    return None

### Processing the news and generation function

In [None]:
def generar_nueva_noticia_gpt2(noticia, fecha):
    """Generate a news text from a news summary and a date.

    Builds the prompt ``"<noticia>. Fecha: <fecha>.Lugar: <loc>.->:"`` (the
    ``Lugar`` part is left out when ``extract_first_loc`` finds no location),
    samples a continuation with the module-level ``model`` / ``tokenizer``
    and returns whatever follows the ``->:`` marker.

    Args:
        noticia: News text that opens the prompt.
        fecha: Date string inserted after ``"Fecha: "``.

    Returns:
        The decoded text after the first ``->:`` marker, stripped of
        surrounding whitespace, or ``None`` if the marker does not appear in
        the decoded output.
    """
    loc = extract_first_loc(noticia)

    # NOTE(review): the prompt layout (including the missing spaces between
    # fields) is kept byte-for-byte; presumably it mirrors the format used
    # for fine-tuning -- confirm before changing it.
    if loc:
        prompt = noticia + ". " + "Fecha: " + fecha + "." + "Lugar: " + loc + "." + "->:"
    else:
        prompt = noticia + ". " + "Fecha: " + fecha + "->:"

    # Tokenize the prompt and move the tensors to wherever the model lives
    batch = tokenizer(prompt, return_tensors='pt').to(model.device)

    # torch.autocast(device_type="cuda") replaces the deprecated
    # torch.cuda.amp.autocast() spelling.
    with torch.autocast(device_type="cuda"):
        output_tokens = model.generate(
            **batch,
            max_new_tokens=200,
            do_sample=True,          # Without this, generate() decodes greedily and ignores the sampling knobs below
            temperature=0.9,         # Sampling temperature
            top_k=50,                # Sample only among the 50 most likely tokens at each step
            top_p=0.95,              # Nucleus sampling with p = 0.95
            repetition_penalty=1.2,  # Penalize repeated tokens to reduce repetition
        )

    # Decode the full sequence (prompt + continuation)
    generated_text = tokenizer.decode(output_tokens[0], skip_special_tokens=True)

    # Keep only the text that follows the first '->:' marker
    response_text = re.search(r'->:\s*(.*)', generated_text, re.DOTALL)
    if response_text:
        return response_text.group(1).strip()

    # Marker not found in the decoded output
    return None

### Loading the model

In [None]:
peft_model_id = "BrauuHdzM/fine-tuned-noticias-gpt2-spanishstories-NER"
config = PeftConfig.from_pretrained(peft_model_id)

# Load the base model named in the adapter config, quantized to 8 bits.
# The quantization settings go through `quantization_config`; passing
# `load_in_8bit=True` directly to from_pretrained is deprecated.
base_model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    return_dict=True,
    quantization_config=BitsAndBytesConfig(load_in_8bit=True),
    device_map='auto',
)
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)

# Load the LoRA adapter weights on top of the base model
model = PeftModel.from_pretrained(base_model, peft_model_id)

### Generate the news

In [None]:
# Example inputs -- replace with your own.
# `noticia` is the news text that opens the prompt; `fecha` is the date string
# inserted after "Fecha: " by generar_nueva_noticia_gpt2.
noticia = "Melbourne. El español Carlos Sainz (Ferrari), ganador del Gran Premio de Australia de Fórmula Uno, aseguró que confiaba en tener la capacidad de superar al tricampeón mundial y actual líder de la competencia, el neerlandés Max Verstappen (Red Bull), hoy en la tercera prueba de la temporada, e indicó que su triunfo demuestra que nunca hay que darse por vencido."
fecha = "25 de marzo del 2024"

In [None]:
# The returned text (the part of the model output after the '->:' marker)
# is shown as this cell's output.
generar_nueva_noticia_gpt2(noticia, fecha)