# In [None]:
from time import time
import torch
import transformers
from transformers import AutoTokenizer, AutoModelForCausalLM
from IPython.display import display, Markdown

# Filesystem path of the model checkpoint handed to the pipeline below.
model_path = "/kaggle/input/llama-3/transformers/8b-chat-hf/1"
# Build the text-generation pipeline once, at module import time. Weights are
# requested in float16 and device placement is delegated to device_map="auto".
text_pipeline = transformers.pipeline(
    "text-generation",
    model=model_path,
    torch_dtype=torch.float16,
    device_map="auto",
)

def generate_response(system_prompt, user_prompt, temp=0.7, max_len=1024):
    """Generate a chat reply and return it with the question and the timing.

    The user prompt is wrapped as ``"Question: <prompt> Answer:"``, combined
    with the system prompt through the tokenizer's chat template, and sampled
    from the module-level ``text_pipeline`` using nucleus sampling
    (``top_p=0.9``).

    Args:
        system_prompt: Content of the system message.
        user_prompt: Raw question text supplied by the caller.
        temp: Sampling temperature forwarded to the pipeline.
        max_len: Maximum number of newly generated tokens.

    Returns:
        One string made of the wrapped question, the stripped model reply and
        a ``"Total time: <seconds> sec."`` suffix, joined by single spaces.

    Raises:
        ValueError: If the tokenizer does not define an EOS token.
    """
    start_time = time()
    tokenizer = text_pipeline.tokenizer

    user_prompt = "Question: " + user_prompt + " Answer:"
    prompts = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]
    prompt_text = tokenizer.apply_chat_template(
        prompts,
        tokenize=False,
        add_generation_prompt=True,
    )

    eos_token_id = tokenizer.eos_token_id
    if eos_token_id is None:
        raise ValueError("The tokenizer does not have an EOS token. Please check the tokenizer configuration.")

    # Llama 3 chat models close an assistant turn with <|eot_id|>, which can
    # differ from the tokenizer's default EOS token. Stopping on both keeps the
    # model from running on into a new turn, so the reply no longer has to be
    # cut at the literal word "assistant" (which also truncated legitimate
    # answers containing that word).
    terminators = [eos_token_id]
    eot_id = tokenizer.convert_tokens_to_ids("<|eot_id|>")
    # An unknown token maps to None or to the unk id depending on the
    # tokenizer; only a real, distinct id is added as a terminator.
    if isinstance(eot_id, int) and eot_id not in (eos_token_id, tokenizer.unk_token_id):
        terminators.append(eot_id)

    generated_sequences = text_pipeline(
        prompt_text,
        do_sample=True,
        top_p=0.9,
        temperature=temp,
        eos_token_id=terminators,
        max_new_tokens=max_len,
        return_full_text=False,
        # Pad with the same single EOS id validated above; the model config's
        # eos_token_id is not guaranteed to be a single integer.
        pad_token_id=eos_token_id,
    )
    response_text = generated_sequences[0]["generated_text"].strip()

    end_time = time()
    total_time = f"Total time: {round(end_time - start_time, 2)} sec."

    return user_prompt + " " + response_text + " " + total_time


def highlight_text(text):
    """Return *text* with known section labels wrapped in coloured bold markup.

    Each occurrence of ``"<label>:"`` for the labels listed below is replaced
    by a blank line followed by the label rendered bold inside a ``<font>``
    tag of the associated colour. Labels are processed in the listed order.
    """
    palette = (
        ("Reasoning", "blue"),
        ("Question", "red"),
        ("Answer", "green"),
        ("Total time", "magenta"),
    )
    highlighted = text
    for label, shade in palette:
        marker = label + ":"
        styled = "\n\n**<font color='" + shade + "'>" + marker + "</font>**"
        highlighted = highlighted.replace(marker, styled)
    return highlighted



# In [None]:
from flask import Flask, request, jsonify

# Flask application object; the route handlers below are registered on it.
app = Flask(__name__)



@app.route('/statistics_reply', methods=['POST'])
def process_one():
    """Handle POST /statistics_reply: answer a prompt with HTML-only output.

    Expects a JSON object body with a string ``prompt`` field. The prompt is
    passed to ``generate_response`` with a fixed system prompt, ``temp=0.1``
    and ``max_len=256``.

    Returns:
        ``{"result": <generated text>}`` on success, or
        ``{"error": <message>}`` with HTTP status 400 when the body is not a
        JSON object or has no string ``prompt``.
    """
    system_prompt = """
        You are a financial advisor focused on women empowerment. Your only response should be in HTML format, containing tables, charts, or lists  as needed. Do not provide explanations or add any extra text or commentary outside of the HTML structure.
    """
    # silent=True yields None for a missing or malformed JSON body, so a bad
    # request becomes a clean 400 below instead of an unhandled exception.
    data = request.get_json(silent=True)
    prompt = data.get("prompt") if isinstance(data, dict) else None
    if not isinstance(prompt, str):
        return jsonify({"error": "Request body must be a JSON object with a string 'prompt' field."}), 400

    result = generate_response(
        system_prompt,
        user_prompt=prompt,
        temp=0.1,
        max_len=256,
    )
    return jsonify({"result": result})

@app.route('/get_reply', methods=['POST'])
def process_two():
    """Handle POST /get_reply: answer a prompt with chart HTML only.

    Expects a JSON object body with a string ``prompt`` field. The prompt is
    passed to ``generate_response`` with a fixed system prompt, ``temp=0.1``
    and ``max_len=256``.

    Returns:
        ``{"result": <generated text>}`` on success, or
        ``{"error": <message>}`` with HTTP status 400 when the body is not a
        JSON object or has no string ``prompt``.
    """
    system_prompt = """
        You are a financial advisor helping with women empowerment. Only respond with the HTML code of the chart, and do not include any additional text or commentary.
    """

    # silent=True yields None for a missing or malformed JSON body, so a bad
    # request becomes a clean 400 below instead of an unhandled exception.
    data = request.get_json(silent=True)
    prompt = data.get("prompt") if isinstance(data, dict) else None
    if not isinstance(prompt, str):
        return jsonify({"error": "Request body must be a JSON object with a string 'prompt' field."}), 400

    result = generate_response(
        system_prompt,
        user_prompt=prompt,
        temp=0.1,
        max_len=256,
    )
    return jsonify({"result": result})

if __name__ == '__main__':
    # The reloader that debug mode enables by default re-executes this script
    # in a child process, which would load the model a second time and does
    # not work inside a notebook kernel, so it is switched off explicitly.
    # NOTE(review): debug=True still exposes the interactive Werkzeug
    # debugger; do not serve this on an untrusted network.
    app.run(debug=True, use_reloader=False)
