In [None]:
!pip install -U flask-cors
!pip install pyngrok

In [None]:
from transformers import pipeline
from huggingface_hub import login
import torch
from flask import Flask, request, jsonify
from flask_cors import CORS
from pyngrok import ngrok
import google.generativeai as genai

# Log in to Hugging Face (ensure you have your token).
# This runs before the pipelines below are created; presumably the model
# repositories require an authenticated download -- TODO confirm.
# Replace the placeholder string with a real access token.
login("Use your hugginface token")

# Set up device: use the GPU when CUDA is available, otherwise the CPU
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the Llama 3.2 model.
# No task is passed to pipeline(); it is left to the library to pick one
# for the model. Weights are requested in bfloat16.
llama_32 = "meta-llama/Llama-3.2-1B-Instruct"
generator_1 = pipeline(model=llama_32, device=device, torch_dtype=torch.bfloat16)

# Load the Ministral 3B instruct model with the same device/dtype settings
mistral_3b = "ministral/Ministral-3b-instruct"
generator_2 = pipeline(model=mistral_3b, device=device, torch_dtype=torch.bfloat16)

# Conversation log shared by every generate_*_response function in this
# file. Entries are {"role": "user" | "assistant", "content": ...} dicts.
# It is a single module-level list, so all requests and all models read
# from and append to the same conversation.
history = []

# Define function for Llama 3.2 model
def generate_llama_response(message):
    """Answer *message* with the Llama pipeline (``generator_1``).

    The shared ``history`` is flattened into a plain-text transcript
    ("User: ..." / "Assistant: ..." lines) behind a short instruction, and
    the whole transcript is sent as a single user chat message.

    Args:
        message: The latest user message.

    Returns:
        The assistant reply text, or ``"No valid response generated."`` when
        the last generated entry is not an assistant message. ``history`` is
        extended with the user/assistant pair only in the former case.
    """
    # Build the prompt piece by piece and join once at the end.
    pieces = ["You are a helpful assistant. Provide concise answers.\n\n"]
    for turn in history:
        speaker = "Assistant" if turn["role"] != "user" else "User"
        pieces.append(f"{speaker}: {turn['content']}\n")
    pieces.append(f"User: {message}\nAssistant:")
    context_prompt = "".join(pieces)

    chat = [{"role": "user", "content": context_prompt}]
    generation_options = {
        "do_sample": False,
        "temperature": 1.0,
        "top_p": 1,
        "max_new_tokens": 500,
    }
    result = generator_1(chat, **generation_options)

    # The reply is taken from the last entry of the returned message list.
    reply = result[0]["generated_text"][-1]
    if reply["role"] != "assistant":
        return "No valid response generated."

    history.append({"role": "user", "content": message})
    history.append({"role": "assistant", "content": reply["content"]})
    return reply["content"]


# Define function for Gemini model
def generate_gemini_response(message):
    """Answer *message* with the Gemini API ("gemini-1.5-flash").

    The shared ``history`` is flattened into a plain-text transcript
    ("User: ..." / "Assistant: ..." lines) behind a short instruction and
    sent to the model as a single prompt string.

    Args:
        message: The latest user message.

    Returns:
        The reply text reported by the Gemini response.

    Raises:
        Exception: Whatever the Gemini client raises while generating or
            while reading ``response.text``. In that case ``history`` is
            left unchanged.
    """
    # Compile history into the context for the prompt
    pieces = ["You are a helpful assistant. Provide concise answers.\n\n"]
    for turn in history:
        speaker = "User" if turn["role"] == "user" else "Assistant"
        pieces.append(f"{speaker}: {turn['content']}\n")

    # Add the latest user message
    pieces.append(f"User: {message}\nAssistant:")
    context_prompt = "".join(pieces)

    # Replace 'Use your Gemini API Key' with your API Key
    genai.configure(api_key="Use your Gemini API Key")
    model = genai.GenerativeModel("gemini-1.5-flash")
    response = model.generate_content(context_prompt)

    # Read the text BEFORE mutating history: if the accessor raises (for
    # example when the response carries no usable text), the shared history
    # must not be left with a user turn that has no matching assistant turn.
    reply_text = response.text

    history.append({"role": "user", "content": message})
    history.append({"role": "assistant", "content": reply_text})

    return reply_text


# Define function for Mistral model
def generate_mistral_response(message):
    """Answer *message* with the Ministral pipeline (``generator_2``).

    The shared ``history`` is flattened into a plain-text transcript
    ("User: ..." / "Assistant: ..." lines) behind a short instruction, and
    the whole transcript is sent as a single user chat message.

    Args:
        message: The latest user message.

    Returns:
        The assistant reply text, or ``"No valid response generated."`` when
        the last generated entry is not an assistant message. ``history`` is
        extended with the user/assistant pair only in the former case.
    """
    # Compile history into the context for the prompt
    pieces = ["You are a helpful assistant. Provide concise answers.\n\n"]
    for turn in history:
        speaker = "User" if turn["role"] == "user" else "Assistant"
        pieces.append(f"{speaker}: {turn['content']}\n")

    # Add the latest user message
    pieces.append(f"User: {message}\nAssistant:")
    context_prompt = "".join(pieces)

    prompt = [
        {"role": "user", "content": context_prompt},
    ]

    # generator_2 is the pipeline loaded from the Ministral checkpoint;
    # calling generator_1 here would answer with the Llama model instead.
    outputs = generator_2(
        prompt,
        do_sample=False,
        temperature=1.0,
        top_p=1,
        max_new_tokens=500,
    )

    # The reply is taken from the last entry of the returned message list.
    reply = outputs[0]["generated_text"][-1]
    if reply["role"] != "assistant":
        return "No valid response generated."

    history.append({"role": "user", "content": message})
    history.append({"role": "assistant", "content": reply["content"]})
    return reply["content"]

# Set up Flask app
app = Flask(__name__)
# Enable cross-origin requests for the app using flask-cors defaults
# (no origins/resources are restricted here -- presumably this allows any
# origin; confirm before exposing publicly).
CORS(app)

# Route to handle requests
@app.route("/generate", methods=["POST"])
def generate():
    """Handle ``POST /generate``: route a chat message to the chosen model.

    Expected JSON body: ``{"message": <text>, "model": "llama" | "gemini" |
    "mistral"}``; ``model`` defaults to ``"llama"``.

    Returns:
        * 200 ``{"response": ...}`` on success.
        * 400 ``{"error": ...}`` when the body is not a JSON object, the
          message is missing/empty, or the model name is not supported.
        * 500 ``{"error": ...}`` when the selected backend raises.
    """
    # silent=True yields None for a missing, mistyped or malformed JSON body
    # instead of raising, so every bad request gets the same JSON error shape.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        # Covers None as well as valid-but-non-object JSON (list, string, ...)
        # on which .get() would otherwise raise AttributeError.
        return jsonify({"error": "Request body must be a JSON object."}), 400

    message = data.get("message", "")
    model = data.get("model", "llama")  # Default to Llama

    if not message:
        return jsonify({"error": "No message provided."}), 400

    handlers = {
        "llama": generate_llama_response,
        "gemini": generate_gemini_response,
        "mistral": generate_mistral_response,
    }
    # The isinstance check keeps unhashable values (e.g. a list) away from
    # the dict lookup; they are simply reported as unsupported.
    handler = handlers.get(model) if isinstance(model, str) else None
    if handler is None:
        return jsonify({"error": f"Model '{model}' not supported."}), 400

    try:
        response = handler(message)
    except Exception as e:
        # Top-level boundary: log the traceback server-side, then report.
        # NOTE(review): str(e) is sent to the client and may leak internals.
        app.logger.exception("Generation failed for model %r", model)
        return jsonify({"error": str(e)}), 500

    return jsonify({"response": response})

if __name__ == "__main__":
    # Imported again here so the entry-point section reads on its own
    from pyngrok import ngrok

    # One name for the port used by both the tunnel and the Flask server
    server_port = 5000

    # Authenticate with ngrok; replace the placeholder with your authtoken
    ngrok.set_auth_token("Use your ngrok token")

    # Open the tunnel to the local port and report where it is reachable
    public_url = ngrok.connect(server_port)
    print(f"Public URL: {public_url}")

    # Start serving the Flask app on the tunnelled port
    app.run(port=server_port)
