In [None]:
import os

import torch
from flask import Flask, request, jsonify
from flask_cors import CORS
from huggingface_hub import login
from pyngrok import ngrok
from transformers import pipeline

# Log in to Hugging Face.
# The token is read from the HF_TOKEN environment variable so that no
# credential lives in the source file. When the variable is unset, None is
# passed and login() falls back to its interactive token prompt.
login(token=os.environ.get("HF_TOKEN"))

# Set up device: use the GPU when one is available, otherwise the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the Llama 3.2 model.
# The task is named explicitly because generate_response() passes a list of
# chat messages and reads the reply back in the same chat format.
llama_32 = "meta-llama/Llama-3.2-1B-Instruct"
generator = pipeline(
    "text-generation",
    model=llama_32,
    device=device,
    torch_dtype=torch.bfloat16,
)

# Define a function to generate a response
def generate_response(message):
    """Return the assistant's reply to ``message`` from the module-level pipeline.

    A two-turn conversation (fixed system instruction + the user's message)
    is passed to ``generator`` with sampling disabled and at most 500 new
    tokens.

    Args:
        message: The user's text, placed verbatim in the "user" turn.

    Returns:
        The ``content`` of the last entry in the first result's
        ``generated_text`` when that entry's role is ``"assistant"``;
        otherwise the fixed string ``"No valid response generated."``.
    """
    conversation = [
        {
            "role": "system",
            "content": "You are a helpful assistant, that solves users queries and Problems. Give short and easy to understand response.",
        },
        {"role": "user", "content": message},
    ]
    # Decoding options collected in one place; sampling is turned off.
    decoding_options = {
        "do_sample": False,
        "temperature": 1.0,
        "top_p": 1,
        "max_new_tokens": 500,
    }
    results = generator(conversation, **decoding_options)

    # The last entry of the returned conversation is expected to be the reply.
    final_turn = results[0]["generated_text"][-1]
    if final_turn["role"] != "assistant":
        return "No valid response generated."
    return final_turn["content"]

# Set up Flask app
# `app` is the application object that the /generate route is registered on
# and that app.run() starts in the __main__ block.
app = Flask(__name__)
# No options are passed to CORS, so the flask-cors defaults apply.
# NOTE(review): presumably the defaults allow every origin — confirm that is
# intended, since the server is exposed through a public ngrok tunnel.
CORS(app)  # Enable CORS for all routes

# Define a Flask route for the API
@app.route("/generate", methods=["POST"])
def generate():
    """Handle ``POST /generate``: validate the body and return the model's reply.

    Expects a JSON object with a non-empty string field ``"message"``.

    Returns:
        * ``200`` with ``{"response": <text>}`` on success.
        * ``400`` with ``{"error": ...}`` when the body is missing, is not
          valid JSON, is not a JSON object, or ``"message"`` is missing,
          empty, whitespace-only, or not a string.
        * ``500`` with ``{"error": ...}`` when generation raises. The
          exception is logged server-side and not echoed to the client.
    """
    # silent=True returns None for a missing, malformed or non-JSON body
    # instead of raising, so every bad request gets the same JSON error shape.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        # Covers None as well as valid JSON that is not an object
        # (e.g. a list, a string or null), which has no .get().
        return jsonify({"error": "Request body must be a JSON object."}), 400

    message = data.get("message", "")

    # Missing, null, empty or whitespace-only messages are all "not provided".
    if not message or (isinstance(message, str) and not message.strip()):
        return jsonify({"error": "No message provided."}), 400
    # Reject non-string payloads here rather than letting them fail inside
    # the model call and surface as a 500.
    if not isinstance(message, str):
        return jsonify({"error": "Field 'message' must be a string."}), 400

    # Generate a response using the Llama 3.2 model
    try:
        response = generate_response(message)
    except Exception:
        # Boundary handler: keep the traceback in the server log, but do not
        # send internal error details to clients.
        app.logger.exception("Text generation failed")
        return jsonify({"error": "Failed to generate a response."}), 500
    return jsonify({"response": response})

if __name__ == "__main__":
    # One definition of the port, shared by the ngrok tunnel and Flask, so
    # the two cannot drift apart.
    port = 5000

    # Read the ngrok authtoken from the NGROK_AUTHTOKEN environment variable
    # so that no credential lives in the source file. When it is unset, ngrok
    # is left to use whatever token its own configuration already holds.
    ngrok_token = os.environ.get("NGROK_AUTHTOKEN")
    if ngrok_token:
        ngrok.set_auth_token(ngrok_token)

    # Start ngrok and print the public URL (the tunnel object's `public_url`
    # attribute, not the object's own string form).
    tunnel = ngrok.connect(port)
    print(f"Public URL: {tunnel.public_url}")

    # Run the Flask app; close the tunnel opened above once the server stops,
    # whether it stops normally or through an exception or interrupt.
    try:
        app.run(port=port)
    finally:
        ngrok.disconnect(tunnel.public_url)