In [1]:
!pip install llama-cpp-python --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cu121


In [2]:
from google.colab import userdata
from huggingface_hub import hf_hub_download, login

# Log in with the token stored in the Colab secret named "huggingface_login"
# (needed when the repository is gated).
hf_token = userdata.get('huggingface_login')
login(hf_token)

# Fetch the 4-bit quantized GGUF weights; hf_hub_download returns the local
# path of the downloaded file, kept in `model_path`.
GGUF_REPO = "cropinailab/aksara_v1_GGUF"
GGUF_FILE = "aksara_v1.Q4_K_M.gguf"
model_path = hf_hub_download(repo_id=GGUF_REPO, filename=GGUF_FILE)

In [None]:
from llama_cpp import Llama

# Use the path returned by hf_hub_download in the download cell instead of a
# hard-coded cache path: the snapshot directory name changes whenever the
# repository revision changes, which would make a literal path stale.
path = model_path

# Load the model
llm = Llama(
    model_path=path,
    n_gpu_layers=-1,    # Offload all layers to GPU
    n_ctx=4096,         # Context length
    chat_format="mistral-instruct"
)

# Chat history: alternating user / assistant turns, resent on every request
messages = []

print("ü§ñ Aksara Chatbot is ready! Type 'exit' to stop.\n")

while True:
    try:
        user_input = input("You: ").strip()
    except (EOFError, KeyboardInterrupt):
        # Interrupting the cell (or a closed stdin) ends the chat cleanly
        # instead of surfacing a traceback.
        print("üëã Goodbye!")
        break

    if user_input.lower() in ["exit", "quit"]:
        print("üëã Goodbye!")
        break

    if not user_input:
        # Nothing typed: do not spend a model call on an empty prompt.
        continue

    messages.append({"role": "user", "content": user_input})

    output = llm.create_chat_completion(
        messages=messages,
        max_tokens=512,
        temperature=0.7
    )

    reply = output["choices"][0]["message"]["content"]
    print(f"Aksara: {reply}\n")

    # Keep the assistant turn so the next request carries the full dialogue.
    messages.append({"role": "assistant", "content": reply})


In [4]:
!pip install flask flask-cors pyngrok

In [5]:
import os
import re
import time

from flask import Flask, request, jsonify, Response
from flask_cors import CORS
from google.colab import userdata
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
from pyngrok import ngrok

# -----------------------------
# CONFIG
# -----------------------------
# Resolve the GGUF file through the Hugging Face cache rather than a literal
# ".../snapshots/<hash>/..." path: the hash is tied to one repository revision,
# so a literal path goes stale as soon as a different revision is downloaded.
# hf_hub_download returns the local path of the file.
MODEL_PATH = hf_hub_download(
    repo_id="cropinailab/aksara_v1_GGUF",
    filename="aksara_v1.Q4_K_M.gguf",
)

os.environ["LLAMA_LOG_LEVEL"] = "ERROR"

# Load model once; every endpoint below shares this single instance.
llm = Llama(
    model_path=MODEL_PATH,
    n_gpu_layers=-1,
    n_ctx=4096,
    chat_format="mistral-instruct",
    verbose=False
)

app = Flask(__name__)
CORS(app)

# -----------------------------
# AGGRESSIVE SYSTEM PROMPT
# -----------------------------
system_prompt = """You are Aksara, a highly strict farming AI assistant. You MUST follow this EXACT format for ALL responses without exception:

MANDATORY FORMAT:
üåæ **[Your main answer here]**

üìã **Key Points:**
‚Ä¢ [Point 1]
‚Ä¢ [Point 2]
‚Ä¢ [Point 3]

üí° **Pro Tip:** [Additional insight]

STRICT RULES YOU MUST FOLLOW:
1. ALWAYS start with üåæ emoji. Never skip it.
2. ALWAYS use üìã **Key Points:** with bullet points (‚Ä¢). No other symbols.
3. ALWAYS end with üí° **Pro Tip:**. Never leave it empty.
4. Respond ONLY about farming, crops, soil, irrigation, pests, or agriculture. Never answer off-topic questions.
5. Responses must be **complete, coherent, and not cut off**.
6. NEVER mix languages in a single response.
7. If asked for Hindi or Devanagari, respond ENTIRELY in **proper Hindi/Devanagari script**. Never use Roman letters or English words for Hindi terms.
8. Technical terms like Nitrogen, Phosphorus, Potassium, soil, fertilizer, irrigation, etc., must be translated to Hindi whenever the response is in Hindi.
9. Do not provide additional commentary or disclaimers outside the mandatory format.
10. If the user asks ‚Äúhello‚Äù or greets, respond exactly:

üåæ **Hello! I'm Aksara, your farming AI assistant.**

üìã **I can help with:**
‚Ä¢ Crop planting and care
‚Ä¢ Soil health management
‚Ä¢ Pest and disease control
‚Ä¢ Irrigation and farming techniques

üí° **Pro Tip:** Ask me specific farming questions for the best advice!

LANGUAGE RULES:
- English questions ‚Üí respond fully in English, strictly following the format.
- Hindi questions OR explicitly asking for Hindi ‚Üí respond fully in Hindi using proper Devanagari script. Translate all technical terms into Hindi. No Romanization, no English words mixed in.
- NEVER switch languages mid-response.

EXAMPLES:
English:
üåæ **Tomatoes grow best in well-drained soil.**

üìã **Key Points:**
‚Ä¢ Ensure soil is loose and fertile
‚Ä¢ Water regularly but do not overwater
‚Ä¢ Protect from pests

üí° **Pro Tip:** Mulching helps retain soil moisture and reduce weeds.

Hindi:
üåæ **‡§ü‡§Æ‡§æ‡§ü‡§∞ ‡§Ö‡§ö‡•ç‡§õ‡•Ä ‡§§‡§∞‡§π ‡§∏‡•á ‡§ú‡§≤ ‡§®‡§ø‡§ï‡§æ‡§∏‡•Ä ‡§µ‡§æ‡§≤‡•Ä ‡§Æ‡§ø‡§ü‡•ç‡§ü‡•Ä ‡§Æ‡•á‡§Ç ‡§â‡§ó‡§§‡•á ‡§π‡•à‡§Ç‡•§**

üìã **‡§Æ‡•Å‡§ñ‡•ç‡§Ø ‡§¨‡§ø‡§Ç‡§¶‡•Å:**
‚Ä¢ ‡§Æ‡§ø‡§ü‡•ç‡§ü‡•Ä ‡§ï‡•ã ‡§¢‡•Ä‡§≤‡§æ ‡§î‡§∞ ‡§â‡§∞‡•ç‡§µ‡§∞‡§ï‡§Ø‡•Å‡§ï‡•ç‡§§ ‡§∞‡§ñ‡•á‡§Ç
‚Ä¢ ‡§®‡§ø‡§Ø‡§Æ‡§ø‡§§ ‡§∞‡•Ç‡§™ ‡§∏‡•á ‡§™‡§æ‡§®‡•Ä ‡§¶‡•á‡§Ç ‡§≤‡•á‡§ï‡§ø‡§® ‡§Ö‡§ß‡§ø‡§ï ‡§® ‡§¶‡•á‡§Ç
‚Ä¢ ‡§ï‡•Ä‡§ü‡•ã‡§Ç ‡§∏‡•á ‡§∏‡•Å‡§∞‡§ï‡•ç‡§∑‡§æ ‡§ï‡§∞‡•á‡§Ç

üí° **‡§∏‡•Å‡§ù‡§æ‡§µ:** ‡§Æ‡§≤‡•ç‡§ö‡§ø‡§Ç‡§ó ‡§Æ‡§ø‡§ü‡•ç‡§ü‡•Ä ‡§ï‡•Ä ‡§®‡§Æ‡•Ä ‡§¨‡§®‡§æ‡§è ‡§∞‡§ñ‡§®‡•á ‡§î‡§∞ ‡§ñ‡§∞‡§™‡§§‡§µ‡§æ‡§∞ ‡§ï‡§Æ ‡§ï‡§∞‡§®‡•á ‡§Æ‡•á‡§Ç ‡§Æ‡§¶‡§¶ ‡§ï‡§∞‡§§‡§æ ‡§π‡•à‡•§

NEVER break this format. Your responses must **always** include üåæ, üìã Key Points, and üí° Pro Tip exactly as shown, with proper translations when in Hindi. Do not omit anything."""

# Initialize with system prompt
# Module-level conversation history. /chat and /chat_stream append to it and
# /reset rebinds it (all via `global messages`), so every client of this server
# shares one conversation.
# NOTE(review): llama-cpp-python's "mistral-instruct" chat format may not render
# system-role messages into the prompt — confirm that this entry reaches the model.
messages = [{"role": "system", "content": system_prompt}]

# -----------------------------
# ENHANCED RESPONSE PROCESSING
# -----------------------------
def ensure_proper_format(response_text):
    """Coerce a model reply into the mandatory Aksara layout.

    A reply that already starts with the wheat marker is returned unchanged.
    Otherwise the first line becomes the bold headline and every remaining
    non-empty line becomes one bullet under "Key Points", followed by a
    generic Pro Tip.

    Args:
        response_text: Raw reply text from the model. ``None`` is treated as
            an empty string.

    Returns:
        The original text when it is already formatted, else the rebuilt text.

    Notes:
        - List prefixes are removed only when they really are prefixes: a
          bullet character, or digits followed by "." or ")" and whitespace.
          "10. Water" becomes "Water", while "1.5 kg urea" and "50% shade"
          are left intact.
        - Plain paragraph lines are kept as bullets rather than discarded, so
          no part of the answer is lost during reformatting.
    """
    text = (response_text or "").strip()
    if text.startswith("üåæ"):
        return response_text

    lines = text.split('\n')
    # Drop markdown bold markers around the headline; it is re-wrapped below.
    headline = lines[0].strip().strip('*').strip()

    points = []
    for raw_line in lines[1:]:
        line = raw_line.strip()
        if not line:
            continue

        # "-" and "*" count as bullets only when followed by whitespace, so a
        # line such as "**Note:** ..." is not mistaken for a list item.
        bullet = re.match(r"(?:‚Ä¢|[-*](?=\s))\s*", line)
        numbered = re.match(r"\d+[.)]\s+", line)
        if bullet:
            line = line[bullet.end():]
        elif numbered:
            line = line[numbered.end():]

        line = line.strip()
        if line:
            points.append(f"‚Ä¢ {line}\n")

    return (
        f"üåæ **{headline}**\n\nüìã **Key Points:**\n"
        + "".join(points)
        + "\nüí° **Pro Tip:** Feel free to ask more specific farming questions!"
    )

# -----------------------------
# STREAMING CHAT WITH FORMATTING
# -----------------------------
@app.route("/chat_stream", methods=["POST"])
def chat_stream():
    """Stream Aksara's reply to the posted message as plain text.

    Expects a JSON body of the form ``{"message": "<text>"}``.

    Returns:
        A ``text/plain`` streaming response. Simple greetings get a canned
        reply streamed character by character; everything else is streamed
        from the model as tokens arrive. A missing, invalid or empty message
        yields HTTP 400.

    Side effects:
        Appends the user turn and the final assistant turn to the shared
        ``messages`` history. If generation fails, the user turn is removed
        again so the history does not end on an unanswered question.
    """
    global messages

    # Tolerate a missing/invalid JSON body or a non-string "message" instead
    # of failing with an unhandled exception.
    payload = request.get_json(silent=True)
    raw_message = payload.get("message", "") if isinstance(payload, dict) else ""
    user_input = raw_message.strip() if isinstance(raw_message, str) else ""
    if not user_input:
        return Response(
            "Request body must be JSON with a non-empty 'message' string.",
            status=400,
            mimetype="text/plain",
        )

    # Handle greetings with pre-formatted response
    if user_input.lower() in ["hello", "hi", "hey", "hello there", "hi there"]:
        greeting_response = """üåæ **Hello! I'm Aksara, your farming AI assistant.**

üìã **I can help with:**
‚Ä¢ Crop planting and care
‚Ä¢ Soil health management
‚Ä¢ Pest and disease control
‚Ä¢ Irrigation and farming techniques

üí° **Pro Tip:** Ask me specific farming questions for the best advice!"""

        messages.append({"role": "user", "content": user_input})
        messages.append({"role": "assistant", "content": greeting_response})

        def stream_greeting():
            # Small delay per character so the canned reply appears "typed".
            for char in greeting_response:
                yield char
                time.sleep(0.01)

        return Response(stream_greeting(), mimetype="text/plain")

    user_message = {"role": "user", "content": user_input}
    messages.append(user_message)

    # Build the prompt as a separate list: the system prompt plus the most
    # recent turns (bounded so the prompt does not grow without limit against
    # n_ctx). The last entry is replaced with a new dict carrying the format
    # reminder, so the reminder is sent to the model but never written into
    # the stored history.
    history_window = 20
    prompt_messages = messages[:1] + messages[1:][-history_window:]
    prompt_messages[-1] = {
        "role": "user",
        "content": f"{user_input}\n\n[Remember: Respond as Aksara using the exact format with üåæ emoji, üìã Key Points, and üí° Pro Tip]",
    }

    def generate():
        try:
            output = llm.create_chat_completion(
                messages=prompt_messages,
                max_tokens=500,  # Increased for complete responses
                temperature=0.5,  # Lower for consistent formatting
                top_p=0.8,
                repeat_penalty=1.15,
                stream=True,
                stop=["User:", "Human:", "Assistant:", "[INST]", "[/INST]"]
            )

            reply_accum = ""
            for chunk in output:
                delta = chunk["choices"][0]["delta"].get("content", "")
                if delta:
                    reply_accum += delta
                    yield delta

            # The text has already been sent, so a badly formatted reply can
            # only be followed by a corrected version, not replaced.
            if reply_accum and not reply_accum.strip().startswith("üåæ"):
                corrected = ensure_proper_format(reply_accum)
                correction = f"\n\n---\nLet me format that properly:\n\n{corrected}"
                reply_accum += correction
                yield correction

            messages.append({"role": "assistant", "content": reply_accum})

        except Exception:
            # Record the real cause; the client only sees a friendly message.
            app.logger.exception("chat_stream generation failed")
            # Remove the unanswered user turn (identity check: only if it is
            # still the last entry).
            if messages and messages[-1] is user_message:
                messages.pop()
            error_response = """üåæ **Sorry, I encountered a technical issue.**

üìã **What you can do:**
‚Ä¢ Try asking your question again
‚Ä¢ Make sure your question is about farming
‚Ä¢ Check if the connection is stable

üí° **Pro Tip:** I work best with specific farming questions like "How to plant tomatoes" or "Pest control for wheat"."""
            yield error_response

    return Response(generate(), mimetype="text/plain")

# -----------------------------
# NON-STREAMING CHAT
# -----------------------------
@app.route("/chat", methods=["POST"])
def chat():
    """Answer the posted message in a single JSON response.

    Expects a JSON body of the form ``{"message": "<text>"}``.

    Returns:
        ``{"reply": "<text>"}``. Simple greetings get a canned reply; other
        messages are answered by the model and passed through
        ``ensure_proper_format`` when the reply lacks the expected opening
        marker. A missing, invalid or empty message yields HTTP 400 with an
        ``error`` field. Model failures return a friendly ``reply`` (the
        cause is logged server-side).

    Side effects:
        Appends the user turn and the assistant turn to the shared
        ``messages`` history. If generation fails, the user turn is removed
        again so the history does not end on an unanswered question.
    """
    global messages

    # Tolerate a missing/invalid JSON body or a non-string "message" instead
    # of failing with an unhandled exception.
    payload = request.get_json(silent=True)
    raw_message = payload.get("message", "") if isinstance(payload, dict) else ""
    user_input = raw_message.strip() if isinstance(raw_message, str) else ""
    if not user_input:
        return jsonify({"error": "Request body must be JSON with a non-empty 'message' string."}), 400

    # Handle greetings
    if user_input.lower() in ["hello", "hi", "hey", "hello there", "hi there"]:
        greeting_response = """üåæ **Hello! I'm Aksara, your farming AI assistant.**

üìã **I can help with:**
‚Ä¢ Crop planting and care
‚Ä¢ Soil health management
‚Ä¢ Pest and disease control
‚Ä¢ Irrigation and farming techniques

üí° **Pro Tip:** Ask me specific farming questions for the best advice!"""
        messages.append({"role": "user", "content": user_input})
        messages.append({"role": "assistant", "content": greeting_response})
        return jsonify({"reply": greeting_response})

    user_message = {"role": "user", "content": user_input}
    messages.append(user_message)

    # Text actually sent to the model: the user's words plus a format reminder.
    prompt_text = f"{user_input}\n\n[Remember: Respond as Aksara using the exact format with üåæ emoji, üìã Key Points, and üí° Pro Tip]"

    # The Hindi instruction previously sat inside the greeting branch, where
    # the message is always an exact greeting and so could never mention
    # Hindi. It is applied here, to real questions, and only to the prompt
    # copy so the stored history keeps the user's original words.
    if "hindi" in user_input.lower() or "‡§π‡§ø‡§Ç‡§¶‡•Ä" in user_input:
        prompt_text += (
            "\n\n[Respond ENTIRELY in proper Hindi using Devanagari script.\n"
            "Translate all English/technical terms like Nitrogen, Phosphorus, Potassium, soil, fertilizer, irrigation into Hindi.\n"
            "Do not use Roman letters or English words.\n"
            "Strictly follow the format: üåæ [Main Answer], üìã Key Points, üí° Pro Tip.]"
        )

    # System prompt plus the last 20 turns. Slicing the raw history alone
    # would drop the system prompt once the conversation grows, and editing
    # the sliced entries in place would write the reminder into the history.
    history_window = 20
    prompt_messages = messages[:1] + messages[1:][-history_window:]
    prompt_messages[-1] = {"role": "user", "content": prompt_text}

    try:
        output = llm.create_chat_completion(
            messages=prompt_messages,
            max_tokens=500,
            temperature=0.5,
            top_p=0.8,
            repeat_penalty=1.15,
            stop=["User:", "Human:", "Assistant:", "[INST]", "[/INST]"]
        )

        reply = output["choices"][0]["message"]["content"].strip()

        # Ensure proper formatting
        if not reply.startswith("üåæ"):
            reply = ensure_proper_format(reply)

        messages.append({"role": "assistant", "content": reply})
        return jsonify({"reply": reply})

    except Exception:
        # Record the real cause; the client only sees a friendly message.
        app.logger.exception("chat generation failed")
        # Remove the unanswered user turn (identity check: only if it is
        # still the last entry).
        if messages and messages[-1] is user_message:
            messages.pop()
        error_response = """üåæ **Sorry, I encountered a technical issue.**

üìã **What you can do:**
‚Ä¢ Try asking your question again
‚Ä¢ Make sure your question is about farming
‚Ä¢ Check if the connection is stable

üí° **Pro Tip:** I work best with specific farming questions!"""
        return jsonify({"reply": error_response})

# -----------------------------
# RESET WITH PROPER INITIALIZATION
# -----------------------------
@app.route("/reset", methods=["POST"])
def reset():
    """Discard the shared conversation and restart it from the system prompt alone."""
    global messages
    fresh_history = [dict(role="system", content=system_prompt)]
    messages = fresh_history
    confirmation = {"status": "üåæ Aksara reset successfully! Ready to help with farming questions."}
    return jsonify(confirmation)

# -----------------------------
# DEBUG ENDPOINT
# -----------------------------
@app.route("/debug", methods=["GET"])
def debug():
    """Report basic server state as JSON.

    The payload contains the system-prompt length, the number of stored
    messages, the most recent stored message (or null when there is none)
    and the model's file name.
    """
    newest_message = messages[-1] if messages else None
    # Last path component only, i.e. the file name.
    model_file = MODEL_PATH.rsplit("/", 1)[-1]
    report = dict(
        model_loaded=True,
        system_prompt_length=len(system_prompt),
        message_count=len(messages),
        last_message=newest_message,
        model_path=model_file,
    )
    return jsonify(report)

# -----------------------------
# TEST ENDPOINT
# -----------------------------
@app.route("/test_format", methods=["GET"])
def test_format():
    """Return a fixed sample reply in the expected layout (no model call)."""
    sample_lines = (
        "üåæ **This is a test of the formatting system.**",
        "",
        "üìã **Key Points:**",
        "‚Ä¢ Format is working correctly",
        "‚Ä¢ Emojis are displaying properly",
        "‚Ä¢ Structure is maintained",
        "",
        "üí° **Pro Tip:** This shows the expected response format!",
    )
    payload = {"reply": "\n".join(sample_lines)}
    return jsonify(payload)

if __name__ == "__main__":
    # Startup banner.
    model_file = MODEL_PATH.split('/')[-1]
    prompt_chars = len(system_prompt)
    print("üåæ Starting Aksara Farming AI...")
    print(f"ü§ñ Model: {model_file}")
    print(f"üìù System prompt length: {prompt_chars} characters")

    # Open an ngrok tunnel to the port Flask listens on; the auth token is
    # read from the Colab secret named "ngrok_key".
    port = 5000
    ngrok_token = userdata.get('ngrok_key')
    ngrok.set_auth_token(ngrok_token)
    public_url = ngrok.connect(port)
    print("üåç Public URL:", public_url)
    print("üöÄ Ready to help farmers!")

    # Blocks the cell while the server is running.
    app.run(host="0.0.0.0", port=port, debug=False)