In [None]:
def load_offline_llm():
    """Load the GPTQ checkpoint of Mistral-7B-Instruct and its tokenizer.

    Note: a later cell in this notebook defines another function with the
    same name; whichever definition is executed last is the one in effect.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    print("Loading offline language model...")

    repo_id = "TheBloke/Mistral-7B-Instruct-v0.2-GPTQ"

    quantized_load_options = {
        "use_safetensors": True,
        "device_map": "auto",
        # NOTE(review): presumably allows code shipped with the checkpoint to
        # run locally -- confirm the repository is trusted.
        "trust_remote_code": True,
        "use_triton": False,
        "quantize_config": None,
        "inject_fused_attention": False,  # Disable for better compatibility
    }

    # Tokenizer first, then the quantized weights (same order as before).
    tokenizer = AutoTokenizer.from_pretrained(repo_id)
    model = AutoGPTQForCausalLM.from_quantized(repo_id, **quantized_load_options)

    return model, tokenizer


In [None]:
import torch
from transformers import BertTokenizer, BertForSequenceClassification
from llama_cpp import Llama  # New import for GGUF models
import re

# Use the GPU when one is available; the classifier is moved to this device
# and predict_depression() moves its inputs there as well.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# BERT sequence classifier and matching tokenizer, both read from a local
# directory.
model_path = "./depression_bert_model"
depression_tokenizer = BertTokenizer.from_pretrained(model_path)
depression_model = BertForSequenceClassification.from_pretrained(model_path).to(device)

def load_offline_llm(
    model_path="TheBloke/Mistral-7B-Instruct-v0.2-GGUF/mistral-7b-instruct-v0.2.Q4_K_M.gguf",
    n_ctx=2048,
    n_threads=4,
    n_gpu_layers=33,
):
    """Load a GGUF model through llama-cpp-python.

    Called with no arguments this behaves exactly as before; the parameters
    only expose values that used to be hard-coded so the loader can be
    reused on machines with a different model location, CPU count or GPU.

    Args:
        model_path: Path handed straight to ``Llama``.
            NOTE(review): presumably this must point at a ``.gguf`` file on
            local disk -- confirm the default resolves on the target machine.
        n_ctx: Context window size, in tokens.
        n_threads: Number of CPU threads used for inference.
        n_gpu_layers: Number of model layers to offload to the GPU.

    Returns:
        The initialized ``Llama`` instance.
    """
    print("Loading offline language model...")

    return Llama(
        model_path=model_path,
        n_ctx=n_ctx,
        n_threads=n_threads,
        n_gpu_layers=n_gpu_layers,
        verbose=False,
    )

# Load the LLM once, when this cell/module is executed; generate_response()
# reads this module-level instance on every call.
llm_model = load_offline_llm()

def clean_text(text):
    """Normalize raw user text before it is tokenized for the classifier.

    Removes URLs, @mentions and every character that is not an ASCII letter,
    a digit or whitespace, then lower-cases the result and trims both ends.
    Whitespace inside the text is left untouched, so removed tokens can leave
    consecutive spaces behind.

    Args:
        text: The raw input string.

    Returns:
        The cleaned, lower-cased string.
    """
    # Applied in this order: URLs must go before punctuation is stripped,
    # otherwise "http://..." would no longer match as one token.
    removal_patterns = (
        r'http\S+',         # URLs
        r'@\w+',            # mentions
        r'[^a-zA-Z0-9\s]',  # special characters
    )
    for pattern in removal_patterns:
        text = re.sub(pattern, '', text)

    return text.lower().strip()

def predict_depression(text):
    """Score a message with the BERT classifier.

    The text is cleaned with ``clean_text``, tokenized (truncated to 128
    tokens), moved to ``device`` and run through ``depression_model`` without
    gradient tracking.

    Args:
        text: Raw user message.

    Returns:
        The softmax probability of class index 1 (the positive / depression
        class) as a Python float.
    """
    encoded = depression_tokenizer(
        clean_text(text),
        return_tensors='pt',
        truncation=True,
        max_length=128,
    )
    # Inputs must live on the same device as the model.
    encoded = {name: tensor.to(device) for name, tensor in encoded.items()}

    with torch.no_grad():
        logits = depression_model(**encoded).logits

    class_probs = torch.softmax(logits, dim=-1)
    # Single-item batch: row 0, column 1 is the positive class.
    return class_probs[0, 1].item()

def generate_response(prompt, history):
    """Ask the LLM for a reply given an instruction and the recent chat turns.

    The system message, the flattened history and ``prompt`` are wrapped in a
    single ``[INST] ... [/INST]`` block and sent to the module-level
    ``llm_model``.

    Args:
        prompt: Text placed after the ``User:`` label at the end of the prompt.
        history: Iterable of dicts with ``'role'`` and ``'content'`` keys,
            rendered one per line as ``role: content``.

    Returns:
        The text of the first completion choice, stripped of surrounding
        whitespace.
    """
    system_message = """You are a mental health assistant. Respond with:
- Empathetic validation
- Follow-up questions when needed
- Practical coping suggestions
- Crisis resources when risk is detected"""

    transcript = "\n".join(f"{turn['role']}: {turn['content']}" for turn in history)
    full_prompt = (
        f"[INST] {system_message}\n\nChat History:\n"
        f"{transcript}"
        f"\n\nUser: {prompt} [/INST]"
    )

    sampling_options = {
        "max_tokens": 256,
        "temperature": 0.7,
        "top_p": 0.9,
        "repeat_penalty": 1.1,
    }
    completion = llm_model(prompt=full_prompt, **sampling_options)

    first_choice = completion['choices'][0]
    return first_choice['text'].strip()

# Classifier probability above which a message is treated as high / moderate
# risk, and how many history entries are kept between turns.
_HIGH_RISK_THRESHOLD = 0.65
_MODERATE_RISK_THRESHOLD = 0.4
_MAX_HISTORY_MESSAGES = 6
_EXIT_COMMANDS = frozenset({"quit", "exit"})


def _build_guidance(depression_prob):
    """Return the instruction given to the LLM for a classifier probability."""
    if depression_prob > _HIGH_RISK_THRESHOLD:  # High risk
        return (
            f"User shows depression signs (probability: {depression_prob:.2f}). "
            "Provide supportive response with crisis resources."
        )
    if depression_prob > _MODERATE_RISK_THRESHOLD:  # Moderate risk
        return (
            f"User may be struggling (probability: {depression_prob:.2f}). "
            "Ask a gentle follow-up question and suggest one coping strategy."
        )
    # Low risk
    return "Acknowledge the message and offer general support."


def chat():
    """Run the interactive console chat loop.

    Each turn reads a line from stdin, scores it with ``predict_depression``,
    picks an instruction for the LLM based on that score, prints the reply
    from ``generate_response`` and records both sides in a rolling history.

    The loop ends when the user types ``quit`` or ``exit`` (case-insensitive)
    or when stdin is closed. Blank lines are ignored rather than being sent
    to the models.
    """
    history = []
    print("\n🌱 Mental Health Support Chatbot - Type 'quit' to exit")

    while True:
        try:
            user_input = input("\nYou: ").strip()
        except EOFError:
            # stdin was closed (Ctrl-D or exhausted piped input): leave the
            # same way an explicit 'quit' would instead of crashing.
            user_input = "quit"

        if user_input.lower() in _EXIT_COMMANDS:
            print("\nTake care of yourself. You can return anytime.")
            break

        if not user_input:
            # Nothing to analyse; avoid a classifier and LLM call on empty
            # text and just prompt again.
            continue

        # Analyze input with depression model
        depression_prob = predict_depression(user_input)
        history.append({"role": "user", "content": user_input})

        # The user's message is already in `history`; `guidance` tells the LLM
        # how to respond to it.
        guidance = _build_guidance(depression_prob)

        # Generate and display response from LLM
        response = generate_response(guidance, history)
        print(f"\n🌼 Bot: {response}")

        history.append({"role": "assistant", "content": response})

        # Keep only the most recent messages so the prompt stays bounded.
        history = history[-_MAX_HISTORY_MESSAGES:]


# Start the interactive session only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    chat()
