In [2]:
import torch
from transformers import BertTokenizer, BertForSequenceClassification
from transformers import AutoTokenizer, AutoModelForCausalLM
import re

# Load depression detection model (keep this part unchanged)
model_path = "./depression_bert_model"
depression_tokenizer = BertTokenizer.from_pretrained(model_path)
depression_model = BertForSequenceClassification.from_pretrained(model_path)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
depression_model.to(device)

# Load offline LLM - Mistral 7B or similar model with 4-bit quantization
def load_offline_llm():
    print("Loading offline language model... This may take a moment.")
    
    # Use 4-bit quantization to reduce VRAM requirements
    model_id = "TheBloke/Mistral-7B-Instruct-v0.2-GPTQ"  # Quantized version
    
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    
    # Load with optimizations for limited VRAM
    from transformers import BitsAndBytesConfig
    
    quantization_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=torch.float16,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_use_double_quant=True
    )
    
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        device_map="auto",  # Automatically optimize placement
        quantization_config=quantization_config,
        torch_dtype=torch.float16,  # Use mixed precision
        low_cpu_mem_usage=True
    )
    
    return model, tokenizer

# Initialize the model and tokenizer
llm_model, llm_tokenizer = load_offline_llm()

def clean_text(text):
    text = re.sub(r'http\S+', '', text)
    text = re.sub(r'@\w+', '', text)
    text = re.sub(r'[^\w\s]', '', text)
    return text.lower().strip()

def predict_depression(text):
    cleaned = clean_text(text)
    inputs = depression_tokenizer(cleaned, return_tensors='pt', truncation=True, max_length=128)
    inputs = {k: v.to(device) for k, v in inputs.items()}
    
    with torch.no_grad():
        outputs = depression_model(**inputs)
    
    probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
    return probs[0][1].item()

def generate_response(prompt, context=None):
    # Create a system message to provide context for the model
    system_message = "You're a compassionate mental health assistant. Respond with empathy and validation."
    
    # Format the conversation history and prompt for the model
    formatted_prompt = f"<s>[INST] {system_message}\n\n"
    
    if context:
        # Add previous conversation turns (limited to last 3 for memory constraints)
        for i, msg in enumerate(context[-3:]):
            role = msg["role"]
            content = msg["content"]
            
            if role == "user":
                formatted_prompt += f"User: {content}\n"
            elif role == "assistant":
                formatted_prompt += f"Assistant: {content}\n"
    
    # Add the current prompt
    formatted_prompt += f"User: {prompt} [/INST]"
    
    # Tokenize the input
    inputs = llm_tokenizer(formatted_prompt, return_tensors="pt").to(device)
    
    # Generate the response with conservative settings to manage memory
    with torch.no_grad():
        outputs = llm_model.generate(
            **inputs,
            max_new_tokens=150,
            temperature=0.7,
            do_sample=True,
            top_p=0.9,
            pad_token_id=llm_tokenizer.eos_token_id
        )
    
    # Decode the output and extract just the assistant's response
    full_output = llm_tokenizer.decode(outputs[0], skip_special_tokens=True)
    response = full_output.split(prompt)[-1].strip()
    
    # Clean up the response (remove any system artifacts)
    if "Assistant:" in response:
        response = response.split("Assistant:")[-1].strip()
    
    return response

def chat():
    history = []
    print("""\n🌱 Mental Health Support Chatbot
    I'm here to listen without judgment. You can:
    - Share as much or little as you like
    - Take breaks anytime with 'quit'
    """)
    
    try:
        while True:
            user_input = input("\nYou: ").strip()
            if user_input.lower() in ['quit', 'exit']:
                print("\nTake care of yourself. You can return anytime.")
                break
                
            # Store the user message in history
            history.append({"role": "user", "content": user_input})
            
            # Analyze input
            prob = predict_depression(user_input)
            
            if prob > 0.4:
                # Generate question 
                question_prompt = f"Based on this statement: '{user_input}', generate a gentle follow-up question"
                question = generate_response(question_prompt, history)
                print(f"\n🌼 Bot: {question}")
                
                # Store the bot's question in history
                history.append({"role": "assistant", "content": question})
                
                # Get response
                answer = input("\nYou: ").strip()
                if answer.lower() in ['quit', 'exit']:
                    print("\nThank you for sharing. Be kind to yourself today.")
                    break
                
                # Store the user's follow-up in history
                history.append({"role": "user", "content": answer})
                
                # Generate advice
                advice_prompt = f"User shared: '{user_input}' then '{answer}'. Provide 2-3 supportive suggestions."
                advice = generate_response(advice_prompt, history)
                print(f"\n🌱 Bot: {advice}")
                
                # Store the bot's advice in history
                history.append({"role": "assistant", "content": advice})
                
                if prob > 0.7:
                    print("\nIf you need immediate support:")
                    print("- National Suicide Prevention Lifeline: 1-800-273-TALK")
                    print("- Crisis Text Line: Text HOME to 741741")
                
            else:
                # Generate acknowledgment
                response = generate_response(f"Acknowledge this feeling: {user_input}", history)
                print(f"\n🌸 Bot: {response}")
                
                # Store the bot's response in history
                history.append({"role": "assistant", "content": response})

            # Continue prompt
            cont = input("\nPress Enter to continue or type 'quit': ").strip()
            if cont.lower() in ['quit', 'exit']:
                print("\nRemember: Progress isn't linear. Be proud you reached out!")
                break

    except KeyboardInterrupt:
        print("\n\nIt's okay to pause. Come back when you're ready.")

if __name__ == "__main__":
    chat()


  from .autonotebook import tqdm as notebook_tqdm


Loading offline language model... This may take a moment.


PackageNotFoundError: No package metadata was found for bitsandbytes

In [None]:
import bitsandbytes
print(bitsandbytes.__version__)  # Should output 0.41.1 or higher


ModuleNotFoundError: No module named 'bitsandbytes'