In [None]:
pip install transformers torch pandas streamlit folium streamlit-folium plotly pdfplumber requests

Collecting streamlit
  Downloading streamlit-1.45.0-py3-none-any.whl.metadata (8.9 kB)
Collecting streamlit-folium
  Downloading streamlit_folium-0.25.0-py3-none-any.whl.metadata (621 bytes)
Collecting pdfplumber
  Downloading pdfplumber-0.11.6-py3-none-any.whl.metadata (42 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.8/42.8 kB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metad

In [None]:
pip install bitsandbytes

Collecting bitsandbytes
  Downloading bitsandbytes-0.45.5-py3-none-manylinux_2_24_x86_64.whl.metadata (5.0 kB)
Downloading bitsandbytes-0.45.5-py3-none-manylinux_2_24_x86_64.whl (76.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.1/76.1 MB[0m [31m8.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: bitsandbytes
Successfully installed bitsandbytes-0.45.5


In [None]:
# Start the Streamlit app in the background; all of its output is discarded
!streamlit run app3.py &>/dev/null &


# Expose the Streamlit app through a serveo.net SSH reverse tunnel (remote port 80 -> local port 8501)
!ssh -o StrictHostKeyChecking=no -R 80:localhost:8501 serveo.net

ssh: connect to host serveo.net port 22: Connection refused


In [None]:
# Install Streamlit and ngrok (already done, but included for completeness)
!pip install streamlit
!pip install ngrok
!pip install pyngrok

# Run Streamlit app in the background (already running)
!streamlit run app3.py &>/dev/null &

# Set up ngrok with pyngrok
from pyngrok import ngrok, conf

# Set auth token
conf.get_default().auth_token = "2wavhDIIlhw5QwDrujdyrq3q1y9_7DPqWdnGKtAXKQFUTPuta"

# Terminate existing tunnels
ngrok.kill()

# Start a new tunnel to port 8501
public_url = ngrok.connect(8501, "http")
print("Public URL:", public_url)

Public URL: NgrokTunnel: "https://9373-34-142-196-61.ngrok-free.app" -> "http://localhost:8501"


In [None]:
# Show which processes have port 8501 open (the port the tunnel cells forward to)
!lsof -i :8501

COMMAND    PID USER   FD   TYPE DEVICE SIZE/OFF NODE NAME
streamlit 2067 root    6u  IPv4  67492      0t0  TCP *:8501 (LISTEN)
streamlit 2067 root    7u  IPv6  67493      0t0  TCP *:8501 (LISTEN)


In [None]:
!ngrok authtoken 2wavhDIIlhw5QwDrujdyrq3q1y9_7DPqWdnGKtAXKQFUTPuta

/bin/bash: line 1: ngrok: command not found


In [None]:
# Start the Streamlit app in the background; all of its output is discarded
!streamlit run app3.py &>/dev/null &

In [None]:
%%writefile app3.py

import concurrent.futures
import hashlib
import importlib.util
import os
import threading
import time

import folium
import numpy as np
import pandas as pd
import pdfplumber
import plotly.express as px
import requests
import streamlit as st
import torch
from streamlit_folium import folium_static
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

# News API setup
# The key is read from the NEWS_API_KEY environment variable so the secret is never stored
# in the source; export it before launching `streamlit run`. Without it news requests fail.
NEWS_API_KEY = os.environ.get('NEWS_API_KEY', '')
BASE_URL = 'https://newsapi.org/v2/everything'

# Initialize session state variables.
# Streamlit re-executes this script on every interaction, so only missing keys are set.
if 'model' not in st.session_state:
    st.session_state.model = None
if 'tokenizer' not in st.session_state:
    st.session_state.tokenizer = None
if 'device' not in st.session_state:
    st.session_state.device = None
if 'model_loaded' not in st.session_state:
    st.session_state.model_loaded = False
if 'advanced_mode' not in st.session_state:
    st.session_state.advanced_mode = False

# Cache for responses.
# A plain module-level `{}` would be recreated on every rerun and never produce a hit, so the
# dict lives in session state and `response_cache` is just a handle to that per-session dict.
if 'response_cache' not in st.session_state:
    st.session_state.response_cache = {}
response_cache = st.session_state.response_cache

# Check for GPU availability with detailed info
def check_gpu_availability():
    """Report the available compute device in the sidebar and return its name.

    On a CUDA machine the sidebar shows the name of GPU 0, its total VRAM and the
    VRAM currently allocated by this process's tensors. Otherwise it shows the
    number of logical CPUs.

    Returns:
        str: "cuda" when ``torch.cuda.is_available()`` is true, else "cpu".
    """
    if torch.cuda.is_available():
        gpu_name = torch.cuda.get_device_name(0)
        total_memory = torch.cuda.get_device_properties(0).total_memory / 1e9  # bytes -> GB

        st.sidebar.success(f"✅ GPU acceleration available: {gpu_name}")
        st.sidebar.info(f"Total VRAM: {total_memory:.2f} GB")

        # Memory currently held by tensors on GPU 0
        allocated = torch.cuda.memory_allocated(0) / 1e9
        st.sidebar.info(f"VRAM Used: {allocated:.2f} GB")
        return "cuda"

    # os.cpu_count() is the real logical-CPU count; the previous approach read a private
    # attribute of a throwaway ThreadPoolExecutor, which reports a worker count instead.
    cpu_count = os.cpu_count() or 1  # cpu_count() may return None when undeterminable
    st.sidebar.info(f"ℹ️ Running on CPU ({cpu_count} cores)")
    return "cpu"

# Model loading with advanced options
@st.cache_resource(show_spinner=False)
def load_advanced_model(device, advanced_mode=False):
    """Load a causal language model and tokenizer suited to the hardware.

    Model choice:
        * ``advanced_mode`` on CUDA: a 9B model with >= 16 GB VRAM, a 7B model with
          >= 8 GB, otherwise (or if VRAM cannot be read) the compact 1.1B model.
        * every other case: the compact 1.1B model.

    Precision on CUDA: FP16 with >= 16 GB VRAM, 8-bit quantization with >= 8 GB or
    when VRAM cannot be read, 4-bit quantization below 8 GB.

    If anything in the primary path raises, a 160M-parameter backup model is tried.

    Args:
        device: "cuda" or "cpu".
        advanced_mode: Allow the larger models when the GPU has enough VRAM.

    Returns:
        tuple: ``(model, tokenizer)``, or ``(None, None)`` if the backup also fails.
        NOTE(review): the function is wrapped in ``st.cache_resource``, so a
        ``(None, None)`` result is presumably cached as well — confirm.
    """
    compact_model = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
    flash_attention_models = ("NousResearch/Nous-Hermes-2-Yi-9B", "Qwen/Qwen1.5-7B-Chat")

    try:
        # Read total VRAM once; it drives both the model choice and the precision.
        vram_gb = None
        vram_error = None
        if device == "cuda":
            try:
                vram_gb = torch.cuda.get_device_properties(0).total_memory / 1e9
            except Exception as e:
                vram_error = e

        # Determine model based on hardware capabilities and settings
        if advanced_mode and device == "cuda":
            if vram_gb is None:
                # Fallback if VRAM detection fails
                model_name = compact_model
                st.sidebar.warning(f"VRAM detection failed, using compact model: {vram_error}")
            elif vram_gb >= 16:
                # Large model for high-end hardware
                model_name = "NousResearch/Nous-Hermes-2-Yi-9B"
                st.sidebar.success(f"Loading advanced 9B model ({model_name})")
            elif vram_gb >= 8:
                # Medium model for mid-range hardware
                model_name = "Qwen/Qwen1.5-7B-Chat"
                st.sidebar.success(f"Loading mid-range 7B model ({model_name})")
            else:
                # Smaller model for limited hardware
                model_name = compact_model
                st.sidebar.success(f"Loading compact 1.1B model ({model_name})")
        else:
            # Standard mode - use reliable small model
            model_name = compact_model
            st.sidebar.info(f"Loading standard model: {model_name}")

        # Load tokenizer first
        tokenizer = AutoTokenizer.from_pretrained(model_name)

        # Configure loading options based on device and available memory
        load_kwargs = {
            "device_map": "auto",
            "low_cpu_mem_usage": True
        }

        # Add GPU optimizations if available
        if device == "cuda":
            if vram_gb is None:
                # VRAM unknown: 8-bit is the middle-of-the-road choice
                load_kwargs["quantization_config"] = BitsAndBytesConfig(load_in_8bit=True)
            elif vram_gb >= 16:
                # Use FP16 for larger GPUs
                load_kwargs["torch_dtype"] = torch.float16
                # Request FlashAttention-2 only when the flash_attn package is installed;
                # asking for it otherwise makes from_pretrained raise, which would silently
                # downgrade a capable GPU to the tiny backup model.
                if (model_name in flash_attention_models
                        and importlib.util.find_spec("flash_attn") is not None):
                    load_kwargs["attn_implementation"] = "flash_attention_2"
            elif vram_gb >= 8:
                # Use 8-bit quantization for mid-range GPUs
                load_kwargs["quantization_config"] = BitsAndBytesConfig(load_in_8bit=True)
            else:
                # Use 4-bit quantization for smaller GPUs
                load_kwargs["quantization_config"] = BitsAndBytesConfig(
                    load_in_4bit=True,
                    bnb_4bit_compute_dtype=torch.float16
                )

        # Load the model with determined settings
        model = AutoModelForCausalLM.from_pretrained(model_name, **load_kwargs)

        # Smoke test: a short generation must succeed before the model is handed out
        test_input = tokenizer("Hello", return_tensors="pt").to(device)
        with torch.no_grad():
            model.generate(test_input.input_ids, max_new_tokens=5)

        return model, tokenizer
    except Exception as e:
        st.error(f"Error loading model: {e}")
        st.warning("Attempting to load a smaller model...")

        try:
            # Last resort: tiny model that should work on almost any hardware
            backup_model = "EleutherAI/pythia-160m"
            tokenizer = AutoTokenizer.from_pretrained(backup_model)
            tokenizer.pad_token = tokenizer.eos_token
            model = AutoModelForCausalLM.from_pretrained(backup_model, device_map="auto", low_cpu_mem_usage=True)
            return model, tokenizer
        except Exception as e2:
            st.error(f"Error loading backup model: {e2}")
            return None, None

# Optimized text generation with streaming capability
def generate_with_streaming(prompt, model, tokenizer, device, placeholder, speed_factor=1.0):
    """Generate a reply for ``prompt`` and reveal it in ``placeholder`` with a typing effect.

    The whole reply is generated first; the "streaming" is a display effect only.

    Args:
        prompt: Fully formatted prompt text.
        model: Object exposing ``generate(input_ids, **kwargs)``.
        tokenizer: Callable tokenizer that also exposes ``decode``.
        device: Device the encoded inputs are moved to ("cuda" or "cpu").
        placeholder: Object exposing ``markdown(text)`` (e.g. ``st.empty()``).
        speed_factor: Typing-speed multiplier; larger is faster. Non-positive
            values are treated as 1.0 instead of dividing by zero.

    Returns:
        str: The generated reply, without the prompt, stripped of outer whitespace.
    """
    # Prepare model inputs
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    prompt_length = inputs.input_ids.shape[1]

    # Set generation parameters - balance quality and speed
    gen_config = {
        "max_new_tokens": 512,
        "temperature": 0.7,
        "top_p": 0.92,
        "repetition_penalty": 1.2,
        "do_sample": True
    }

    # For larger models with GPU, enable additional optimizations
    if device == "cuda" and hasattr(model, "config"):
        gen_config["use_cache"] = True

    # Pass the attention mask when the tokenizer produced one
    attention_mask = getattr(inputs, "attention_mask", None)
    if attention_mask is not None:
        gen_config["attention_mask"] = attention_mask

    with torch.no_grad():
        outputs = model.generate(inputs.input_ids, **gen_config)

    # Decode only the newly generated tokens. Splitting the decoded text on the prompt is
    # unreliable: decoding drops special tokens, so the prompt string often no longer
    # appears verbatim and the whole prompt would leak into the reply.
    response = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True).strip()

    # Delay between updates (larger speed_factor = shorter delay)
    if speed_factor <= 0:
        speed_factor = 1.0
    delay = 0.01 / speed_factor

    # Advance 5 characters per update, but reveal further ahead as the text grows
    base_step = 5
    reveal = base_step
    for start in range(0, len(response), base_step):
        if start > 100:
            reveal = 20
        elif start > 50:
            reveal = 10

        placeholder.markdown(response[:start + reveal] + "▌")
        time.sleep(delay)

        # Occasionally add a slightly longer pause for natural reading
        if start > 0 and start % 50 == 0:
            time.sleep(delay * 3)

    # Show final response without the cursor
    placeholder.markdown(response)

    return response

# Predefined high-quality responses - expanded set for common legal questions.
# Maps a lowercase keyword to a canned answer. get_ai_response returns the answer whose
# keyword occurs as a substring of the lower-cased query, without calling the model.
predefined_responses = {
    "arraignment": """An arraignment is a critical early stage in the criminal justice process where a defendant is formally presented with charges. During an arraignment, several key events occur:

First, the defendant appears before a judge in a courtroom setting. The judge presides over the proceedings and makes legal determinations. The charges against the defendant are formally read aloud, ensuring the defendant understands exactly what they are being accused of.

Second, the defendant is informed of their constitutional rights, including the right to counsel, the right to a trial, and the right against self-incrimination. If the defendant cannot afford an attorney, the court will appoint one. At this stage, the defendant enters a plea - typically guilty, not guilty, or no contest (nolo contendere).

Third, the judge makes decisions about bail or pretrial release conditions. The judge considers factors such as the severity of the alleged crime, the defendant's criminal history, community ties, and whether they pose a flight risk or danger to the community. Based on these factors, the judge may set bail, release the defendant on their own recognizance, or in some serious cases, order the defendant to be held without bail.

The participants in an arraignment include the judge (who presides over the proceeding), the prosecutor (representing the state/government), the defense attorney (representing the defendant), the defendant themselves, and court personnel such as the clerk and bailiff. Unlike trials, arraignments typically do not involve juries.""",

    "bail": """Bail is a financial arrangement that allows a defendant to be released from custody while awaiting trial. The primary purpose of bail is to ensure the defendant returns for future court appearances while balancing their right to freedom before conviction.

There are several forms of bail commonly used in the criminal justice system. Cash bail requires the defendant to pay the full amount to the court. Surety bonds involve a bail bondsman who charges a fee (typically 10-15% of the bail amount) and posts the full bond on behalf of the defendant. Property bonds use real estate as collateral. Some defendants may be released on their own recognizance without financial requirements if the judge determines they pose little flight risk.

When determining bail, judges consider multiple factors: the severity of the alleged crime, the defendant's criminal history, ties to the community, financial resources, and whether they pose a danger to the public. The Eighth Amendment to the U.S. Constitution prohibits "excessive bail," though courts have considerable discretion in setting amounts.

Not every defendant is eligible for bail. Those charged with the most serious crimes (like capital murder) or who pose significant flight risks may be held without bail. Similarly, defendants who violate previous bail conditions may have their bail revoked.""",

    "evidence": """Evidence in criminal cases falls into several distinct categories, each with specific legal implications and handling requirements.

Direct evidence directly proves a fact without requiring inference or presumption. Examples include eyewitness testimony, video recordings of the crime in progress, or a suspect's confession. This type of evidence has straightforward probative value but may still have reliability issues, particularly with eyewitness accounts which can be affected by perception limitations, memory distortion, or bias.

Circumstantial evidence requires inference to connect it to a conclusion. For example, fingerprints at a crime scene don't directly prove someone committed a crime, but they prove the person was present. Despite common misconceptions, circumstantial evidence can be extremely powerful, especially when multiple pieces corroborate each other. Many cases are successfully prosecuted primarily on circumstantial evidence.

Physical evidence encompasses tangible objects relevant to the crime, such as weapons, documents, biological samples, or trace evidence like fibers and soil. The collection, preservation, and analysis of physical evidence must follow strict protocols to maintain the "chain of custody" - the documented chronology of the evidence's handling and storage. Breaks in this chain can render evidence inadmissible in court.

Testimonial evidence comes from witnesses' statements, either in sworn affidavits or court testimony. The reliability of this evidence depends on factors including the witness's credibility, opportunity to observe, memory, and potential biases.

Digital evidence has become increasingly important in modern investigations. This includes data from computers, mobile devices, social media accounts, surveillance systems, and IoT devices. Digital forensics specialists must follow particular protocols to ensure this evidence is authentic, complete, and admissible."""
}

# Enhanced response generation
def _render_typing_effect(placeholder, text, step, delay=0.01):
    """Reveal ``text`` in ``placeholder`` ``step`` characters at a time, then show it without the cursor."""
    for end in range(step, len(text) + step, step):
        placeholder.markdown(text[:end] + "▌")
        time.sleep(delay)
    placeholder.markdown(text)


def _group_into_paragraphs(text, sentences_per_paragraph=3):
    """Split ``text`` on '. ' and regroup the sentences into blank-line separated paragraphs."""
    paragraphs = []
    current_paragraph = []

    for fragment in text.split('. '):
        sentence = fragment.strip()
        if not sentence:
            continue
        # Restore the full stop removed by the split, but never after existing end punctuation
        if not sentence.endswith(('.', '!', '?')):
            sentence += '.'
        current_paragraph.append(sentence)

        if len(current_paragraph) >= sentences_per_paragraph:
            paragraphs.append(' '.join(current_paragraph))
            current_paragraph = []

    if current_paragraph:  # Add any remaining sentences
        paragraphs.append(' '.join(current_paragraph))

    return '\n\n'.join(paragraphs)


def get_ai_response(query, context=None, role="investigator", stream=True, placeholder=None):
    """Answer ``query`` from the cache, a predefined answer, or the loaded model.

    Lookup order: (1) ``response_cache`` keyed by an MD5 of query + context + role,
    (2) ``predefined_responses`` whose keyword is a substring of the lower-cased
    query, (3) model generation with a role-specific system prompt.

    Args:
        query: The user's question.
        context: Optional conversation text to include in the prompt.
        role: Key selecting the system prompt; unknown roles use "investigator".
        stream: When true and ``placeholder`` is given, show the answer with a typing effect.
        placeholder: Streamlit element used for the typing effect and for errors.

    Returns:
        str: The answer, or an explanatory message if the model is not loaded or
        generation fails.
    """
    # Get cached model, tokenizer and device
    model = st.session_state.model
    tokenizer = st.session_state.tokenizer
    device = st.session_state.device

    if not model or not tokenizer:
        return "Model is not loaded. Please wait for the model to load or check your hardware requirements."

    # Check cache for this query (MD5 is used only as a compact key, not for security)
    query_hash = hashlib.md5((query + (context or "") + role).encode()).hexdigest()
    cached_response = response_cache.get(query_hash)

    if cached_response:
        # If streaming, simulate typing for cached response
        if stream and placeholder:
            # Show cached marker for transparency (only in dev mode)
            if st.session_state.get('dev_mode', False):
                placeholder.caption("Using cached response")
            _render_typing_effect(placeholder, cached_response, step=5)

        return cached_response

    # Check predefined responses: a keyword match anywhere in the query wins
    query_lower = query.lower()
    for key, response in predefined_responses.items():
        if key in query_lower:
            if stream and placeholder:
                _render_typing_effect(placeholder, response, step=10)
            return response

    try:
        # Define role-specific prompt templates
        role_contexts = {
            "investigator": "You are an expert criminal investigator with extensive knowledge in forensics, detective work, and criminal profiling. You provide detailed analyses of crime scenes and investigative techniques.",
            "legal_expert": "You are a seasoned legal professional specializing in criminal law, legal procedures, and case precedents. You offer comprehensive legal advice and analysis of complex legal situations. You have a deep understanding of the roles of judges, prosecutors, defense attorneys, and the court process.",
            "prevention_specialist": "You are a crime prevention expert with comprehensive knowledge of safety strategies, risk mitigation techniques, and security systems. You provide detailed preventative measures tailored to specific scenarios.",
            "news_analyst": "You are a professional crime news analyst with insights into current criminal trends, societal impacts, and patterns in criminal behavior. You offer detailed analysis of crime reporting and media coverage."
        }

        # Create system prompt
        system_prompt = role_contexts.get(role, role_contexts["investigator"])

        # Add context from previous conversation if available
        if context:
            context_text = f"\nHere is some context from our previous conversation: {context}\n"
        else:
            context_text = ""

        # Format prompt with the model's expected format.
        # Every tokenizer has an apply_chat_template method, so its mere presence proves
        # nothing; what matters is whether a template is configured (the backup model's
        # tokenizer, for example, may have none).
        if getattr(tokenizer, "chat_template", None):
            messages = [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": f"{context_text}\n\nPlease provide a comprehensive, factually accurate response to this question:\n\n{query}"}
            ]
            prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
        else:
            # Manual formatting for models without chat template support
            prompt = f"<|system|>\n{system_prompt}\n<|user|>\n{context_text}\nPlease provide a comprehensive, factually accurate response to this question:\n\n{query}\n<|assistant|>\n"

        # Generate response with streaming if requested
        if stream and placeholder:
            response = generate_with_streaming(
                prompt,
                model,
                tokenizer,
                device,
                placeholder,
                speed_factor=1.5  # Slightly faster than default
            )
        else:
            # Generate without streaming for background tasks
            inputs = tokenizer(prompt, return_tensors="pt").to(device)
            with torch.no_grad():
                outputs = model.generate(
                    inputs.input_ids,
                    attention_mask=getattr(inputs, "attention_mask", None),
                    max_new_tokens=512,
                    temperature=0.7,
                    do_sample=True,
                    top_p=0.92,
                    repetition_penalty=1.1
                )
            # Decode only the tokens generated after the prompt; splitting the decoded text
            # on the query is fragile because the query can also occur inside the context.
            prompt_length = inputs.input_ids.shape[1]
            response = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True).strip()

        # Format response into paragraphs if needed
        if len(response) > 300 and "\n\n" not in response:
            response = _group_into_paragraphs(response)

        # Cache the response for future use
        response_cache[query_hash] = response

        return response

    except Exception as e:
        error_msg = f"Error processing query: {str(e)}"

        if stream and placeholder:
            placeholder.error(error_msg)

        # No keyword fallback is needed here: any query containing a predefined keyword
        # has already been answered before the try block was entered.
        return "I apologize, but I couldn't process your question properly. Please try asking a simpler question or check your hardware resources."

# Parallel document processing
def process_document_in_parallel(text, chunk_size=5000):
    """Count legal keywords in fixed-size chunks of ``text`` using a thread pool.

    Args:
        text: Document text to analyse.
        chunk_size: Number of characters per chunk; must be positive.

    Returns:
        list[dict]: One dict per chunk, in document order, with keys
        ``"excerpt"`` (first 100 characters plus "..."), ``"terms"``
        (``"term (count)"`` strings for each keyword found, case-insensitive) and
        ``"length"`` (chunk length). Empty input yields ``[]``.

    Raises:
        ValueError: If ``chunk_size`` is not positive.
    """
    if chunk_size <= 0:
        # A negative step would silently yield no chunks and drop the whole document
        raise ValueError("chunk_size must be a positive integer")

    if not text:
        return []

    # Simple keyword extraction - can be enhanced with NLP
    legal_terms = ("court", "defendant", "plaintiff", "testimony", "evidence",
                   "judge", "attorney", "ruling", "verdict", "motion", "objection")

    def analyze_chunk(chunk):
        """Summarise one chunk: excerpt, keyword counts and length."""
        lowered = chunk.lower()  # lower-case once instead of once per keyword
        found_terms = []
        for term in legal_terms:
            count = lowered.count(term)
            if count:
                found_terms.append(f"{term} ({count})")

        return {
            "excerpt": chunk[:100] + "...",
            "terms": found_terms,
            "length": len(chunk)
        }

    # Split document into manageable chunks
    chunks = [text[start:start + chunk_size] for start in range(0, len(text), chunk_size)]

    # executor.map preserves input order, so results line up with the chunks
    with concurrent.futures.ThreadPoolExecutor() as executor:
        return list(executor.map(analyze_chunk, chunks))

def create_interactive_ai_tab(title, role, initial_context=None):
    """Render a chat interface for one AI persona, with its own conversation history.

    The history is kept in ``st.session_state`` under ``"{role}_conversation_history"``.
    The model is loaded lazily on the first question. Each question is answered via
    ``get_ai_response`` with up to three earlier non-system messages as context.

    Args:
        title: Human-readable persona name used in prompts and button labels.
        role: Persona key; selects the system prompt and namespaces state/widget keys.
        initial_context: Optional introductory text shown as an info box and
            restored when the conversation is reset.
    """
    # Unique session state for each tab
    session_key = f"{role}_conversation_history"

    # Initialize conversation history
    if session_key not in st.session_state:
        st.session_state[session_key] = []
        if initial_context:
            st.session_state[session_key].append({
                'role': 'system',
                'content': initial_context
            })

    # Display conversation history
    for message in st.session_state[session_key]:
        if message['role'] == 'user':
            st.chat_message("user").write(message['content'])
        elif message['role'] == 'assistant':
            st.chat_message("assistant").write(message['content'])
        elif message['role'] == 'system':
            st.info(message['content'])

    # Chat input
    user_query = st.chat_input(f"Ask {title} a question...")

    if user_query:
        # Check if model is loaded
        if not st.session_state.model_loaded:
            with st.spinner("Loading AI model (this may take a minute)..."):
                # Load model and prepare for use
                device = check_gpu_availability()
                model, tokenizer = load_advanced_model(device, st.session_state.advanced_mode)

                if model and tokenizer:
                    st.session_state.model = model
                    st.session_state.tokenizer = tokenizer
                    st.session_state.device = device
                    st.session_state.model_loaded = True
                    st.success("AI model loaded successfully!")
                else:
                    st.error("Failed to load model. Please check your installation and hardware.")
                    return

        # Build the context from EARLIER turns only (last 3 non-system messages), before the
        # new question is appended: the question is passed separately, and repeating it as
        # "previous conversation" would duplicate it in the prompt.
        previous_turns = [
            msg for msg in st.session_state[session_key]
            if msg['role'] != 'system'
        ][-3:]
        context = "\n".join(f"{msg['role']}: {msg['content']}" for msg in previous_turns)

        # Add user message to history
        st.session_state[session_key].append({
            'role': 'user',
            'content': user_query
        })

        # Display user query
        st.chat_message("user").write(user_query)

        # Generate AI response
        with st.chat_message("assistant"):
            # Create a placeholder for streaming response
            response_placeholder = st.empty()

            # Start timestamp for performance measurement
            start_time = time.time()

            # Generate response with streaming
            ai_response = get_ai_response(
                user_query,
                context=context,
                role=role,
                stream=True,
                placeholder=response_placeholder
            )

            # Calculate response time
            response_time = time.time() - start_time

            # Show response time in subtle format
            st.caption(f"Response time: {response_time:.2f} seconds")

            # Store AI response in history
            st.session_state[session_key].append({
                'role': 'assistant',
                'content': ai_response
            })

    # Advanced options
    with st.expander("Conversation Controls", expanded=False):
        # Reset conversation button
        if st.button(f"🔄 Reset {title} Conversation", key=f"reset_conversation_{role}"):
            st.session_state[session_key] = []
            if initial_context:
                st.session_state[session_key].append({
                    'role': 'system',
                    'content': initial_context
                })
            st.rerun()

        # Add copy button with unique key
        if st.session_state[session_key] and len(st.session_state[session_key]) > 1:
            if st.button("📋 Copy Last Response", key=f"copy_response_{role}"):
                last_response = next((msg['content'] for msg in reversed(st.session_state[session_key])
                                    if msg['role'] == 'assistant'), None)
                if last_response:
                    # Server-side Python cannot write to the browser clipboard, so claiming
                    # "copied" would be false. st.code renders the text with a copy icon.
                    st.code(last_response, language=None)
                    st.caption("Use the copy icon on the box above to copy the response.")

def fetch_crime_news():
    """Fetch up to five recent English crime-related articles from the News API.

    Successful results are cached for one hour. Failures are reported with
    ``st.error`` and are not cached, so the next call retries.

    Returns:
        list: The ``articles`` list from the API response, or ``[]`` on any error.
    """
    # Use cache to avoid redundant API calls
    @st.cache_data(ttl=3600)  # Cache for 1 hour
    def cached_news_fetch():
        params = {
            'q': 'crime OR criminal OR police',
            'apiKey': NEWS_API_KEY,
            'language': 'en',
            'pageSize': 5
        }
        # A timeout keeps a stalled connection from hanging the app indefinitely
        response = requests.get(BASE_URL, params=params, timeout=10)
        if not response.ok:
            # Raise instead of returning [] so that the failure is not cached
            raise RuntimeError(f"News API returned HTTP {response.status_code}")
        return response.json().get('articles', [])

    try:
        return cached_news_fetch()
    except Exception as e:
        # Connection errors can embed the request URL, which carries the API key
        message = str(e)
        if NEWS_API_KEY:
            message = message.replace(NEWS_API_KEY, '***')
        st.error(f"Error fetching news: {message}")
        return []

def format_news_article(article):
    """Normalise a News API article dict into display-ready fields.

    Missing keys, JSON nulls (``None``) and empty strings all fall back to the same
    defaults, so a null ``source`` or ``description`` no longer crashes or leaks
    ``None`` into the UI.

    Args:
        article: Dict with optional keys ``title``, ``source`` (dict with ``name``),
            ``publishedAt``, ``description`` and ``url``.

    Returns:
        dict: Keys ``title``, ``source``, ``date`` (first 10 characters of
        ``publishedAt``), ``description`` (at most 200 characters plus "...",
        with ``<`` and ``>`` escaped) and ``url``.
    """
    title = article.get('title') or 'No Title'
    source = (article.get('source') or {}).get('name') or 'Unknown Source'
    date = (article.get('publishedAt') or '')[:10]
    description = article.get('description') or 'No description available'
    url = article.get('url') or ''

    # Truncate long descriptions
    if len(description) > 200:
        description = description[:200] + "..."

    # Neutralise potential HTML tags in description
    description = description.replace('<', '&lt;').replace('>', '&gt;')

    return {
        'title': title,
        'source': source,
        'date': date,
        'description': description,
        'url': url
    }

# Create a dynamic crime dashboard
def create_crime_dashboard():
    """Create an interactive crime statistics dashboard.

    Renders three Streamlit tabs from a hard-coded sample dataset of ten
    cities:

    * "Crime Map": a folium map with one circle marker per city, sized and
      coloured by the crime type chosen in a select box.
    * "Crime Charts": a grouped plotly bar chart of the three crime types per
      city, followed by a line chart of hard-coded monthly sample counts.
    * "Data Table": the dataset as a table plus a CSV download button.

    Takes no arguments and returns nothing; all output is written to the page.
    """
    st.subheader("Crime Hotspot Visualization")

    # Sample data - could be replaced with real data source
    crime_data = pd.DataFrame({
        "City": ["Delhi", "Mumbai", "Kolkata", "Chennai", "Bangalore",
                "Hyderabad", "Pune", "Ahmedabad", "Jaipur", "Lucknow"],
        "Latitude": [28.7041, 19.0760, 22.5726, 13.0827, 12.9716,
                    17.3850, 18.5204, 23.0225, 26.9124, 26.8467],
        "Longitude": [77.1025, 72.8777, 88.3639, 80.2707, 77.5946,
                    78.4867, 73.8567, 72.5714, 75.7873, 80.9462],
        "Crime_Rate": [780, 720, 640, 600, 580, 550, 520, 490, 470, 450],
        "Burglary": [320, 280, 250, 220, 210, 190, 180, 170, 160, 150],
        "Assault": [280, 260, 240, 230, 220, 210, 200, 190, 180, 170],
        "Vehicle_Theft": [180, 170, 150, 140, 130, 120, 110, 100, 90, 80]
    })

    # One tab per view of the same dataset
    map_tab, chart_tab, table_tab = st.tabs(["Crime Map", "Crime Charts", "Data Table"])

    with map_tab:
        # Let the user pick which measure drives marker size and colour
        crime_type = st.selectbox(
            "Select Crime Type",
            ["Overall Crime Rate", "Burglary", "Assault", "Vehicle Theft"],
            key="map_crime_selector"
        )

        # Translate the human-readable label into the dataframe column name
        crime_column_map = {
            "Overall Crime Rate": "Crime_Rate",
            "Burglary": "Burglary",
            "Assault": "Assault",
            "Vehicle Theft": "Vehicle_Theft"
        }

        selected_column = crime_column_map[crime_type]

        # Create interactive map with folium
        m = folium.Map(location=[20.5937, 78.9629], zoom_start=5)

        # The column maximum is identical for every row, so compute it once
        # instead of rescanning the column on each loop iteration.
        max_value = crime_data[selected_column].max()

        # Add one marker per city
        for _, row in crime_data.iterrows():
            # Scale circle size proportionally
            radius = row[selected_column] / 40  # Adjust divisor to get reasonable circle sizes

            # Share of the highest value in the selected column; the guard keeps
            # an all-zero column from dividing by zero.
            crime_ratio = row[selected_column] / max_value if max_value else 0

            # Colour by that share (red = high, orange = medium, green = low)
            if crime_ratio > 0.7:
                color = "red"
            elif crime_ratio > 0.4:
                color = "orange"
            else:
                color = "green"

            # Create popup content with HTML for better formatting
            popup_html = f"""
            <div style="width:200px">
                <h4>{row['City']}</h4>
                <b>{crime_type}:</b> {row[selected_column]}<br>
                <b>Overall Crime Rate:</b> {row['Crime_Rate']}
            </div>
            """

            # Add interactive marker
            folium.CircleMarker(
                location=[row["Latitude"], row["Longitude"]],
                radius=radius,
                color=color,
                fill=True,
                fill_color=color,
                fill_opacity=0.7,
                popup=folium.Popup(popup_html, max_width=300)
            ).add_to(m)

        # Display the map
        folium_static(m)

    with chart_tab:
        # Create crime comparison chart
        st.subheader("Crime Type Comparison by City")

        # Reshape to long form: one row per (city, crime type) pair
        plot_data = pd.melt(
            crime_data,
            id_vars=['City'],
            value_vars=['Burglary', 'Assault', 'Vehicle_Theft'],
            var_name='Crime Type',
            value_name='Count'
        )

        # Create interactive bar chart
        fig = px.bar(
            plot_data,
            x='City',
            y='Count',
            color='Crime Type',
            barmode='group',
            title="Crime Distribution by City",
            height=500
        )

        # Customize layout
        fig.update_layout(
            xaxis_title="City",
            yaxis_title="Number of Incidents",
            legend_title="Crime Type",
            font=dict(size=12)
        )

        # Display the chart
        st.plotly_chart(fig, use_container_width=True)

        # Add trend analysis
        st.subheader("Crime Trend Analysis")

        # Hard-coded sample time series: six months for each of three crime types
        months = ["Jan", "Feb", "Mar", "Apr", "May", "Jun"]
        trend_data = pd.DataFrame({
            "Month": months * 3,
            "Crime Type": ["Burglary"] * 6 + ["Assault"] * 6 + ["Vehicle Theft"] * 6,
            "Count": [120, 115, 118, 125, 115, 110,
                      80, 85, 90, 88, 95, 92,
                      65, 60, 55, 58, 62, 60]
        })

        # Create line chart
        trend_fig = px.line(
            trend_data,
            x="Month",
            y="Count",
            color="Crime Type",
            markers=True,
            title="Monthly Crime Trends"
        )

        # Display trend chart
        st.plotly_chart(trend_fig, use_container_width=True)

    with table_tab:
        # Display sortable data table
        st.subheader("Crime Statistics Data Table")
        st.dataframe(
            crime_data,
            hide_index=True,
            column_config={
                "City": st.column_config.TextColumn("City"),
                "Crime_Rate": st.column_config.NumberColumn("Overall Crime Rate", format="%.0f"),
                "Burglary": st.column_config.NumberColumn("Burglary", format="%.0f"),
                "Assault": st.column_config.NumberColumn("Assault", format="%.0f"),
                "Vehicle_Theft": st.column_config.NumberColumn("Vehicle Theft", format="%.0f")
            }
        )

        # Offer the same table as a UTF-8 encoded CSV download
        csv = crime_data.to_csv(index=False).encode('utf-8')
        st.download_button(
            "Download Data as CSV",
            csv,
            "crime_statistics.csv",
            "text/csv",
            key="download-csv"
        )

# Main Streamlit App
def main():
    """Build and render the whole Streamlit application.

    Configures the page, renders the sidebar (theme selection and AI model
    load/reload controls) and then six tabs: crime solving assistant, legal
    document analysis, crime dashboard, crime news, crime prevention
    strategies and criminal law consultation.

    Takes no arguments and returns nothing; all output is written to the page
    and all persistent state is kept in ``st.session_state``.
    """
    # Streamlit UI Setup
    st.set_page_config(
        page_title="Crime Investigation AI Pro",
        layout="wide",
        initial_sidebar_state="expanded",
        menu_items={
            'About': "Crime Investigation AI Pro - A comprehensive tool for criminal justice professionals"
        }
    )

    # Initialize session state
    if 'advanced_mode' not in st.session_state:
        st.session_state.advanced_mode = False

    # Check GPU and determine device
    device = check_gpu_availability()
    st.session_state.device = device

    # Sidebar for app settings
    with st.sidebar:
        st.title("🚔 AI Crime Console Pro")
        st.subheader("Application Settings")

        # App theme with functional implementation
        theme = st.radio(
            "Interface Theme",
            ["Professional", "Dark Mode", "High Contrast"],
            index=0,
            key="theme_selection"
        )

        # Apply selected theme using custom CSS ("Professional" injects nothing)
        if theme == "Dark Mode":
            st.markdown("""
            <style>
                .stApp {background-color: #1E1E1E; color: #FFFFFF;}
                .stTextInput>div>div>input {background-color: #333333; color: #FFFFFF;}
                .stMarkdown {color: #FFFFFF;}
                div[data-baseweb="card"] {background-color: #333333;}
                .css-145kmo2 {color: #FFFFFF !important;}
            </style>
            """, unsafe_allow_html=True)
        elif theme == "High Contrast":
            st.markdown("""
            <style>
                .stApp {background-color: #000000; color: #FFFFFF;}
                .stTextInput>div>div>input {background-color: #000000; color: #FFFFFF; border: 2px solid #FFFF00;}
                h1, h2, h3 {color: #FFFF00 !important;}
                .stMarkdown {color: #FFFFFF; font-size: 1.1rem;}
                div[data-baseweb="card"] {background-color: #000000; border: 1px solid #FFFF00;}
                .stButton>button {background-color: #FFFF00; color: #000000;}
            </style>
            """, unsafe_allow_html=True)

        # Add advanced mode toggle
        st.divider()
        st.subheader("AI Model Settings")
        advanced_mode = st.toggle("Advanced AI Mode", value=st.session_state.advanced_mode)

        # Update session state if changed
        if advanced_mode != st.session_state.advanced_mode:
            st.session_state.advanced_mode = advanced_mode

            # Drop the loaded model so it is loaded again with the new setting.
            # NOTE(review): `model_loaded` is not initialized in this function;
            # presumably it is set before main() runs - confirm.
            if st.session_state.model_loaded:
                st.warning("AI mode changed. Model will reload with new settings.")
                st.session_state.model_loaded = False
                st.session_state.model = None
                st.session_state.tokenizer = None

        # Show model status
        if st.session_state.model_loaded:
            st.success("✅ AI model loaded and running")

            # Add option to reload model
            if st.button("Reload AI Model"):
                with st.spinner("Reloading model..."):
                    # Clear the old references first so a failed reload leaves
                    # the app in the "not loaded" state.
                    st.session_state.model_loaded = False
                    st.session_state.model = None
                    st.session_state.tokenizer = None

                    model, tokenizer = load_advanced_model(device, st.session_state.advanced_mode)

                    if model and tokenizer:
                        st.session_state.model = model
                        st.session_state.tokenizer = tokenizer
                        st.session_state.model_loaded = True
                        st.success("Model reloaded successfully!")
                    else:
                        st.error("Failed to reload model.")
        else:
            st.info("AI model will load when needed")
            if st.button("Load AI Model Now"):
                with st.spinner("Loading AI model..."):
                    model, tokenizer = load_advanced_model(device, st.session_state.advanced_mode)

                    if model and tokenizer:
                        st.session_state.model = model
                        st.session_state.tokenizer = tokenizer
                        st.session_state.model_loaded = True
                        st.success("AI model loaded successfully!")
                    else:
                        st.error("Failed to load model. Please check your installation and hardware.")

    # Main content
    st.title("🚔 Crime Investigation AI Pro")
    st.subheader("AI-Powered Criminal Justice Assistant")

    # Create tabs
    tab1, tab2, tab3, tab4, tab5, tab6 = st.tabs([
        "🕵️‍♂️ Crime Solving AI",
        "📑 Legal Document Analysis",
        "📊 Crime Dashboard",
        "📰 Real-Time Crime News",
        "🛡️ Crime Prevention Strategies",
        "⚖️ Criminal Law Consultation"
    ])

    # Crime Solving AI Tab
    with tab1:
        st.subheader("🔍 Criminal Investigation Assistance")
        create_interactive_ai_tab(
            "Sherlock AI Detective",
            "investigator",
            "An expert system designed to assist in solving complex criminal cases."
        )

    # Legal Document Analysis Tab
    with tab2:
        st.subheader("📄 Legal Document AI Analysis")

        # File uploader
        uploaded_file = st.file_uploader("📂 Upload Legal Document (PDF)", type=["pdf"])

        if uploaded_file:
            with st.spinner("Analyzing document..."):
                try:
                    with pdfplumber.open(uploaded_file) as pdf:
                        # Read the page count while the PDF is still open.
                        page_count = len(pdf.pages)
                        # Extract each page exactly once; filtering on a second
                        # extract_text() call would do the extraction twice.
                        page_texts = [page.extract_text() for page in pdf.pages]

                    # Pages without extractable text are skipped.
                    text = "\n".join(page_text for page_text in page_texts if page_text)

                    # Process document in parallel for better performance
                    analysis_results = process_document_in_parallel(text)

                    # Create tabs for different analysis views
                    doc_tabs = st.tabs(["Document Summary", "Document Q&A", "Document Sections"])

                    # Document Summary Tab
                    with doc_tabs[0]:
                        # Display document metadata
                        st.subheader("Document Overview")
                        col1, col2, col3 = st.columns(3)
                        with col1:
                            st.metric("Pages", page_count)
                        with col2:
                            st.metric("Words", len(text.split()))
                        with col3:
                            st.metric("Characters", len(text))

                        # Extract and display key information
                        st.subheader("Key Information")

                        # Extract potential dates from the document
                        # (numeric d/m/y or d-m-y forms, or day + month name + year)
                        import re
                        date_pattern = r'\b(?:\d{1,2}[/-]\d{1,2}[/-]\d{2,4}|\d{1,2}\s+(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]*\.?\s+\d{2,4})\b'
                        dates = re.findall(date_pattern, text)

                        # Extract potential case numbers or document IDs
                        id_pattern = r'\b(?:Case\s+No\.?\s*|ID\s*[:=#]\s*|Document\s+No\.?\s*|File\s+No\.?\s*)([A-Z0-9-]+)\b'
                        case_ids = re.findall(id_pattern, text, re.IGNORECASE)

                        # Display extracted information (first five dates, first three IDs)
                        if dates:
                            st.write("**Dates Mentioned:**", ", ".join(dates[:5]))
                            if len(dates) > 5:
                                st.caption(f"...and {len(dates) - 5} more dates")

                        if case_ids:
                            st.write("**Potential Case/Document IDs:**", ", ".join(case_ids[:3]))

                        # Create a simple text summary
                        st.subheader("Document Preview")
                        st.text_area("First 500 characters:", text[:500] + "...", height=150)

                    # Document Q&A Tab - Interactive queries about the document
                    with doc_tabs[1]:
                        st.subheader("Ask Questions About This Document")

                        # Initialize session state for document context
                        # NOTE(review): this is only set once per session, so it
                        # keeps the first document's text after another upload,
                        # and nothing in this function reads it - confirm intent.
                        if "document_context" not in st.session_state:
                            st.session_state.document_context = text[:5000]  # Store first 5000 chars for context

                        # Question input
                        doc_question = st.text_input("Enter your question about the document:")

                        if doc_question:
                            with st.spinner("Analyzing document and generating response..."):
                                # Create a response placeholder for streaming
                                response_placeholder = st.empty()

                                # Prepare context with document content
                                document_context = f"Document Title: {uploaded_file.name}\n\nDocument Content (excerpt): {text[:2000]}...\n\n"

                                # Generate AI response about the document; the
                                # return value is not needed here because the
                                # placeholder is handed to the call.
                                get_ai_response(
                                    f"Based on the document content provided, please answer this question: {doc_question}",
                                    context=document_context,
                                    role="legal_expert",
                                    stream=True,
                                    placeholder=response_placeholder
                                )

                        # Provide sample questions for guidance
                        with st.expander("Sample Document Questions", expanded=False):
                            st.markdown("""
                            Here are some example questions you can ask about the document:

                            - What are the main legal issues addressed in this document?
                            - Summarize the key points of this legal document.
                            - What parties are mentioned in this document?
                            - What dates or deadlines are mentioned?
                            - What legal terminology is used and what does it mean?
                            - Are there any clear obligations or requirements specified?
                            - What would be the next steps in this legal process?
                            """)

                    # Document Sections Tab
                    with doc_tabs[2]:
                        st.subheader("Document Sections")
                        for i, result in enumerate(analysis_results):
                            with st.expander(f"Section {i+1} - {result['excerpt']}"):
                                st.write(f"Length: {result['length']} characters")
                                if result['terms']:
                                    st.write("Legal terms found: " + ", ".join(result['terms']))
                                else:
                                    st.write("No specific legal terms found in this section.")

                                # Add an option to view full section content
                                if st.button(f"View Full Section {i+1}", key=f"view_section_{i}"):
                                    # Assumes process_document_in_parallel splits the
                                    # text into consecutive 5000-character sections
                                    # - TODO confirm against that function.
                                    start_idx = i * 5000
                                    end_idx = start_idx + 5000
                                    section_text = text[start_idx:end_idx] if start_idx < len(text) else "Section not available"
                                    st.text_area(f"Section {i+1} Content:", section_text, height=200)

                except Exception as e:
                    st.error(f"Error processing PDF: {e}")
                    st.info("Try uploading a different PDF file or check if the file is properly formatted.")

        else:
            # Display information when no document is uploaded
            st.info("""
            ## Document Analysis Instructions

            Upload a legal document (PDF format) to analyze its content. Our AI will:

            1. Extract key information
            2. Allow you to ask specific questions about the document
            3. Identify legal terminology and sections

            After uploading, you can interact with the document through the analysis tabs.
            """)

            # Provide a sample document option
            st.markdown("Don't have a document to analyze? Try using a sample legal document.")
            if st.button("Load Sample Document"):
                # This is a placeholder - in a real app, you would load a sample document
                st.info("Sample document functionality would be implemented here.")

                # In a real implementation, you might do something like:
                # sample_path = "path/to/sample_legal_document.pdf"
                # with open(sample_path, "rb") as f:
                #     st.session_state.sample_doc = f.read()
                # st.experimental_rerun()

    # Crime Dashboard Tab
    with tab3:
        create_crime_dashboard()

    # Real-Time Crime News Tab
    with tab4:
        st.subheader("Latest Crime News")

        col1, col2 = st.columns([1, 1])

        with col1:
            # fetch_crime_news() caches its result for an hour, so the rerun
            # triggered by a click would otherwise redisplay the same articles.
            # Clearing the cache forces a real fetch; note that this clears
            # every st.cache_data cache in the app, not only the news.
            if st.button("🔄 Refresh News"):
                st.cache_data.clear()

            # Fetch and display news
            with st.spinner("Fetching latest crime news..."):
                news_articles = fetch_crime_news()

            if news_articles:
                for article in news_articles[:5]:
                    formatted_article = format_news_article(article)
                    with st.container():
                        st.markdown(f"#### {formatted_article['title']}")
                        st.caption(f"Source: {formatted_article['source']} | {formatted_article['date']}")
                        st.markdown(f"{formatted_article['description']}")
                        # Add Read More link
                        if formatted_article['url']:
                            st.markdown(f"[Read more]({formatted_article['url']})")
                        st.divider()
            else:
                st.warning("No news articles found. Please check your internet connection or try again later.")

        with col2:
            create_interactive_ai_tab(
                "Crime News Analyst",
                "news_analyst",
                "Providing insights and analysis on recent crime news and trends."
            )

    # Crime Prevention Strategies Tab
    with tab5:
        st.subheader("Crime Prevention and Safety Strategies")

        # Simplified single column layout
        with st.container():
            st.markdown("### Security Topics")

            # Topic selector for all security categories
            security_topic = st.selectbox(
                "Select a security topic to explore",
                [
                    "Home Security Systems",
                    "Personal Safety Tips",
                    "Business Security",
                    "Travel Security",
                    "Digital Security",
                    "Vehicle Security",
                    "Community Safety Programs"
                ]
            )

            # Canned content exists for three of the topics; the rest fall back
            # to a generic prompt below.
            topic_content = {
                "Home Security Systems": """
                **Home Security Essentials:**

                Modern security systems combine multiple layers of protection including:

                1. Door/window sensors
                2. Motion detectors
                3. Security cameras
                4. Smart locks
                5. Alarm systems

                Ask our AI for specific recommendations tailored to your needs.
                """,

                "Business Security": """
                **Business Security Framework:**

                Comprehensive business security includes:

                1. Risk assessment and security audits
                2. Physical security measures
                3. Employee training and protocols
                4. Incident response planning
                5. Regular security reviews

                Ask our AI for detailed guidance on implementing these measures.
                """,

                "Personal Safety Tips": """
                **Personal Safety Fundamentals:**

                1. Situational awareness training
                2. Emergency response planning
                3. Self-defense basics
                4. Communication tools and techniques
                5. Identifying and avoiding high-risk situations

                Ask our AI for personalized safety recommendations.
                """
            }

            # Display the selected topic content or a default message
            st.info(topic_content.get(security_topic, f"Ask our AI expert about {security_topic} strategies and best practices."))

            # Quick access buttons for common questions
            st.markdown("### Quick Questions")
            quick_questions = st.columns(3)

            # Each button stores a ready-made question in session state under
            # "prevention_specialist_quick_q".
            # NOTE(review): presumably consumed by create_interactive_ai_tab
            # for the "prevention_specialist" role - confirm.
            with quick_questions[0]:
                if st.button("Best security practices?"):
                    st.session_state["prevention_specialist_quick_q"] = "What are the best security practices for " + security_topic.lower() + "?"

            with quick_questions[1]:
                if st.button("Cost-effective solutions?"):
                    st.session_state["prevention_specialist_quick_q"] = "What are the most cost-effective " + security_topic.lower() + " solutions?"

            with quick_questions[2]:
                if st.button("Latest technologies?"):
                    st.session_state["prevention_specialist_quick_q"] = "What are the latest technologies for " + security_topic.lower() + "?"

        # Interactive AI assistant for prevention
        create_interactive_ai_tab(
            "Prevention Specialist",
            "prevention_specialist",
            "Expert guidance on crime prevention, personal safety, and risk mitigation strategies."
        )

    # Criminal Law Consultation Tab
    with tab6:
        st.subheader("Criminal Law and Legal Consultation")

        # Simplified layout with expandable sections instead of nested tabs
        st.markdown("### Criminal Justice Process")

        # Create a visual representation of the criminal justice process
        stages = [
            "Investigation", "Arrest", "Arraignment", "Pre-Trial",
            "Trial", "Sentencing", "Appeals", "Corrections"
        ]

        # Create a horizontal display of the stages, one numbered column each
        cols = st.columns(len(stages))
        for i, (col, stage) in enumerate(zip(cols, stages)):
            with col:
                st.markdown(f"**{i+1}. {stage}**")

        # Add option to select stage for more information
        selected_stage = st.selectbox("Select a stage to learn more:", stages)

        # Display information based on selected stage
        stage_info = {
            "Investigation": "The phase where law enforcement collects evidence and determines if a crime occurred.",
            "Arrest": "Taking a suspect into custody based on probable cause.",
            "Arraignment": "The first court appearance where charges are formally presented.",
            "Pre-Trial": "Period involving discovery, motions, and plea negotiations.",
            "Trial": "The formal judicial examination of evidence to determine guilt.",
            "Sentencing": "Determination of punishment for convicted defendants.",
            "Appeals": "Process to request review of legal proceedings by a higher court.",
            "Corrections": "Implementation of the sentence through incarceration, probation, or other means."
        }

        st.info(stage_info.get(selected_stage, "Select a stage to see details"))

        # Constitutional rights in an expandable section
        with st.expander("Constitutional Rights in Criminal Cases", expanded=False):
            st.markdown("""
            **Key Constitutional Protections:**

            - **Fourth Amendment**: Protection against unreasonable searches and seizures
            - **Fifth Amendment**: Right against self-incrimination and double jeopardy
            - **Sixth Amendment**: Right to counsel, speedy trial, and to confront witnesses
            - **Eighth Amendment**: Protection against excessive bail and cruel punishment
            """)

        # Common legal terms in an expandable section
        with st.expander("Common Legal Terms", expanded=False):
            st.markdown("""
            - **Probable Cause**: Reasonable basis for believing a crime may have been committed
            - **Reasonable Doubt**: Standard of proof required to validate a criminal conviction
            - **Miranda Rights**: Procedural safeguards to protect a suspect's Fifth Amendment rights
            - **Chain of Custody**: Chronological documentation showing the seizure, custody, control, and disposition of evidence
            - **Plea Bargain**: Agreement where a defendant pleads guilty to a lesser charge in exchange for a more lenient sentence
            """)

        # Quick access buttons for common legal questions
        st.markdown("### Quick Legal Questions")
        legal_quick_q = st.columns(2)

        # Each button stores a ready-made question in session state under
        # "legal_expert_quick_q".
        with legal_quick_q[0]:
            if st.button("What happens at arraignment?"):
                st.session_state["legal_expert_quick_q"] = "What happens at an arraignment hearing?"

        with legal_quick_q[1]:
            if st.button("Rights when arrested?"):
                st.session_state["legal_expert_quick_q"] = "What are my rights when being arrested?"

        # Interactive AI assistant for legal consultation
        create_interactive_ai_tab(
            "Legal Counsel AI",
            "legal_expert",
            "Providing professional legal insights and guidance on criminal law matters."
        )

# Script entry point: build and render the app only when this file is executed
# as a script (for example via `streamlit run app3.py`), not when it is imported.
if __name__ == "__main__":
    main()


Writing app3.py
