In [1]:
!pip install PyPDF2 python-docx

Collecting PyPDF2
  Downloading pypdf2-3.0.1-py3-none-any.whl.metadata (6.8 kB)
Collecting python-docx
  Downloading python_docx-1.2.0-py3-none-any.whl.metadata (2.0 kB)
Downloading pypdf2-3.0.1-py3-none-any.whl (232 kB)
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m232.6/232.6 kB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hDownloading python_docx-1.2.0-py3-none-any.whl (252 kB)
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m253.0/253.0 kB[0m [31m16.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: python-docx, PyPDF2
Successfully installed PyPDF2-3.0.1 python-docx-1.2.0


In [None]:
import io
import os
import time

import docx
import gradio as gr
import PyPDF2
import torch
from PIL import Image
from transformers import AutoModelForCausalLM, AutoTokenizer

class QwenChatbot:
    """Chat wrapper around a Hugging Face causal LM (Qwen 2.5 7B Instruct by default).

    The model and tokenizer are loaded in ``__init__``. Loading errors are
    caught and recorded in ``model_loaded`` so callers can check the flag and
    degrade gracefully instead of crashing.
    """

    # Extensions whose content is read verbatim as UTF-8 text.
    _PLAIN_TEXT_EXTENSIONS = ('txt', 'md', 'csv', 'json', 'py', 'js', 'html', 'css')

    def __init__(self, model_name="Qwen/Qwen2.5-7B-Instruct"):
        """Load the tokenizer and model, letting ``device_map="auto"`` place the weights.

        Args:
            model_name: Identifier handed to ``from_pretrained`` for both the
                tokenizer and the model.

        Any exception raised while loading is printed; ``model`` and
        ``tokenizer`` are then ``None`` and ``model_loaded`` is ``False``.
        """
        print("Loading Qwen 2.5 7B Instruct model...")
        print(f"CUDA available: {torch.cuda.is_available()}")
        if torch.cuda.is_available():
            print(f"GPU: {torch.cuda.get_device_name(0)}")
            print(f"GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.2f} GB")

        try:
            # Load tokenizer
            self.tokenizer = AutoTokenizer.from_pretrained(model_name)

            # Load model in half precision with automatic device placement
            self.model = AutoModelForCausalLM.from_pretrained(
                model_name,
                torch_dtype=torch.float16,
                device_map="auto"
            )

            print("Model loaded successfully!")
            self.model_loaded = True
            print(f"Model device: {self.model.device}")

        except Exception as e:
            print(f"Error loading model: {e}")
            self.model = None
            self.tokenizer = None
            self.model_loaded = False

        self.default_system_prompt = """You are Qwen, created by Alibaba Cloud. You are a helpful assistant."""

    def extract_text_from_file(self, file_path):
        """Extract text from an uploaded file.

        Args:
            file_path: Path of the file to read. ``None`` or an empty value
                yields an empty string.

        Returns:
            The extracted text, or a bracketed placeholder string
            (``[Unsupported file type: ...]`` / ``[Error reading file: ...]``)
            when the file cannot be handled. This method never raises.
        """
        if not file_path:
            return ""

        try:
            file_path = str(file_path)
            # splitext only inspects the final path component, so a dotted
            # directory name cannot be mistaken for the file's extension.
            file_extension = os.path.splitext(file_path)[1].lower().lstrip('.')

            if file_extension == 'pdf':
                with open(file_path, 'rb') as file:
                    pdf_reader = PyPDF2.PdfReader(file)
                    # Guard against pages that yield no text (e.g. scanned images).
                    return "".join(
                        (page.extract_text() or "") + "\n" for page in pdf_reader.pages
                    )

            if file_extension in ('docx', 'doc'):
                # NOTE(review): python-docx targets .docx; a legacy binary .doc is
                # expected to fail here and be reported via the error path - confirm.
                doc = docx.Document(file_path)
                return "\n".join(paragraph.text for paragraph in doc.paragraphs)

            if file_extension in self._PLAIN_TEXT_EXTENSIONS:
                with open(file_path, 'r', encoding='utf-8', errors='ignore') as file:
                    return file.read()

            return f"[Unsupported file type: {file_extension or '(none)'}]"

        except Exception as e:
            return f"[Error reading file: {str(e)}]"

    def _gather_file_context(self, uploaded_files):
        """Return one labelled text section per uploaded file, concatenated."""
        sections = []
        for file in uploaded_files or []:
            # Uploads may arrive as plain path strings or as objects exposing
            # the path through ``.name``; accept both.
            file_path = str(getattr(file, "name", file))
            file_text = self.extract_text_from_file(file_path)
            # Label with the base name only so temp-directory paths stay out of the prompt.
            sections.append(f"\n\n--- File: {os.path.basename(file_path)} ---\n{file_text}\n")
        return "".join(sections)

    def generate_response(self, message, history, system_prompt, uploaded_files, 
                         temperature, max_tokens, top_p, top_k, repetition_penalty):
        """Generate one assistant reply and return the extended chat history.

        Args:
            message: The user's new message (``None`` is treated as empty).
            history: Prior turns as ``[user, assistant]`` pairs; ``None`` is
                treated as an empty history.
            system_prompt: System instructions; blank or ``None`` falls back to
                ``default_system_prompt``.
            uploaded_files: Optional iterable of uploaded files whose extracted
                text is appended to the user message sent to the model.
            temperature: Sampling temperature; ``0`` selects greedy decoding.
            max_tokens: Maximum number of new tokens to generate.
            top_p, top_k: Sampling filters, only forwarded when sampling.
            repetition_penalty: Repetition penalty forwarded to ``generate``.

        Returns:
            ``history`` plus one ``[message, reply]`` pair. If the model is not
            loaded or generation fails, the reply is an error string instead.
        """
        history = list(history or [])
        message = message or ""

        if not self.model_loaded:
            # "\u274c" is the cross-mark emoji, escaped so the literal survives re-encoding.
            return history + [[message, "\u274c Model not loaded. Please check console logs."]]

        # Use provided system prompt or default
        if system_prompt is None or system_prompt.strip() == "":
            system_prompt = self.default_system_prompt

        # Combine file content with user message
        file_content = self._gather_file_context(uploaded_files)
        full_message = message
        if file_content:
            full_message = f"{message}\n\nUploaded file content:{file_content}"

        print(f"\n{'='*50}")
        print(f"User: {message}")
        if file_content:
            print(f"Files attached: {len(uploaded_files)}")

        start_time = time.perf_counter()

        try:
            # Build conversation: system prompt, prior turns, then the new message
            messages = [{"role": "system", "content": system_prompt}]
            for user_msg, assistant_msg in history:
                messages.append({"role": "user", "content": user_msg})
                if assistant_msg:
                    messages.append({"role": "assistant", "content": assistant_msg})
            messages.append({"role": "user", "content": full_message})

            # Apply chat template
            text = self.tokenizer.apply_chat_template(
                messages,
                tokenize=False,
                add_generation_prompt=True
            )

            # Tokenize
            model_inputs = self.tokenizer([text], return_tensors="pt").to(self.model.device)

            do_sample = temperature is not None and temperature > 0

            # Fall back to EOS for padding when the tokenizer defines no pad token.
            pad_token_id = self.tokenizer.pad_token_id
            if pad_token_id is None:
                pad_token_id = self.tokenizer.eos_token_id

            generation_kwargs = {
                # UI sliders can deliver floats; generate expects an integer count.
                "max_new_tokens": int(max_tokens),
                "repetition_penalty": repetition_penalty,
                "do_sample": do_sample,
                "pad_token_id": pad_token_id,
                "eos_token_id": self.tokenizer.eos_token_id,
            }
            if do_sample:
                # Sampling knobs only apply when sampling; a temperature of 0
                # is not forwarded in greedy mode.
                generation_kwargs.update(
                    temperature=temperature,
                    top_p=top_p,
                    top_k=int(top_k),
                )

            # Generate
            with torch.no_grad():
                generated_ids = self.model.generate(**model_inputs, **generation_kwargs)

            # Decode only the generated part (strip the echoed prompt tokens)
            generated_ids = [
                output_ids[len(input_ids):]
                for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
            ]

            response_text = self.tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]

            elapsed_time = time.perf_counter() - start_time

            num_tokens = len(generated_ids[0])
            tokens_per_sec = num_tokens / elapsed_time if elapsed_time > 0 else 0

            print(f"Assistant: {response_text}")
            print(f"Time: {elapsed_time:.2f}s ({tokens_per_sec:.1f} tokens/s)")
            print(f"{'='*50}\n")

            # Add to history
            return history + [[message, response_text]]

        except Exception as e:
            # Best effort: surface the failure in the chat instead of crashing the UI.
            error_msg = f"\u274c Error: {str(e)}"
            print(error_msg)
            return history + [[message, error_msg]]


def create_gradio_interface(chatbot):
    """Build the Gradio Blocks UI for ``chatbot``.

    Args:
        chatbot: Object exposing ``model_loaded``, ``default_system_prompt`` and
            ``generate_response(message, history, system_prompt, files,
            temperature, max_tokens, top_p, top_k, repetition_penalty)``.

    Returns:
        A ``gr.Blocks`` app. When ``chatbot.model_loaded`` is false, the app
        only shows an error notice; otherwise it contains the chat column and a
        settings column with the generation parameters.
    """
    # Emoji are written as escapes so the literals survive a re-encoding of the file.
    cross_mark = "\u274c"
    robot_face = "\U0001F916"
    paperclip = "\U0001F4CE"

    if not chatbot.model_loaded:
        with gr.Blocks(title="Qwen Chatbot - Error") as demo:
            gr.Markdown(f"# {cross_mark} Model Loading Error")
            gr.Markdown("Could not load the Qwen model. Please check console logs.")
        return demo

    # Custom CSS for better layout
    custom_css = """
    #chatbot-container {
        height: 600px;
    }
    #settings-column {
        background-color: #f8f9fa;
        padding: 20px;
        border-radius: 8px;
    }
    """

    with gr.Blocks(title="Qwen 2.5 7B Chatbot", theme=gr.themes.Soft(), css=custom_css) as demo:

        gr.Markdown(f"# {robot_face} Qwen 2.5 7B Instruct Chatbot")

        with gr.Row():
            # Left column - Chat interface
            with gr.Column(scale=3):
                chatbot_ui = gr.Chatbot(
                    label="Chat",
                    height=600,
                    elem_id="chatbot-container",
                    show_copy_button=True
                )

                with gr.Row():
                    msg = gr.Textbox(
                        label="Your message",
                        placeholder="Type your message here...",
                        lines=3,
                        scale=4
                    )
                    file_upload = gr.Files(
                        label=paperclip,
                        file_count="multiple",
                        scale=1
                    )

                with gr.Row():
                    submit_btn = gr.Button("Send", variant="primary", scale=2)
                    clear_btn = gr.Button("Clear", scale=1)

            # Right column - Settings
            with gr.Column(scale=1, elem_id="settings-column"):
                gr.Markdown("### Model Settings")

                system_prompt = gr.TextArea(
                    label="System Instructions",
                    value=chatbot.default_system_prompt,
                    lines=4,
                    placeholder="Optional instructions for the model..."
                )

                gr.Markdown("### Generation Parameters")

                temperature = gr.Slider(
                    minimum=0.0,
                    maximum=2.0,
                    value=0.7,
                    step=0.1,
                    label="Temperature"
                )

                max_tokens = gr.Slider(
                    minimum=50,
                    maximum=32000,
                    value=1024,
                    step=50,
                    label="Max tokens"
                )

                top_p = gr.Slider(
                    minimum=0.1,
                    maximum=1.0,
                    value=0.9,
                    step=0.05,
                    label="Top P"
                )

                top_k = gr.Slider(
                    minimum=1,
                    maximum=100,
                    value=40,
                    step=1,
                    label="Top K"
                )

                repetition_penalty = gr.Slider(
                    minimum=1.0,
                    maximum=2.0,
                    value=1.05,
                    step=0.05,
                    label="Repetition Penalty"
                )

                with gr.Accordion("Model Info", open=False):
                    gpu_available = torch.cuda.is_available()
                    gpu_name = torch.cuda.get_device_name(0) if gpu_available else "N/A"

                    # Markdown hard line breaks need two trailing spaces; joining with
                    # "  \n" keeps them even if an editor strips trailing whitespace.
                    model_info_md = "  \n".join([
                        "**Model:** Qwen2.5-7B-Instruct",
                        f"**GPU:** {gpu_name}",
                        "**Precision:** FP16",
                        "**Context:** 128K tokens",
                    ])
                    gr.Markdown(model_info_md)

        # Chat interaction logic
        def respond(message, chat_history, sys_prompt, files, temp, max_tok, top_p_val, top_k_val, rep_pen):
            """Run one chat turn; returns the updated history and clears the textbox."""
            message = message or ""
            # Nothing to send: no text and no attachments.
            if not message.strip() and not files:
                return chat_history, ""

            updated_history = chatbot.generate_response(
                message, chat_history, sys_prompt, files,
                temp, max_tok, top_p_val, top_k_val, rep_pen
            )
            return updated_history, ""

        # Event handlers: the Send button and Enter in the textbox share one wiring
        chat_inputs = [msg, chatbot_ui, system_prompt, file_upload,
                       temperature, max_tokens, top_p, top_k, repetition_penalty]
        chat_outputs = [chatbot_ui, msg]

        submit_btn.click(respond, inputs=chat_inputs, outputs=chat_outputs)
        msg.submit(respond, inputs=chat_inputs, outputs=chat_outputs)

        # Clear resets both the conversation and the attached files
        clear_btn.click(lambda: ([], None), outputs=[chatbot_ui, file_upload])

    return demo


# Entry point: build the model wrapper and its UI, then serve the app
print("Initializing Qwen 2.5 7B Chatbot...")
chatbot = QwenChatbot()
demo = create_gradio_interface(chatbot)

if __name__ == "__main__":
    # Listen on all interfaces at port 7860 and also request a public share link.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": True,
        "show_api": True,
        "debug": True,
    }
    demo.launch(**launch_options)

Initializing Qwen 2.5 7B Chatbot...
Loading Qwen 2.5 7B Instruct model...
CUDA available: True
GPU: Tesla T4
GPU Memory: 14.74 GB


tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

config.json:   0%|          | 0.00/663 [00:00<?, ?B/s]

2025-11-17 19:01:58.374800: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1763406118.538198      48 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1763406118.585596      48 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

model.safetensors.index.json: 0.00B [00:00, ?B/s]

Fetching 4 files:   0%|          | 0/4 [00:00<?, ?it/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/3.56G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/3.86G [00:00<?, ?B/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/3.95G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/3.86G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/243 [00:00<?, ?B/s]

Model loaded successfully!
Model device: cuda:0


  chatbot_ui = gr.Chatbot(


* Running on local URL:  http://0.0.0.0:7860
* Running on public URL: https://dafa210e616db61feb.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)



User: what u see
Files attached: 1
Assistant: The uploaded document is a detailed development brief for the creation of an advanced WhatsApp chatbot named Amen AI. The brief outlines the project's objectives, target audience, key features, and technical requirements. Here's a summary of the key points:

### Project Overview
- **Project Name:** Amen AI – WhatsApp Chatbot
- **Purpose:** To serve as a trusted companion for individuals of the African diaspora on their heritage journey to Ghana.
- **Persona:** Inspired by the name "Amen," the chatbot is designed to be wise, patient, and comforting.

### Key Objectives
- Address anxiety related to identity and acceptance.
- Transform anxiety into empowerment.
- Serve as a gateway to community connection.
- Provide cultural guidance and emotional support.
- Integrate seamlessly with the broader OurRoots.Africa platform.

### User Pain Points
- **Emotional/Logistical Disconnect:** Users feel logistically prepared but emotionally anxious.
- 