In [None]:
# Complete NLP App for Google Colab
# Text Summarization and Named Entity Recognition

# ============================================================================
# STEP 1: INSTALL REQUIRED PACKAGES
# ============================================================================

!pip install gradio transformers torch python-dotenv requests

# ============================================================================
# STEP 2: IMPORTS AND SETUP
# ============================================================================

import os
import gradio as gr
import requests
import json
from transformers import pipeline
from IPython.display import display, HTML
import warnings
warnings.filterwarnings('ignore')

# ============================================================================
# STEP 3: SETUP HUGGING FACE API (Optional - you can use local models instead)
# ============================================================================

# Method 1: Use Hugging Face API (requires API key)
# Uncomment these lines if you have a HuggingFace API key:

# HF_API_KEY = "your_hugging_face_api_key_here"  # Replace with your actual API key
#
# def get_completion_api(inputs, parameters=None, endpoint_url="https://api-inference.huggingface.co/models/facebook/bart-large-cnn"):
#     headers = {
#         "Authorization": f"Bearer {HF_API_KEY}",
#         "Content-Type": "application/json"
#     }
#     data = {"inputs": inputs}
#     if parameters is not None:
#         data.update({"parameters": parameters})
#
#     response = requests.post(endpoint_url, headers=headers, json=data)
#     return response.json()

# ============================================================================
# STEP 4: LOCAL MODEL SETUP (Recommended for Colab)
# ============================================================================

print("Loading models locally...")

# Load the summarization pipeline. If the primary checkpoint cannot be loaded
# for any reason, report the error and fall back to a second checkpoint.
try:
    summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
    print("✅ Summarization model loaded successfully")
except Exception as e:
    print(f"❌ Error loading summarization model: {e}")
    # Fallback to smaller model
    summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
    print("✅ Fallback summarization model loaded")

# Load the NER pipeline. Both the primary and the fallback pipeline must be
# created with aggregation_strategy="simple": extract_entities() reads the
# "entity_group" key, which the pipeline only emits for aggregated results.
try:
    ner_pipeline = pipeline("ner", model="dslim/bert-base-NER", aggregation_strategy="simple")
    print("✅ NER model loaded successfully")
except Exception as e:
    print(f"❌ Error loading NER model: {e}")
    ner_pipeline = pipeline(
        "ner",
        model="dbmdz/bert-large-cased-finetuned-conll03-english",
        aggregation_strategy="simple",
    )
    print("✅ Fallback NER model loaded")

print("All models ready! 🚀")

# ============================================================================
# STEP 5: HELPER FUNCTIONS
# ============================================================================

def summarize_text(input_text):
    """Summarize ``input_text`` with the module-level ``summarizer`` pipeline.

    Args:
        input_text: Text to summarize. ``None`` is treated as empty text.

    Returns:
        The generated summary string. If the stripped input has fewer than
        50 characters, a notice asking for more text is returned instead.
        If anything raises while summarizing, the string
        ``"Error in summarization: <message>"`` is returned; this function
        does not raise.
    """
    try:
        # A cleared textbox may deliver None; treat it like empty input so the
        # user sees the "too short" notice rather than an AttributeError text.
        text = "" if input_text is None else input_text
        if len(text.strip()) < 50:
            return "Input text is too short for summarization. Please provide at least 50 characters."

        # Scale the summary length with the input: roughly a third of the
        # word count, clamped to the range [30, 150].
        input_length = len(text.split())
        max_length = min(150, max(30, input_length // 3))
        min_length = min(30, max_length // 2)

        # truncation=True clips inputs that exceed the model's maximum input
        # length instead of letting the model fail on over-long sequences.
        result = summarizer(
            text,
            max_length=max_length,
            min_length=min_length,
            do_sample=False,
            truncation=True,
        )
        return result[0]['summary_text']
    except Exception as e:
        return f"Error in summarization: {str(e)}"

def extract_entities(input_text):
    """Extract named entities from ``input_text`` for ``gr.HighlightedText``.

    Args:
        input_text: Text to analyse. ``None`` is treated as empty text.

    Returns:
        A dict ``{"text": <text>, "entities": [...]}``. Each entity is a dict
        with the keys ``entity`` (label), ``word``, ``start``, ``end`` and
        ``score`` (rounded to 3 decimals). The entity list is empty when the
        stripped input has fewer than 5 characters or when extraction fails;
        this function does not raise.
    """
    text = "" if input_text is None else input_text
    try:
        if len(text.strip()) < 5:
            return {"text": text, "entities": []}

        # Get NER results
        raw_entities = ner_pipeline(text)

        # Format for Gradio HighlightedText. Aggregated pipelines label each
        # span under "entity_group"; non-aggregated pipelines use "entity", so
        # accept either instead of failing on the missing key.
        formatted_entities = [
            {
                "entity": item.get("entity_group") or item["entity"],
                "word": item["word"],
                "start": item["start"],
                "end": item["end"],
                # float() turns a numpy scalar into a plain Python float.
                "score": round(float(item["score"]), 3),
            }
            for item in raw_entities
        ]
        return {"text": text, "entities": formatted_entities}
    except Exception as e:
        # Best effort: keep the UI responsive, but leave a trace of the
        # failure instead of discarding it silently.
        print(f"❌ Error in entity extraction: {e}")
        return {"text": text, "entities": []}

# ============================================================================
# STEP 6: CREATE GRADIO INTERFACES
# ============================================================================

def create_summarization_app():
    """Build the standalone Gradio interface for text summarization.

    Returns:
        A ``gr.Interface`` that passes the content of one textbox to
        ``summarize_text`` and shows the result in a second textbox. Three
        sample passages are attached as clickable examples.
    """
    # Sample passages offered as examples in the UI.
    eiffel_tower_text = """The tower is 324 metres (1,063 ft) tall, about the same height as an 81-storey building, and the tallest structure in Paris. Its base is square, measuring 125 metres (410 ft) on each side. During its construction, the Eiffel Tower surpassed the Washington Monument to become the tallest man-made structure in the world, a title it held for 41 years until the Chrysler Building in New York City was finished in 1930. It was the first structure to reach a height of 300 metres. Due to the addition of a broadcasting aerial at the top of the tower in 1957, it is now taller than the Chrysler Building by 5.2 metres (17 ft). Excluding transmitters, the Eiffel Tower is the second tallest free-standing structure in France after the Millau Viaduct."""
    ai_text = """Artificial intelligence (AI) is intelligence demonstrated by machines, in contrast to the natural intelligence displayed by humans and animals. Leading AI textbooks define the field as the study of "intelligent agents": any device that perceives its environment and takes actions that maximize its chance of successfully achieving its goals. Colloquially, the term "artificial intelligence" is often used to describe machines that mimic "cognitive" functions that humans associate with the human mind, such as "learning" and "problem solving"."""
    climate_text = """Climate change refers to long-term shifts in temperatures and weather patterns. These shifts may be natural, such as through variations in the solar cycle. But since the 1800s, human activities have been the main driver of climate change, primarily due to burning fossil fuels like coal, oil and gas. Burning fossil fuels generates greenhouse gas emissions that act like a blanket wrapped around the Earth, trapping the sun's heat and raising temperatures."""

    # Input and output widgets, created in the same order as they appear.
    source_box = gr.Textbox(
        label="Text to Summarize",
        lines=8,
        placeholder="Enter your text here (minimum 50 characters)...",
    )
    summary_box = gr.Textbox(label="Summary", lines=4)

    return gr.Interface(
        fn=summarize_text,
        inputs=[source_box],
        outputs=[summary_box],
        title="AI Text Summarisation",
        description="Summarise any text using Facebook's BART model. The model works best with longer texts (articles, documents, etc.)",
        examples=[eiffel_tower_text, ai_text, climate_text],
        allow_flagging="never",
        theme="default",
    )

def create_ner_app():
    """Build the standalone Gradio interface for Named Entity Recognition.

    Returns:
        A ``gr.Interface`` that passes the content of a textbox to
        ``extract_entities`` and renders the result in a
        ``gr.HighlightedText`` component. Five sample sentences are attached
        as clickable examples.
    """
    # Sample sentences offered as examples in the UI.
    sample_sentences = [
        "My name is Andrew, I'm building DeepLearningAI and I live in California",
        "Apple Inc. is planning to open a new store in New York City next month",
        "Elon Musk founded SpaceX in 2002 and Tesla Motors in 2003",
        "The United Nations headquarters is located in Manhattan, New York",
        "Microsoft was founded by Bill Gates and Paul Allen in Albuquerque, New Mexico",
    ]

    # Input and output widgets, created in the same order as they appear.
    text_box = gr.Textbox(
        label="Text for Entity Recognition",
        lines=3,
        placeholder="Enter text to find people, organizations, and locations...",
    )
    highlighted = gr.HighlightedText(
        label="Identified Entities",
        combine_adjacent=True,
    )

    return gr.Interface(
        fn=extract_entities,
        inputs=[text_box],
        outputs=[highlighted],
        title="🎯 Named Entity Recognition",
        description="Find and highlight people (PER), organizations (ORG), locations (LOC), and other entities in your text using BERT",
        examples=sample_sentences,
        allow_flagging="never",
        theme="default",
    )

# ============================================================================
# STEP 7: CREATE COMBINED APP WITH TABS
# ============================================================================

def create_combined_app():
    """Build a single Gradio Blocks app with summarization and NER tabs.

    The layout consists of a Markdown header followed by two tabs. Each tab
    holds a row with an input column (textbox + button) and an output column,
    plus a set of clickable examples that fill the tab's input textbox.

    Returns:
        The ``gr.Blocks`` app. The "Summarize" button is wired to
        ``summarize_text`` and the "Find Entities" button to
        ``extract_entities``.
    """

    with gr.Blocks(title="NLP Toolkit", theme="default") as app:
        # Page header shown above the tabs.
        gr.Markdown("""
        # 🤖 NLP Toolkit
        ## Powered by Hugging Face Transformers

        This toolkit provides two powerful NLP capabilities:
        - **Text Summarization**: Condense long texts into concise summaries
        - **Named Entity Recognition**: Identify people, organizations, and locations in text
        """)

        with gr.Tabs():
            # --- Tab 1: summarization -------------------------------------
            with gr.TabItem("📝 Text Summarization"):
                with gr.Row():
                    # Left column: input text and the trigger button.
                    with gr.Column():
                        sum_input = gr.Textbox(
                            label="Text to Summarize",
                            lines=10,
                            placeholder="Paste your article, document, or long text here..."
                        )
                        sum_button = gr.Button("Summarize", variant="primary")

                    # Right column: generated summary.
                    with gr.Column():
                        sum_output = gr.Textbox(
                            label="Summary",
                            lines=6
                        )

                # Example passage that populates sum_input.
                gr.Examples(
                    examples=[
                        """The tower is 324 metres (1,063 ft) tall, about the same height as an 81-storey building, and the tallest structure in Paris. Its base is square, measuring 125 metres (410 ft) on each side. During its construction, the Eiffel Tower surpassed the Washington Monument to become the tallest man-made structure in the world, a title it held for 41 years until the Chrysler Building in New York City was finished in 1930."""
                    ],
                    inputs=sum_input
                )

            # --- Tab 2: named entity recognition --------------------------
            with gr.TabItem("🎯 Named Entity Recognition"):
                with gr.Row():
                    # Left column: input text and the trigger button.
                    with gr.Column():
                        ner_input = gr.Textbox(
                            label="Text for Entity Recognition",
                            lines=5,
                            placeholder="Enter text to identify people, organizations, locations..."
                        )
                        ner_button = gr.Button("Find Entities", variant="primary")

                    # Right column: text with the detected entities highlighted.
                    with gr.Column():
                        ner_output = gr.HighlightedText(
                            label="Identified Entities",
                            combine_adjacent=True
                        )

                # Example sentences that populate ner_input.
                gr.Examples(
                    examples=[
                        "My name is Andrew, I'm building DeepLearningAI and I live in California",
                        "Apple Inc. is planning to open a new store in New York City next month"
                    ],
                    inputs=ner_input
                )

        # Event handlers: registered while still inside the Blocks context,
        # each button click runs its helper on the tab's input component.
        sum_button.click(fn=summarize_text, inputs=sum_input, outputs=sum_output)
        ner_button.click(fn=extract_entities, inputs=ner_input, outputs=ner_output)

    return app

# ============================================================================
# ADDITIONAL UTILITY FUNCTIONS
# ============================================================================

def test_models():
    """Smoke-test both helper functions with sample inputs and print results.

    ``summarize_text`` and ``extract_entities`` report failures through their
    return values (an "Error in summarization: ..." string or an empty entity
    list) rather than by raising, so the returned values are inspected
    explicitly instead of relying on exceptions alone.
    """
    print("\n🧪 Testing Models...")

    # Test summarization
    test_text = "Artificial intelligence is revolutionizing many industries. Machine learning algorithms can now process vast amounts of data and make predictions with remarkable accuracy. This technology is being applied in healthcare, finance, transportation, and many other sectors."

    try:
        summary = summarize_text(test_text)
        if summary.startswith("Error in summarization:"):
            # The helper swallowed an exception and returned its message.
            print(f"❌ Summarization test failed: {summary}")
        else:
            print(f"✅ Summarization test passed: {summary[:50]}...")
    except Exception as e:
        print(f"❌ Summarization test failed: {e}")

    # Test NER
    test_ner = "Barack Obama was the President of the United States and lived in Washington."

    try:
        entities = extract_entities(test_ner)
        found = len(entities['entities'])
        if found:
            print(f"✅ NER test passed: Found {found} entities")
        else:
            # The sample sentence contains a person and two places; an empty
            # result means extraction failed and the helper hid the error.
            print("❌ NER test failed: no entities found")
    except Exception as e:
        print(f"❌ NER test failed: {e}")

# Run tests
test_models()

# Instructions for use
print("""
📋 INSTRUCTIONS:
1. Run all cells in order
2. The app will launch with a public URL
3. You can share this URL with others
4. Both summarization and NER tools are available in tabs

🔑 OPTIONAL: If you have a HuggingFace API key:
1. Uncomment the API code section
2. Replace 'your_hugging_face_api_key_here' with your actual key
3. This will use cloud models instead of local ones

💡 TIPS:
- For summarization: Use longer texts (100+ words) for best results
- For NER: Works with any text length
- Click the examples to try sample inputs
""")

# ============================================================================
# STEP 8: LAUNCH THE APP
# ============================================================================

# The launch is deliberately the LAST thing in this cell: with debug=True the
# call keeps the cell running until the server is stopped, so anything placed
# after it (the model tests and the instructions above) would not run while
# the app is up.
if __name__ == "__main__":
    print("\n" + "="*50)
    print("🚀 LAUNCHING NLP TOOLKIT")
    print("="*50)

    # Choose which app to launch:

    # Option 1: Launch individual apps
    # print("Launching Summarization App...")
    # summarization_app = create_summarization_app()
    # summarization_app.launch(share=True, debug=True)

    # Option 2: Launch combined app (recommended)
    print("Launching Combined NLP Toolkit...")
    combined_app = create_combined_app()
    combined_app.launch(
        share=True,  # Creates a public link you can share
        debug=True,  # Shows detailed error messages
        server_port=7860,  # Default Gradio port
        server_name="0.0.0.0"  # Allow external connections
    )

Loading models locally...


config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

Device set to use cpu


✅ Summarization model loaded successfully


config.json:   0%|          | 0.00/829 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/433M [00:00<?, ?B/s]

Some weights of the model checkpoint at dslim/bert-base-NER were not used when initializing BertForTokenClassification: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


tokenizer_config.json:   0%|          | 0.00/59.0 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

added_tokens.json:   0%|          | 0.00/2.00 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

Device set to use cpu


✅ NER model loaded successfully
All models ready! 🚀

🚀 LAUNCHING NLP TOOLKIT
Launching Combined NLP Toolkit...
Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://c46c84feba231d601c.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
