In [None]:
import nltk

# Download only the resources this notebook actually uses (tokenizer, stop
# words, WordNet, POS tagger) instead of the whole 'all' collection.
# Both the legacy and the newer resource names are requested because the name
# NLTK looks up depends on the installed NLTK release.
NLTK_RESOURCES = (
    "punkt",
    "punkt_tab",
    "stopwords",
    "wordnet",
    "omw-1.4",
    "averaged_perceptron_tagger",
    "averaged_perceptron_tagger_eng",
)
for resource in NLTK_RESOURCES:
    nltk.download(resource, quiet=True)

[nltk_data] Downloading collection 'all'
[nltk_data]    | 
[nltk_data]    | Downloading package abc to /root/nltk_data...
[nltk_data]    |   Unzipping corpora/abc.zip.
[nltk_data]    | Downloading package alpino to /root/nltk_data...
[nltk_data]    |   Unzipping corpora/alpino.zip.
[nltk_data]    | Downloading package averaged_perceptron_tagger to
[nltk_data]    |     /root/nltk_data...
[nltk_data]    |   Unzipping taggers/averaged_perceptron_tagger.zip.
[nltk_data]    | Downloading package averaged_perceptron_tagger_eng to
[nltk_data]    |     /root/nltk_data...
[nltk_data]    |   Unzipping
[nltk_data]    |       taggers/averaged_perceptron_tagger_eng.zip.
[nltk_data]    | Downloading package averaged_perceptron_tagger_ru to
[nltk_data]    |     /root/nltk_data...
[nltk_data]    |   Unzipping
[nltk_data]    |       taggers/averaged_perceptron_tagger_ru.zip.
[nltk_data]    | Downloading package averaged_perceptron_tagger_rus to
[nltk_data]    |     /root/nltk_data...
[nltk_data]    |  

True

In [None]:
# Category -> indicator terms. Each term must appear only once per category:
# the classifier adds the term's occurrence count once per list entry, so a
# repeated entry would silently double that term's weight.
keywords = {
    "finance": [
        "stock", "market", "investment", "economy", "bank", "loan", "interest rate",
        "inflation", "currency", "profit", "loss", "revenue", "expenditure",
        "dividend", "portfolio", "trade", "share", "bond", "equity", "financial",
        "gdp", "recession", "fund", "asset", "liability", "tax", "budget", "audit",
        "venture capital", "ipo", "merger", "acquisition", "debt", "credit", "fiscal",
        "earning", "forecast", "yield", "commodity", "derivative", "hedging",
        "exchange", "liquidity", "capital", "underwriting", "broker", "analyst"
    ],
    "medical": [
        "patient", "doctor", "hospital", "disease", "treatment", "diagnosis", "symptom",
        "medication", "surgery", "therapy", "clinic", "health", "illness", "virus",
        "bacteria", "vaccine", "research", "clinical trial", "anatomy", "physiology",
        "pharmacology", "epidemic", "pandemic", "prescription", "drug", "mri", "x-ray",
        "rehabilitation", "nursing", "medical device", "consultation", "prevention",
        "wellness", "pathology", "immunology", "neurology", "cardiology", "oncology",
        "pediatrics", "geriatrics", "prognosis", "biotechnology",
        "medical record", "health care", "clinical study", "pharma"
    ],
    "technology": [
        "software", "hardware", "computer", "internet", "network", "data", "ai",
        "machine learning", "algorithm", "cybersecurity", "development", "programming",
        "code", "app", "website", "cloud computing", "startup", "innovation", "digital",
        "robotics", "automation", "gadget", "smartphone", "blockchain", "virtual reality",
        "augmented reality", "chip", "processor", "server", "database", "api", "ux", "ui",
        "firmware", "protocol", "bandwidth", "encryption", "biometric", "nanotechnology",
        "telecom", "gigabit", "ethernet", "analytic", "big data", "coding", "developer",
        "neural network", "artificial intelligence", "tech company", "operating system"
    ],
    "sports": [
        "game", "match", "team", "player", "score", "win", "lose", "championship", "league",
        "athlete", "coach", "stadium", "tournament", "goal", "point", "record", "training",
        "fitness", "olympic", "world cup", "basketball", "football", "cricket", "tennis",
        "swimming", "runner", "umpire", "referee", "medal", "cup", "season", "pitch", "court",
        "field", "race", "compete", "victory", "defeat", "champion", "trophy", "fixture",
        "event", "fan", "supporter", "league table", "transfer", "sporting event", "playoff"
    ],
    "military": [
        "army", "navy", "air force", "soldier", "weapon", "combat", "war", "defense",
        "security", "military operation", "troop", "battle", "intelligence", "commander",
        "strategy", "tactics", "deployment", "veteran", "peacekeeping", "ammunition",
        "artillery", "infantry", "navy seal", "air strike", "missile", "uniform", "drill",
        "sanction", "bunker", "grenade", "camouflage", "reconnaissance", "logistics",
        "fortification", "cadet", "serviceman", "patrol", "conflict", "frontline",
        "regiment", "battalion", "squadron", "fleet", "armored", "counter-terrorism",
        "military base", "armed force", "national guard", "servicemember"
    ]
}

In [None]:
# Multi-word phrases that are strong indicators of a category. The classifier
# looks for them verbatim in the lowercased input and weights each hit more
# heavily than a single keyword.
strong_phrases = {
    "finance": [
        "stock market",
        "interest rate hike",
        "quarterly earnings",
        "financial crisis",
        "venture capital",
        "fiscal policy",
        "monetary policy",
        "investment bank",
        "credit rating",
        "economic growth",
    ],
    "medical": [
        "clinical trial results",
        "public health emergency",
        "disease outbreak",
        "medical breakthrough",
        "patient care",
        "emergency room",
        "side effects",
        "health care system",
        "surgical procedure",
        "drug discovery",
    ],
    "technology": [
        "artificial intelligence",
        "machine learning",
        "cyber security threat",
        "software development",
        "cloud computing",
        "virtual reality",
        "augmented reality",
        "data science",
        "internet of things",
        "quantum computing",
        "neural network",
    ],
    "sports": [
        "world cup final",
        "championship game",
        "team victory",
        "olympic medal",
        "record breaking",
        "league leader",
        "playoff series",
        "final score",
        "athlete performance",
        "sporting event",
    ],
    "military": [
        "military exercise",
        "national security",
        "combat operations",
        "defense budget",
        "peacekeeping mission",
        "armed forces",
        "intelligence gathering",
        "strategic deployment",
        "air superiority",
        "military alliance",
        "special forces",
    ],
}

In [None]:
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tag import pos_tag
from nltk.corpus import wordnet
import string
import re

In [None]:
# Shared NLP resources, created once and reused by the preprocessing code.
# English stop words are held in a set for fast membership tests.
stop_words = set(stopwords.words('english'))
# WordNet-based lemmatizer used to reduce tokens to their base form.
lemmatizer = WordNetLemmatizer()

In [None]:
def get_wordnet_pos(tag):
    """Map a Penn Treebank POS tag to the matching WordNet POS constant.

    Args:
        tag: POS tag string as produced by ``nltk.pos_tag`` (e.g. ``"JJ"``,
            ``"VBD"``, ``"NNS"``, ``"RB"``).

    Returns:
        ``wordnet.ADJ``, ``wordnet.VERB``, ``wordnet.NOUN`` or ``wordnet.ADV``.
        Any tag that is not recognised falls back to ``wordnet.NOUN``.
    """
    # Treebank adjective tags are JJ / JJR / JJS, so the prefix to test is
    # 'J' (no Treebank tag starts with 'A').
    if tag.startswith('J'):
        return wordnet.ADJ
    elif tag.startswith('V'):
        return wordnet.VERB
    elif tag.startswith('N'):
        return wordnet.NOUN
    elif tag.startswith('R'):
        return wordnet.ADV
    else:
        # Default to noun when the tag has no WordNet counterpart.
        return wordnet.NOUN

In [None]:
def preprocess_text(text):
    """Lowercase, tokenize, remove stop words from and lemmatize ``text``.

    Args:
        text: Raw input string.

    Returns:
        list[str]: One lemma per token that survived stop-word removal, in
        input order. An empty list is returned when no token survives.
    """
    tokens = [
        token for token in word_tokenize(text.lower())
        if token not in stop_words
    ]
    # Build the complete list before returning: returning from inside the
    # per-token loop would yield only the first lemma (and None for no tokens).
    return [
        lemmatizer.lemmatize(word, pos=get_wordnet_pos(tag))
        for word, tag in pos_tag(tokens)
    ]

In [None]:
def classify_text_rule_based(text):
    """Classify ``text`` into one of the categories defined in ``keywords``.

    Scoring happens in three passes:

    1. Keywords: each distinct lemmatized keyword adds its number of
       occurrences in the lemmatized input. Multi-word keywords must appear as
       a contiguous token sequence.
    2. Strong phrases: each verbatim occurrence in the lowercased input adds 5.
    3. Ambiguity rules: context words adjust scores for ambiguous terms
       ("bank", "chip", "model", "operations", "player").

    Args:
        text: Raw input string.

    Returns:
        str: The category key with the highest score (first one wins on ties),
        or ``"Unclassified"`` when no category scores above zero.
    """
    category_scores = {cat: 0 for cat in keywords}
    # `or []` keeps scoring well-defined if preprocessing yields nothing.
    processed_tokens = preprocess_text(text) or []
    text_lower = text.lower()

    def count_sequence(needle):
        """Count contiguous occurrences of the token list ``needle``."""
        width = len(needle)
        return sum(
            processed_tokens[start:start + width] == needle
            for start in range(len(processed_tokens) - width + 1)
        )

    # Rule 1: keyword occurrences.
    for category, kws in keywords.items():
        seen = set()
        for keyword in kws:
            keyword_tokens = tuple(preprocess_text(keyword) or ())
            # Skip keywords that vanish in preprocessing, and keywords that
            # lemmatize to a sequence already counted for this category.
            if not keyword_tokens or keyword_tokens in seen:
                continue
            seen.add(keyword_tokens)
            # Match the whole sequence so "air force" is not triggered by a
            # lone "air".
            category_scores[category] += count_sequence(list(keyword_tokens))

    # Rule 2: strong phrases, matched against the original lowercased text.
    for category, phrases in strong_phrases.items():
        for phrase in phrases:
            category_scores[category] += 5 * text_lower.count(phrase)

    # Rule 3: context-dependent adjustments for ambiguous terms.
    if "bank" in text_lower:
        if "river" in text_lower or "tree" in text_lower or "road" in text_lower:
            category_scores["finance"] -= 3

    if "chip" in text_lower:
        if "processor" in text_lower or "semiconductor" in text_lower or "silicon" in text_lower:
            category_scores["technology"] += 5
            category_scores["medical"] -= 1
        elif "implant" in text_lower or "medical device" in text_lower:
            category_scores["medical"] += 5
            category_scores["technology"] -= 1

    if "model" in text_lower:
        # "ai" is matched as a whole word; a plain substring test would also
        # fire on words such as "said" or "rain".
        mentions_ai = re.search(r"\bai\b", text_lower) is not None
        if mentions_ai or "machine learning" in text_lower or "data" in text_lower:
            category_scores["technology"] += 3

    if "operations" in text_lower:
        if "military" in text_lower or "combat" in text_lower or "troop" in text_lower:
            category_scores["military"] += 3
            category_scores["finance"] -= 1
            category_scores["medical"] -= 1
        elif "business" in text_lower or "financial" in text_lower or "supply chain" in text_lower:
            category_scores["finance"] += 3
            category_scores["military"] -= 1
            category_scores["medical"] -= 1
        elif "surgical" in text_lower or "patient" in text_lower:
            category_scores["medical"] += 3
            category_scores["military"] -= 1
            category_scores["finance"] -= 1

    if "player" in text_lower:
        if "game console" in text_lower or "video game" in text_lower:
            category_scores["sports"] -= 2
            category_scores["technology"] += 2

    # max() returns the first category with the top score, which preserves
    # the dict-order tie-break.
    best_category = max(category_scores, key=category_scores.get, default=None)
    if best_category is None or category_scores[best_category] <= 0:
        return "Unclassified"
    return best_category

In [None]:
def get_input_and_classify():
    """Read a multi-line text from stdin, classify it and print the result.

    Lines are collected until a blank line or end-of-input is reached. If
    nothing was entered, a hint is printed and no classification is run.
    """
    separator = "-" * 30
    print("Enter the paragraph or text you want to classify.")
    print("Press Enter twice to finish your input.")
    print(separator)

    collected = []
    reading = True
    while reading:
        try:
            entry = input()
        except EOFError:
            # End of input terminates the text just like a blank line.
            reading = False
        else:
            if entry.strip():
                collected.append(entry)
            else:
                reading = False

    input_text = "\n".join(collected)
    if input_text.strip():
        print("\nClassifying text...")
        label = classify_text_rule_based(input_text)
        print(f"Predicted Category: {label.capitalize()}")
        print(separator)
    else:
        print("No text entered. Please try again.")

if __name__ == "__main__":
    # Keep classifying until the user answers anything other than "yes".
    keep_going = True
    while keep_going:
        get_input_and_classify()
        answer = input("Classify another text? (yes/no): ").lower().strip()
        keep_going = answer == 'yes'
    print("Exiting classifier. Goodbye!")

Enter the paragraph or text you want to classify.
Press Enter twice to finish your input.
------------------------------
Financial statements (or financial reports) are formal records of the financial activities and position of a business, person, or other entity. Relevant financial information is presented in a structured manner and in a form which is easy to understand


Classifying text...
Predicted Category: Finance
------------------------------
Classify another text? (yes/no): no
Exiting classifier. Goodbye!


In [None]:
!pip install streamlit -q

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.9/9.9 MB[0m [31m70.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m88.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.1/79.1 kB[0m [31m5.9 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
!pip install pyngrok -q

In [None]:
import os
from getpass import getpass

from pyngrok import ngrok

# SECURITY: never hardcode the ngrok authtoken in the notebook. It is read
# from the NGROK_AUTHTOKEN environment variable, falling back to a hidden
# interactive prompt. Any token that was ever saved in a shared copy of this
# notebook should be revoked in the ngrok dashboard.
ngrok_authtoken = os.environ.get("NGROK_AUTHTOKEN") or getpass("ngrok authtoken: ")
ngrok.set_auth_token(ngrok_authtoken)

Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml


In [None]:
%%writefile app.py
import streamlit as st
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tag import pos_tag
from nltk.corpus import wordnet
import string
import re

# --- NLTK Downloads and Initializations (Cached for Efficiency) ---
# st.cache_resource makes this setup run once instead of on every script rerun.
@st.cache_resource
def load_nltk_data():
    """Ensure the required NLTK data is available and build shared resources.

    The downloads make app.py self-contained, so it does not rely on another
    process having fetched the data first. Both the legacy and the newer
    resource names are requested because the name NLTK looks up depends on
    the installed NLTK release.

    Returns:
        tuple: ``(WordNetLemmatizer instance, set of English stop words)``.
    """
    for resource in (
        "punkt",
        "punkt_tab",
        "stopwords",
        "wordnet",
        "omw-1.4",
        "averaged_perceptron_tagger",
        "averaged_perceptron_tagger_eng",
    ):
        nltk.download(resource, quiet=True)
    return WordNetLemmatizer(), set(stopwords.words('english'))

lemmatizer, stop_words = load_nltk_data()

# --- Helper Functions ---
def get_wordnet_pos(tag):
    """Translate a POS tag into the WordNet POS constant used for lemmatizing.

    The decision is made on the first letter of ``tag``: ``J`` -> adjective,
    ``V`` -> verb, ``N`` -> noun, ``R`` -> adverb. Anything else (including an
    empty tag) defaults to noun.
    """
    pos_by_initial = {
        'J': wordnet.ADJ,
        'V': wordnet.VERB,
        'N': wordnet.NOUN,
        'R': wordnet.ADV,
    }
    # tag[:1] is '' for an empty tag, which falls through to the default.
    return pos_by_initial.get(tag[:1], wordnet.NOUN)

def preprocess_text(text):
    """Lowercase, tokenize, filter and lemmatize ``text``.

    Tokens are kept when they are alphabetic words or hyphenated compounds of
    alphabetic parts (e.g. "x-ray", "counter-terrorism") and are not stop
    words. Punctuation and numeric tokens are dropped.

    Args:
        text: Raw input string.

    Returns:
        list[str]: Lemmas of the surviving tokens, in input order (possibly
        empty).
    """
    def is_word(token):
        # A bare isalpha() test would reject hyphenated keywords such as
        # "x-ray", making them impossible to match. Every hyphen-separated
        # part must be alphabetic, so "-", "--" and "a-" are still rejected.
        return all(part.isalpha() for part in token.split('-'))

    tokens = [
        token for token in word_tokenize(text.lower())
        if is_word(token) and token not in stop_words
    ]
    return [
        lemmatizer.lemmatize(word, pos=get_wordnet_pos(tag))
        for word, tag in pos_tag(tokens)
    ]

# --- Define Your Categories and Keywords ---
# Each term must appear only once per category: the classifier adds the term's
# occurrence count once per list entry, so a repeated entry would silently
# double that term's weight.
keywords = {
    "finance": [
        "stock", "market", "investment", "economy", "bank", "loan", "interest rate",
        "inflation", "currency", "profit", "loss", "revenue", "expenditure",
        "dividend", "portfolio", "trade", "share", "bond", "equity", "financial",
        "gdp", "recession", "fund", "asset", "liability", "tax", "budget", "audit",
        "venture capital", "ipo", "merger", "acquisition", "debt", "credit", "fiscal",
        "earning", "forecast", "yield", "commodity", "derivative", "hedging",
        "exchange", "liquidity", "capital", "underwriting", "broker", "analyst"
    ],
    "medical": [
        "patient", "doctor", "hospital", "disease", "treatment", "diagnosis", "symptom",
        "medication", "surgery", "therapy", "clinic", "health", "illness", "virus",
        "bacteria", "vaccine", "research", "clinical trial", "anatomy", "physiology",
        "pharmacology", "epidemic", "pandemic", "prescription", "drug", "mri", "x-ray",
        "rehabilitation", "nursing", "medical device", "consultation", "prevention",
        "wellness", "pathology", "immunology", "neurology", "cardiology", "oncology",
        "pediatrics", "geriatrics", "prognosis", "biotechnology",
        "medical record", "health care", "clinical study", "pharma"
    ],
    "technology": [
        "software", "hardware", "computer", "internet", "network", "data", "ai",
        "machine learning", "algorithm", "cybersecurity", "development", "programming",
        "code", "app", "website", "cloud computing", "startup", "innovation", "digital",
        "robotics", "automation", "gadget", "smartphone", "blockchain", "virtual reality",
        "augmented reality", "chip", "processor", "server", "database", "api", "ux", "ui",
        "firmware", "protocol", "bandwidth", "encryption", "biometric", "nanotechnology",
        "telecom", "gigabit", "ethernet", "analytic", "big data", "coding", "developer",
        "neural network", "artificial intelligence", "tech company", "operating system"
    ],
    "sports": [
        "game", "match", "team", "player", "score", "win", "lose", "championship", "league",
        "athlete", "coach", "stadium", "tournament", "goal", "point", "record", "training",
        "fitness", "olympic", "world cup", "basketball", "football", "cricket", "tennis",
        "swimming", "runner", "umpire", "referee", "medal", "cup", "season", "pitch", "court",
        "field", "race", "compete", "victory", "defeat", "champion", "trophy", "fixture",
        "event", "fan", "supporter", "league table", "transfer"
    ],
    "military": [
        "army", "navy", "air force", "soldier", "weapon", "combat", "war", "defense",
        "security", "military operation", "troop", "battle", "intelligence", "commander",
        "strategy", "tactics", "deployment", "veteran", "peacekeeping", "ammunition",
        "artillery", "infantry", "navy seal", "air strike", "missile", "uniform", "drill",
        "sanction", "bunker", "grenade", "camouflage", "reconnaissance", "logistics",
        "fortification", "cadet", "serviceman", "patrol", "conflict", "frontline",
        "regiment", "battalion", "squadron", "fleet", "armored", "counter-terrorism",
        "military base", "armed force", "national guard", "servicemember"
    ]
}

# Multi-word phrases that strongly indicate a category; the classifier looks
# for them verbatim in the lowercased input.
strong_phrases = dict(
    finance=[
        "stock market", "interest rate hike", "quarterly earnings",
        "financial crisis", "venture capital", "fiscal policy",
        "monetary policy", "investment bank", "credit rating",
        "economic growth",
    ],
    medical=[
        "clinical trial results", "public health emergency",
        "disease outbreak", "medical breakthrough", "patient care",
        "emergency room", "side effects", "health care system",
        "surgical procedure", "drug discovery",
    ],
    technology=[
        "artificial intelligence", "machine learning",
        "cyber security threat", "software development", "cloud computing",
        "virtual reality", "augmented reality", "data science",
        "internet of things", "quantum computing", "neural network",
    ],
    sports=[
        "world cup final", "championship game", "team victory",
        "olympic medal", "record breaking", "league leader",
        "playoff series", "final score", "athlete performance",
        "sporting event",
    ],
    military=[
        "military exercise", "national security", "combat operations",
        "defense budget", "peacekeeping mission", "armed forces",
        "intelligence gathering", "strategic deployment", "air superiority",
        "military alliance", "special forces",
    ],
)

# --- Classification Function ---
def _count_token_sequence(tokens, needle):
    """Count contiguous occurrences of the token list ``needle`` in ``tokens``."""
    width = len(needle)
    return sum(
        tokens[start:start + width] == needle
        for start in range(len(tokens) - width + 1)
    )


def _compute_category_scores(text):
    """Score ``text`` against every category.

    This is the single source of truth for scoring; both the prediction and
    the score table shown in the UI are derived from its result.

    Scoring happens in three passes:

    1. Keywords: each distinct lemmatized keyword adds its number of
       occurrences in the lemmatized input. Multi-word keywords must appear as
       a contiguous token sequence.
    2. Strong phrases: each verbatim occurrence in the lowercased input adds 5.
    3. Ambiguity rules: context words adjust scores for ambiguous terms.

    Args:
        text: Raw input string.

    Returns:
        dict[str, int]: Score per category key of ``keywords``.
    """
    category_scores = {cat: 0 for cat in keywords}
    processed_tokens = preprocess_text(text)
    text_lower = text.lower()

    # Rule 1: keyword occurrences in the lemmatized tokens.
    for category, kws in keywords.items():
        seen = set()
        for keyword in kws:
            keyword_tokens = tuple(preprocess_text(keyword))
            # Skip keywords that vanish in preprocessing, and keywords that
            # lemmatize to a sequence already counted for this category.
            if not keyword_tokens or keyword_tokens in seen:
                continue
            seen.add(keyword_tokens)
            # Match the whole sequence so "air force" is not triggered by a
            # lone "air".
            category_scores[category] += _count_token_sequence(
                processed_tokens, list(keyword_tokens)
            )

    # Rule 2: strong phrases, matched against the original lowercased text.
    for category, phrases in strong_phrases.items():
        for phrase in phrases:
            category_scores[category] += 5 * text_lower.count(phrase)

    # Rule 3: context-dependent adjustments for ambiguous terms.
    if "bank" in text_lower:
        if "river" in text_lower or "tree" in text_lower or "road" in text_lower:
            category_scores["finance"] -= 3

    if "chip" in text_lower:
        if "processor" in text_lower or "semiconductor" in text_lower or "silicon" in text_lower:
            category_scores["technology"] += 5
            category_scores["medical"] -= 1
        elif "implant" in text_lower or "medical device" in text_lower:
            category_scores["medical"] += 5
            category_scores["technology"] -= 1
        elif "potato" in text_lower or "snack" in text_lower:
            category_scores["technology"] -= 2
            category_scores["medical"] -= 2
            category_scores["sports"] -= 1

    if "model" in text_lower:
        # "ai" is matched as a whole word; a plain substring test would also
        # fire on words such as "said" or "rain".
        mentions_ai = re.search(r"\bai\b", text_lower) is not None
        if mentions_ai or "machine learning" in text_lower or "data" in text_lower:
            category_scores["technology"] += 3

    if "operations" in text_lower:
        if "military" in text_lower or "combat" in text_lower or "troop" in text_lower:
            category_scores["military"] += 3
            category_scores["finance"] -= 1
            category_scores["medical"] -= 1
        elif "business" in text_lower or "financial" in text_lower or "supply chain" in text_lower:
            category_scores["finance"] += 3
            category_scores["military"] -= 1
            category_scores["medical"] -= 1
        elif "surgical" in text_lower or "patient" in text_lower:
            category_scores["medical"] += 3
            category_scores["military"] -= 1
            category_scores["finance"] -= 1

    if "player" in text_lower:
        if "game console" in text_lower or "video game" in text_lower:
            category_scores["sports"] -= 2
            category_scores["technology"] += 2

    if "drone technology" in text_lower:
        category_scores["military"] += 4
        category_scores["technology"] += 2

    return category_scores


def _pick_category(category_scores):
    """Return the capitalized top category, or "Unclassified" if none is > 0."""
    # max() returns the first category with the top score, which preserves
    # the dict-order tie-break.
    best_category = max(category_scores, key=category_scores.get, default=None)
    if best_category is None or category_scores[best_category] <= 0:
        return "Unclassified"
    return best_category.capitalize()


def classify_text_rule_based(text):
    """Classify ``text`` into one of the configured categories.

    Args:
        text: Raw input string.

    Returns:
        str: The capitalized name of the best-scoring category,
        ``"Unclassified"`` when no category scores above zero, or a prompt
        message when ``text`` is empty.
    """
    if not text:
        return "Please enter some text for classification."
    return _pick_category(_compute_category_scores(text))

# --- Streamlit UI ---
st.set_page_config(page_title="Text Classifier", layout="centered")

st.title("💡 Rule-Based Text Classifier 🚀")
st.markdown("---")

st.markdown("""
Welcome to the Rule-Based Text Classifier!
Enter a paragraph or any piece of text below, and our AI will attempt to classify it into one of these categories:
**Finance, Medical, Technology, Sports, or Military.**
""")

text_input = st.text_area(
    "Enter your text here:",
    height=200,
    placeholder="Example: 'The new AI model improved stock market predictions, leading to significant financial gains. Meanwhile, the basketball team won their championship game.'"
)

if st.button("Classify Text"):
    if text_input:
        with st.spinner("Classifying..."):
            # Score once; the prediction and the table below both come from
            # the same numbers, so they cannot drift apart.
            category_scores = _compute_category_scores(text_input)
            result = _pick_category(category_scores)
            st.success(f"**Predicted Category: {result}**")
            st.markdown("---")
            st.subheader("Category Scores (for debugging/insight):")

            # Highest score first.
            sorted_scores = sorted(category_scores.items(), key=lambda item: item[1], reverse=True)
            for cat, score in sorted_scores:
                st.write(f"- **{cat.capitalize()}:** {score} points")

    else:
        st.warning("Please enter some text to classify.")

st.markdown("---")
st.markdown("Built with ❤️ using Streamlit & NLTK")

Writing app.py


In [None]:
from pyngrok import ngrok
import subprocess
import os

# Kill any existing ngrok processes (useful if you restart cells multiple times)
!kill -9 $(lsof -t -i:8501) >/dev/null 2>&1 || true
!kill -9 $(lsof -t -i:4040) >/dev/null 2>&1 || true # For ngrok's own UI

# Start Streamlit app in the background
print("Starting Streamlit app in the background...")
# Use nohup to detach the process, & to run in background
# This command prevents Streamlit from blocking the Colab cell
process = subprocess.Popen(['nohup', 'streamlit', 'run', 'app.py', '--server.port', '8501', '--server.enableCORS', 'false', '--server.enableXsrfProtection', 'false'],
                           stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)

# Give Streamlit a moment to start
import time
time.sleep(5)

# Connect ngrok to Streamlit's port 8501
print("Connecting ngrok tunnel...")
public_url = ngrok.connect(8501)
print(f"Your Streamlit app is live at: {public_url}")

# Optional: Print Streamlit's server output (for debugging if app doesn't load)
# You might see logs like "You can now view your Streamlit app in your browser."
# and connection messages from ngrok.
# If you don't see the app, comment out time.sleep(5) and try to read output more carefully
# for line in iter(process.stdout.readline, ''):
#     print(line, end='')
#     if "You can now view your Streamlit app in your browser." in line:
#         break

Starting Streamlit app in the background...
Connecting ngrok tunnel...
Your Streamlit app is live at: NgrokTunnel: "https://b6fce3def7ef.ngrok-free.app" -> "http://localhost:8501"
