# Telegram Chat Dump Analyzer

This notebook parses a Telegram chat history export (JSON) to find questions, requests for help, or specific keywords. 
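It runs entirely within Google Colab: the uploaded file is processed in the Colab runtime and is not sent to any third-party service. (The optional last step loads the results into an interactive Google Sheet.)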

In [None]:
# 1. Setup & Imports
!pip install --quiet pandas ipywidgets

import json
import pandas as pd
from google.colab import files, sheets
from IPython.display import display, clear_output
import ipywidgets as widgets

print("Libraries installed and imported.")

In [None]:
# 2. Configuration
# Keywords that mark a message as a question or a request for help. They are
# matched as case-insensitive substrings of the message text.
# The Russian entries had been saved as mojibake (UTF-8 bytes decoded with the
# wrong codec) and therefore could never match real Cyrillic messages; they
# are restored to proper text here.
KEYWORDS = [
    "подскажите", "вопрос", "#вопрос", "помощь", "помогите",
    "question", "help", "how to", "error"
]

print(f"Current keywords: {KEYWORDS}")

In [None]:
# 3. Upload File
# files.upload() prompts for a file and returns a mapping keyed by the names
# of the uploaded files; it is empty when nothing was uploaded.
print("📤 Please upload your Telegram JSON dump (result.json).")
uploaded = files.upload()

if uploaded:
    # Only the first uploaded file is analysed by the following cells.
    input_file = next(iter(uploaded))
    print(f"✅ File loaded: {input_file}")
else:
    print("❌ No file uploaded.")

In [None]:
# 4. Parsing Logic
def message_contains_keywords(text: str, keywords: list) -> bool:
    """Return True if ``text`` contains at least one of ``keywords``.

    Matching is a case-insensitive substring search, so ``"help"`` also
    matches ``"Helpful"``.

    Args:
        text: Message text to inspect. Anything that is not a non-empty
            string (``None``, ``""``, numbers, ...) never matches.
        keywords: Keywords to look for. Entries that are not strings, or that
            are empty / whitespace-only, are ignored: an empty keyword is a
            substring of every message and would otherwise match everything.

    Returns:
        ``True`` when any usable keyword occurs in ``text``, else ``False``.
    """
    if not isinstance(text, str) or not text:
        return False

    # casefold() is the Unicode-aware normalisation for caseless comparison.
    haystack = text.casefold()
    return any(
        keyword.casefold() in haystack
        for keyword in keywords
        if isinstance(keyword, str) and keyword.strip()
    )

def extract_questions(input_file: str, keywords: list = None):
    """Load a Telegram JSON export and return the messages matching keywords.

    Args:
        input_file: Path to the exported JSON file. The file may hold either
            an object with a top-level ``"messages"`` list or a bare list of
            message objects.
        keywords: Keywords handed to ``message_contains_keywords``. Defaults
            to the notebook-level ``KEYWORDS`` list.

    Returns:
        A ``pandas.DataFrame`` with the columns ``Date``, ``User``, ``ID`` and
        ``Message``, one row per matching message. ``Date`` is converted to
        datetime; missing or unparseable dates become ``NaT``. The frame is
        empty (with the same columns) when nothing matches or when the file
        cannot be read; in the latter case the error is printed, not raised.
    """
    columns = ["Date", "User", "ID", "Message"]
    placeholder = "—"  # stored when a message carries no sender name / id

    try:
        with open(input_file, "r", encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError, TypeError) as e:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        print(f"Error reading file: {e}")
        return pd.DataFrame(columns=columns)

    if keywords is None:
        keywords = KEYWORDS

    # Only dict-shaped exports are asked for "messages"; a bare list has no
    # .get() and is used as the message list directly.
    messages = data.get("messages", []) if isinstance(data, dict) else data
    if not isinstance(messages, list):
        messages = []

    rows = []
    for msg in messages:
        if not isinstance(msg, dict):
            continue

        text = msg.get("text", "")
        # Nested text entities: a list mixing plain strings with objects that
        # carry a "text" field. Flatten them into a single string.
        if isinstance(text, list):
            text = "".join(
                part if isinstance(part, str)
                else str(part.get("text", "")) if isinstance(part, dict)
                else ""
                for part in text
            )

        if not isinstance(text, str) or not text:
            continue
        if not message_contains_keywords(text, keywords):
            continue

        # dict.get's default only applies to a missing key; an explicit null
        # would come through as None, so None is checked for separately.
        sender = msg.get("from")
        sender_id = msg.get("from_id")
        rows.append({
            "Date": msg.get("date"),
            "User": placeholder if sender is None else sender,
            "ID": placeholder if sender_id is None else sender_id,
            "Message": text.strip(),
        })

    df = pd.DataFrame(rows, columns=columns)
    if not df.empty:
        # Per-value best effort: one bad date no longer leaves the whole
        # column as unconverted strings.
        df["Date"] = pd.to_datetime(df["Date"], errors="coerce")
    return df

# Parse the uploaded export. `input_file` only exists once the upload cell has
# run successfully, hence the name check at module (global) scope.
if 'input_file' in globals():
    df = extract_questions(input_file)
    print(f"🔍 Found {len(df)} messages matching keywords.")
else:
    print("⚠️ Please upload a file first.")

In [None]:
# 5. Interactive Dashboard
# Three text boxes filter the results table as you type. Each box is a
# literal, case-insensitive substring filter; an empty box matches every row.
if 'df' in globals() and not df.empty:
    name_filter = widgets.Text(description='User:')
    tag_filter = widgets.Text(description='ID:')
    msg_filter = widgets.Text(description='Message:')

    ui = widgets.VBox([name_filter, tag_filter, msg_filter])
    # The table is rendered into its own Output widget, so refreshing it does
    # not clear and re-create the filter boxes.
    out = widgets.Output()

    def _matches(column, needle):
        """Return a boolean mask of rows whose `column` contains `needle`."""
        # astype(str): missing / non-text values must not be hidden by an
        # empty filter. regex=False: input such as "(" or "?" is searched
        # literally instead of being compiled as a regular expression.
        return df[column].astype(str).str.contains(needle, case=False, regex=False)

    def filter_table(change=None):
        """Re-render the table from the current values of the filter boxes.

        `change` is the event payload passed by `observe`; it is unused
        because the current widget values are read directly.
        """
        filtered = df[
            _matches("User", name_filter.value)
            & _matches("ID", tag_filter.value)
            & _matches("Message", msg_filter.value)
        ]
        with out:
            clear_output(wait=True)
            display(filtered)

    # Re-filter whenever the text in any of the boxes changes.
    for box in (name_filter, tag_filter, msg_filter):
        box.observe(filter_table, names='value')

    filter_table()  # initial, unfiltered render
    display(ui, out)
else:
    print("⚠️ No data to display.")

In [None]:
# 6. Export to Google Sheets (Interactive)
# Wraps the results in a sheets.InteractiveSheet and displays it.
if 'df' in globals() and not df.empty:
    sheet = sheets.InteractiveSheet(df=df)
    print("Data loaded into Interactive Sheet below:")
    display(sheet)
else:
    # Like the parsing and dashboard cells, say why nothing happened instead
    # of finishing silently.
    print("⚠️ No data to export.")