# Fully Automatic WhatsApp Analyzer (String-Split Method)

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
from tkinter import Tk
from tkinter.filedialog import askopenfilename

In [None]:
def _split_timestamp(line):
    """Split ``<date>, <time> - <rest>`` into ``(date, time, rest)``.

    Returns None when the line does not start with such a stamp, which is how
    continuation lines of a multi-line message are recognised.
    """
    # Some exports prefix lines with invisible direction marks; drop them so
    # the digit checks below look at the real first character.
    text = line.lstrip("\u200e\u200f")
    date, comma, remainder = text.partition(",")
    clock, dash, rest = remainder.partition(" - ")
    date, clock = date.strip(), clock.strip()
    # Requiring a leading digit in both parts keeps ordinary sentences that
    # merely contain "," and " - " from being mistaken for a new message.
    if comma and dash and date[:1].isdigit() and clock[:1].isdigit():
        return date, clock, rest
    return None


def _parse_chat_lines(lines):
    """Build ``[date, time, name, message]`` rows from raw export lines.

    Lines without a leading stamp are appended (newline-joined) to the message
    above them. Stamped lines without a ``name: `` part (system notices) are
    skipped, as are messages that are empty after stripping.
    """
    rows = []
    open_row = None  # most recent message row that may still receive lines
    for raw in lines:
        line = raw.rstrip("\r\n")
        stamp = _split_timestamp(line)
        if stamp is None:
            if open_row is not None:
                open_row[3] += "\n" + line
            continue
        date, clock, rest = stamp
        name, colon, message = rest.partition(": ")
        name = name.strip()
        if colon and name:
            open_row = [date, clock, name, message]
            rows.append(open_row)
        else:
            # Stamped line with no sender: following lines must not be glued
            # onto the previous person's message.
            open_row = None
    return [[d, t, n, m.strip()] for d, t, n, m in rows if m.strip()]


def run_whatsapp_analysis():
    """Pick a WhatsApp export, parse it, print/plot basic stats, save a CSV.

    Steps:
      1. Ask for a ``.txt`` export through a Tk file dialog.
      2. Parse it into Date / Time / Name / Message rows (only the
         ``<date>, <time> - <name>: <message>`` line layout is recognised).
      3. Print and plot the five most active senders, show the five longest
         messages, and print/plot the number of messages per day.
      4. Write the parsed table to ``cleaned_whatsapp_chat.csv`` in the
         current working directory.

    Returns:
        None. The function returns early, after printing a notice, when no
        file is chosen or when no message could be parsed.
    """
    # Hidden root window for the dialog; destroy it afterwards so no stray
    # Tk instance is left behind.
    root = Tk()
    root.withdraw()
    try:
        file_path = askopenfilename(
            title="Select WhatsApp Chat .txt file",
            filetypes=[("Text files", "*.txt")],
        )
    finally:
        root.destroy()

    if not file_path:
        print("❌ No file selected. Please run the cell again and select a WhatsApp .txt file.")
        return

    print(f"✅ File selected: {file_path}")

    # Parse chat. 'utf-8-sig' reads plain UTF-8 too, but strips a leading
    # byte-order mark that would otherwise corrupt the first date.
    with open(file_path, 'r', encoding='utf-8-sig') as f:
        cleaned_data = _parse_chat_lines(f)

    if not cleaned_data:
        print("❌ No messages were parsed. Please check your WhatsApp export format.")
        return

    df = pd.DataFrame(cleaned_data, columns=['Date', 'Time', 'Name', 'Message'])
    df['Length of Message'] = df['Message'].str.len()

    print(f"✅ WhatsApp chat parsed successfully! Total messages: {len(df)}")

    # Basic stats + visualizations
    # Top 5 active people
    top_senders = df['Name'].value_counts().head(5)
    print("\n--- Top 5 Active People ---")
    print(top_senders)

    plt.figure(figsize=(8, 5))
    top_senders.plot(kind='bar', color='skyblue')
    plt.title("Top 5 Active People")
    plt.ylabel("Number of Messages")
    plt.xlabel("Name")
    plt.xticks(rotation=45)
    plt.show()

    # Top 5 longest messages
    top_long_messages = df.sort_values('Length of Message', ascending=False).head(5)
    print("\n--- Top 5 Longest Messages ---")
    try:
        show_table = display  # available when running under IPython/Jupyter
    except NameError:
        show_table = print  # plain-Python fallback
    show_table(top_long_messages[['Name', 'Message', 'Length of Message']])

    # Messages per day. Dates pandas cannot parse become NaT and are left out
    # of the per-day counts by value_counts().
    # NOTE(review): day/month order is left to pandas' inference — confirm it
    # matches the locale of the export.
    df['Date'] = pd.to_datetime(df['Date'], errors='coerce')
    messages_per_day = df['Date'].value_counts().sort_index()
    print("\n--- Messages per Day (First 10 Days) ---")
    print(messages_per_day.head(10))

    plt.figure(figsize=(12, 5))
    messages_per_day.plot(kind='line', marker='o', color='orange')
    plt.title("Messages Per Day")
    plt.ylabel("Number of Messages")
    plt.xlabel("Date")
    plt.xticks(rotation=45)
    plt.show()

    # Save cleaned CSV
    cleaned_csv_path = "cleaned_whatsapp_chat.csv"
    df.to_csv(cleaned_csv_path, index=False)
    print(f"\n✅ Cleaned chat saved as '{cleaned_csv_path}'. You can now run NLP on it.")

# Run the analyzer. The guard is true when this runs as a notebook cell or a
# script, and keeps the file dialog from opening if the file is imported.
if __name__ == "__main__":
    run_whatsapp_analysis()