In [None]:
# READ DATA FROM result.json
# Loads the exported JSON and flattens it into two tables:
#   chat_df - one row per chat (id, name, type)
#   msg_df  - one row per message, tagged with the chat it belongs to
json_file = './result.json' # use your own path to result.json

import pandas as pd
import json
from datetime import datetime
import matplotlib.pyplot as plt


# Explicit column lists keep both tables' schema stable even when the export
# contains no chats or no messages (otherwise msg_df['date'] would not exist).
CHAT_COLUMNS = ["chat_id", "chat_name", "chat_type"]
MESSAGE_COLUMNS = [
    "chat_type",
    "chat_id",
    "chat_name",
    "message_id",
    "date",
    "from",
    "from_id",
    "text",
    "media_type",
    "file",
    "photo",
    "forwarded_from",
]

with open(json_file, 'r', encoding='utf-8') as f:
    data = json.load(f)

all_messages = []
chat_list = []

for chat in data['chats']['list']:
    chat_id = chat.get("id")
    # A chat entry without a "name" key is labelled "Saved Messages".
    chat_name = chat.get("name", "Saved Messages")
    chat_type = chat.get("type")

    chat_list.append({
        "chat_id": chat_id,
        "chat_name": chat_name,
        "chat_type": chat_type
    })

    for msg in chat.get("messages", []):
        text = msg.get("text")
        if isinstance(text, list):
            # "text" may be a list mixing plain strings with dicts that carry
            # their own "text" field; flatten it into a single string.
            text = ''.join(
                part["text"] if isinstance(part, dict) else part
                for part in text
            )
        all_messages.append({
            "chat_type": chat_type,
            "chat_id": chat_id,
            "chat_name": chat_name,
            "message_id": msg.get("id"),
            "date": msg.get("date"),
            "from": msg.get("from"),
            "from_id": msg.get("from_id"),
            "text": text,
            "media_type": msg.get("media_type"),
            "file": msg.get("file"),
            "photo": msg.get("photo"),
            "forwarded_from": msg.get("forwarded_from"),
        })

msg_df = pd.DataFrame(all_messages, columns=MESSAGE_COLUMNS)
chat_df = pd.DataFrame(chat_list, columns=CHAT_COLUMNS)

# Parse the date strings so the .dt accessors used by the report cells work.
msg_df['date'] = pd.to_datetime(msg_df['date'])

# Only the last expression of a cell is rendered automatically, so the chat
# preview has to be displayed explicitly (display is provided by the
# Jupyter/IPython kernel).
display(chat_df.head(10))
msg_df.head(10)

In [None]:
# ALL CHATS REPORT
# One row per chat with its message count, busiest chat first.
messages_per_chat = (
    msg_df.groupby(['chat_id', 'chat_name'])
    .size()
    .reset_index(name='message_count')
    .sort_values('message_count', ascending=False)
)

# Bar chart of messages per chat; the figure width grows with the number of chats.
# DataFrame.plot creates its own figure, so there is no separate plt.figure()
# call (it would only leave an extra, empty figure in the output).
figure_width = max(12, len(messages_per_chat) * 0.5)
ax = messages_per_chat.plot(
    kind='bar',
    x='chat_name',
    y='message_count',
    legend=False,
    figsize=(figure_width, 6)
)
ax.set_xlabel('Chat')
ax.set_ylabel('Number of messages')
ax.set_title('messages per chat')
plt.xticks(rotation=90, ha='center')
plt.tight_layout()
plt.show()

# len() counts rows (messages); DataFrame.size would be rows * columns.
total_messages = len(msg_df)
print(f'total_messages: {total_messages}')

In [None]:
# AVAILABLE CHATS
# Lists the distinct chat names that can be used as a report target.
# The f-string is delimited with double quotes because reusing the same quote
# character inside the replacement field is a SyntaxError before Python 3.12.

print(f"available chat list: {chat_df['chat_name'].unique()}")

In [None]:
# CHAT REPORT
# Per-user message counts by month for one chat, limited to the selected years.
target_chat_name = "use chat from available chats"
selected_years = [2024, 2025]  # years for report

# Filter the full message table (msg_df). The result gets its own name so the
# chat_df table of available chats is not overwritten and earlier cells stay
# re-runnable.
in_target_chat = msg_df['chat_name'] == target_chat_name
in_selected_years = msg_df['date'].dt.year.isin(selected_years)
selected_chat_df = msg_df[in_target_chat & in_selected_years]

if selected_chat_df.empty:
    # Plotting an empty table raises an error; report the empty selection instead.
    print(f'no messages found for chat {target_chat_name!r} in years {selected_years}')
else:
    # Rows: calendar month; columns: sender; values: number of messages.
    messages_grouped = selected_chat_df.groupby([
        selected_chat_df['date'].dt.to_period('M'),
        selected_chat_df['from']
    ]).size().unstack(fill_value=0)

    total_messages_by_user = messages_grouped.sum().sort_values(ascending=False)

    print(total_messages_by_user)

    messages_grouped.plot(
        kind='bar',
        stacked=False,  # stacked=False draws a regular grouped (multi-bar) chart
        figsize=(20,8)
    )

    plt.xlabel('Month')
    plt.ylabel('Number of messages')
    plt.title(f'Messages by month and user: {target_chat_name}')
    plt.xticks(rotation=45, ha='right')
    plt.tight_layout()
    plt.show()

In [None]:
# AVAILABLE PERSON LIST
# Lists the distinct sender names that can be used in the person report.
# The f-string is delimited with double quotes because reusing the same quote
# character inside the replacement field is a SyntaxError before Python 3.12.

print(f"Available for person: {msg_df['from'].unique()}")

In [None]:
## PERSON REPORT
# For one sender: an activity heatmap (weekday x hour), the most frequent
# long words, and the average message length in words.

person = 'person name. Paste into this field from available person'

import seaborn as sns
from collections import Counter

# Keep only this sender's messages; .copy() so the helper columns added below
# are written to an independent frame rather than a slice of msg_df.
person_df = msg_df[msg_df['from'] == person].copy()

person_df['hour'] = person_df['date'].dt.hour
person_df['weekday'] = person_df['date'].dt.dayofweek  # 0=Monday, 6=Sunday

# Message counts per (weekday, hour). Reindexing to the full 7 x 24 grid keeps
# the axes identical for every person and gives a well-formed all-zero matrix
# when the person has no messages (an empty matrix cannot be drawn).
heatmap_data = (
    person_df.groupby(['weekday', 'hour'])
    .size()
    .unstack(fill_value=0)
    .reindex(index=range(7), columns=range(24), fill_value=0)
    .astype(int)  # fmt="d" below requires integer cells
)

plt.figure(figsize=(18,6))
sns.heatmap(heatmap_data, cmap="YlGnBu", linewidths=.5, annot=True, fmt="d")
plt.title(f"Activity by hours and day of week: {person}")
plt.xlabel("Hours")
plt.ylabel("Day of week (0=Mon, 6=Sun)")
plt.yticks(rotation=0)
plt.show()



# --- 1. 50 most popular words longer than 5 characters ---
# NOTE(review): the filter keeps words of 6+ characters (len > 5); the earlier
# comment said "from 5 symbols" - confirm which threshold is intended.
texts = person_df['text'].dropna().tolist()

# Words are lower-cased and split on whitespace only; punctuation stays attached.
counter = Counter(
    word
    for message_text in texts
    for word in str(message_text).lower().split()
    if len(word) > 5
)
top_words = counter.most_common(50)

print(f"\n--- 50 most popular words from {person} ---")
for word, count in top_words:
    print(f"{word}: {count}")



# --- 2. Average length in words ---
# Every non-null text counts, including empty strings (length 0).
lengths = [len(str(message_text).split()) for message_text in texts]
average_length = sum(lengths) / len(lengths) if lengths else 0
print(f"\n--- Average length in words: {average_length:.2f} ---")