In [2]:
import pandas as pd
import re
import matplotlib.pyplot as plt
from IPython.display import display, Markdown

import google.generativeai as genai

# Parse WhatsApp chat file
def parse_whatsapp_chat(file_path):
	"""Parse an exported WhatsApp chat text file into a DataFrame.

	A message starts on a line of the form
	``DD/MM/YYYY, HH:MM - Sender: Message``. Any following line that does not
	begin with a ``DD/MM/YYYY, HH:MM - `` header is treated as a continuation
	of that message and appended to it, separated by a newline. Lines that
	carry a timestamp header but no ``Sender: `` part (for example group
	notices) are skipped.

	Args:
		file_path: Path to the chat export; it is read as UTF-8 text.

	Returns:
		pandas.DataFrame with the columns ``date``, ``time``, ``sender``,
		``message`` (all strings) and ``datetime`` (parsed from ``date`` and
		``time`` with the format ``%d/%m/%Y %H:%M``). The columns are present
		even when the file contains no parsable message.

	Raises:
		OSError: If the file cannot be opened.
		ValueError: If a matched date/time does not fit ``%d/%m/%Y %H:%M``
			(raised by ``pd.to_datetime``).
	"""
	# Full message line: DD/MM/YYYY, HH:MM - Sender: Message
	message_pattern = re.compile(r'^(\d{1,2}/\d{1,2}/\d{4}), (\d{1,2}:\d{2}) - ([^:]+): (.+)$')
	# Any timestamped line, including notices that have no "Sender: " part
	header_pattern = re.compile(r'^\d{1,2}/\d{1,2}/\d{4}, \d{1,2}:\d{2} - ')

	columns = ['date', 'time', 'sender', 'message']
	messages = []
	current = None  # message dict that continuation lines get appended to
	with open(file_path, 'r', encoding='utf-8') as file:
		for raw_line in file:
			line = raw_line.rstrip('\n')
			match = message_pattern.match(line)
			if match:
				date, time, sender, message = match.groups()
				current = {
					'date': date,
					'time': time,
					'sender': sender.strip(),
					'message': message
				}
				messages.append(current)
			elif header_pattern.match(line):
				# Timestamped line that is not a user message: it ends the
				# previous message and must not be glued onto it.
				current = None
			elif current is not None:
				# Continuation of a multi-line message.
				current['message'] += '\n' + line

	# Passing the column list keeps the schema stable when nothing matched,
	# so the column lookups below do not raise KeyError on an empty result.
	df = pd.DataFrame(messages, columns=columns)

	# Convert date and time to datetime
	df['datetime'] = pd.to_datetime(df['date'] + ' ' + df['time'], format='%d/%m/%Y %H:%M')

	return df

# Load the WhatsApp chat data
df = parse_whatsapp_chat("whatsapp_chat.txt")

# Filter messages from the year 2025
df_2025 = df[df['datetime'].dt.year == 2025]


def _save_month_2025(frame, month_number, month_name):
	"""Select one calendar month from ``frame``, write it to CSV and report it.

	Args:
		frame: DataFrame with a ``datetime`` column (here: the 2025 messages).
		month_number: Month to keep, compared against ``datetime.dt.month``.
		month_name: Capitalised month name; used in the printed line and,
			lower-cased, in the CSV file name ``<month>_2025_chats.csv``.

	Returns:
		The rows of ``frame`` whose ``datetime`` falls in ``month_number``.
	"""
	month_frame = frame[frame['datetime'].dt.month == month_number]
	csv_name = f"{month_name.lower()}_2025_chats.csv"
	month_frame.to_csv(csv_name, index=False)
	print(f"{month_name} 2025 messages: {len(month_frame)} saved to {csv_name}")
	return month_frame


# Filter and save March, April and May 2025; the per-month frames keep their
# own names so they stay available after this cell.
march_data = _save_month_2025(df_2025, 3, "March")
april_data = _save_month_2025(df_2025, 4, "April")
may_data = _save_month_2025(df_2025, 5, "May")

# Summary
print(f"Total messages: {len(df)}")
print(f"Total 2025 messages: {len(df_2025)}")



March 2025 messages: 3205 saved to march_2025_chats.csv
April 2025 messages: 3664 saved to april_2025_chats.csv
May 2025 messages: 1054 saved to may_2025_chats.csv
Total messages: 30303
Total 2025 messages: 13656
