In [None]:
import pandas as pd
import re
import matplotlib.pyplot as plt
from IPython.display import display, Markdown

import google.generativeai as genai

# Parse WhatsApp chat file
def parse_whatsapp_chat(file_path):
    """Parse an exported WhatsApp chat text file into a DataFrame.

    Only lines of the form ``DD/MM/YYYY, HH:MM - Sender: Message`` are kept.
    Lines that do not match that shape (for example notices without a
    ``Sender:`` part, or the continuation lines of a multi-line message)
    are skipped.

    Args:
        file_path: Path of the UTF-8 encoded chat export.

    Returns:
        A DataFrame with the text columns ``date``, ``time``, ``sender``
        (whitespace-stripped) and ``message``, plus a ``datetime`` column
        parsed from ``date`` and ``time``. All five columns are present even
        when no line matches.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a matched date/time is not valid for ``%d/%m/%Y %H:%M``.
    """
    # The sender group excludes newlines so that a line without "Sender: "
    # can never borrow the sender/message part from the line that follows it.
    pattern = re.compile(
        r'^(\d{1,2}/\d{1,2}/\d{4}), (\d{1,2}:\d{2}) - ([^:\n]+): (.+)$',
        re.MULTILINE,
    )

    with open(file_path, 'r', encoding='utf-8') as file:
        content = file.read()

    messages = [
        {'date': date, 'time': time, 'sender': sender.strip(), 'message': message}
        for date, time, sender, message in pattern.findall(content)
    ]

    # Explicit columns keep the frame usable when nothing matched; without
    # them the column lookups below would raise KeyError.
    df = pd.DataFrame(messages, columns=['date', 'time', 'sender', 'message'])

    # Convert date and time to datetime
    df['datetime'] = pd.to_datetime(df['date'] + ' ' + df['time'], format='%d/%m/%Y %H:%M')

    return df

# Parse the exported chat into a DataFrame
df = parse_whatsapp_chat("whatsapp_chat.txt")

# Keep only the messages sent during 2025
sent_in_2025 = df['datetime'].dt.year == 2025
df_2025 = df[sent_in_2025]

# For March, April and May in turn: select that month's rows, write them to
# their CSV file and report how many were saved
monthly_exports = {}
for month_number, month_label, csv_path in (
    (3, 'March', 'march_2025_chats.csv'),
    (4, 'April', 'april_2025_chats.csv'),
    (5, 'May', 'may_2025_chats.csv'),
):
    month_rows = df_2025[df_2025['datetime'].dt.month == month_number]
    month_rows.to_csv(csv_path, index=False)
    print(f"{month_label} 2025 messages: {len(month_rows)} saved to {csv_path}")
    monthly_exports[month_number] = month_rows

# Expose the per-month frames under their usual names
march_data, april_data, may_data = (monthly_exports[number] for number in (3, 4, 5))

# Summary
print(f"Total messages: {len(df)}")
print(f"Total 2025 messages: {len(df_2025)}")



In [None]:
# Read the data files.
# keep_default_na=False stops pandas from turning chat text such as "NA",
# "null" or "None" into NaN; every column in these files is plain text.
march_data = pd.read_csv('march_2025_chats.csv', keep_default_na=False)
april_data = pd.read_csv('april_2025_chats.csv', keep_default_na=False)
may_data = pd.read_csv('may_2025_chats.csv', keep_default_na=False)

# Timestamps come back from CSV as text, so restore a real datetime dtype.
# The loop variable is deliberately not called `df`: that name holds the full
# parsed chat from the first cell and must not be overwritten here.
for monthly_chat in (march_data, april_data, may_data):
    if not pd.api.types.is_datetime64_any_dtype(monthly_chat['datetime']):
        monthly_chat['datetime'] = pd.to_datetime(monthly_chat['datetime'])

# Process each month separately
def extract_link_downs_with_tickets(data, month_name, window_minutes=15):
    """Pair each "link down" message sent by Lnoc with the next NET ticket.

    A message counts as a link-down report when its sender (after stripping
    whitespace) is ``'Lnoc'`` and its text matches ``[Ll]ink [Dd]own``. For
    each report, the messages strictly after it and up to ``window_minutes``
    later are scanned in row order; the first ``NET<digits>`` token found is
    recorded as its ticket.

    Args:
        data: DataFrame with ``datetime``, ``sender`` and ``message`` columns.
            ``datetime`` must hold real timestamps (not text).
        month_name: Label used for the output file ``{month_name}_link.csv``
            and in the printed status line.
        window_minutes: Length of the look-ahead window in minutes.
            Defaults to 15.

    Returns:
        A DataFrame with columns ``datetime``, ``link_down_message`` and
        ``net_ticket`` (``'Not found'`` when no ticket appears in the window).
        When there are no link-down reports the frame is empty but still has
        those three columns, and no CSV is written.
    """
    # na=False: a missing (NaN) message simply does not match instead of
    # putting NaN into the boolean mask.
    is_link_down = (
        data['sender'].str.strip().eq('Lnoc')
        & data['message'].str.contains('[Ll]ink [Dd]own', regex=True, na=False)
    )
    link_down_messages = data[is_link_down]

    # Extract the first NET ticket of every message once, up front. Rows whose
    # message is missing or has no ticket become NaN, so non-text messages can
    # no longer crash the search below.
    ticket_per_message = data['message'].str.extract(r'(NET\d+)', expand=False)
    window = pd.Timedelta(minutes=window_minutes)

    results = []
    for _, row in link_down_messages.iterrows():
        reported_at = row['datetime']
        in_window = (data['datetime'] > reported_at) & (data['datetime'] <= reported_at + window)
        candidates = ticket_per_message[in_window].dropna()
        results.append({
            'datetime': reported_at,
            'link_down_message': row['message'],
            'net_ticket': candidates.iloc[0] if not candidates.empty else 'Not found',
        })

    if not results:
        print(f"No link downs found for {month_name} 2025")
        # Keep the expected schema so callers can still select columns.
        return pd.DataFrame({
            'datetime': pd.Series(dtype='datetime64[ns]'),
            'link_down_message': pd.Series(dtype='object'),
            'net_ticket': pd.Series(dtype='object'),
        })

    # Create DataFrame and save to CSV
    result_df = pd.DataFrame(results, columns=['datetime', 'link_down_message', 'net_ticket'])
    filename = f'{month_name}_link.csv'
    result_df.to_csv(filename, index=False)
    print(f"Saved {len(result_df)} link downs to {filename}")
    return result_df

# Process each month
march_results = extract_link_downs_with_tickets(march_data, 'march')
april_results = extract_link_downs_with_tickets(april_data, 'april')
may_results = extract_link_downs_with_tickets(may_data, 'may')

monthly_results = [('March', march_results), ('April', april_results), ('May', may_results)]

# Combine all months into one in-memory frame (nothing is written to disk here).
# Empty months are left out of the concat, and the all-empty case is handled
# explicitly because pd.concat refuses an empty list.
non_empty_results = [frame for _, frame in monthly_results if not frame.empty]
all_results = (
    pd.concat(non_empty_results, ignore_index=True) if non_empty_results else pd.DataFrame()
)

if not all_results.empty:
    print(f"\nTotal link downs found across all months: {len(all_results)}")
    print("Summary of found NET tickets by month:")
    # The loop variable is not called `df`, so the full-chat `df` from the
    # first cell is left intact.
    for month, month_results in monthly_results:
        if not month_results.empty:
            tickets_found = month_results['net_ticket'].ne('Not found').sum()
            print(f"  {month}: {tickets_found} of {len(month_results)} link downs have NET tickets")

    # Display the first few entries from the combined results. A bare
    # expression nested inside an `if` is never rendered by Jupyter, so the
    # frame has to be passed to display() explicitly.
    display(all_results.head())
else:
    print("No link downs found in any month")


In [None]:
# Clean the data to only show link down messages with NET tickets

def clean_and_save_link_data(results_df, month_name):
    """Keep only link downs that have a NET ticket and save a trimmed CSV.

    Args:
        results_df: DataFrame with ``datetime``, ``link_down_message`` and
            ``net_ticket`` columns, where ``net_ticket`` is ``'Not found'``
            for link downs without a ticket. A completely empty frame (no
            rows and no ``net_ticket`` column) is accepted and treated as
            "no link downs".
        month_name: Label used for the output file
            ``{month_name}_escalated_links.csv``.

    Returns:
        A tuple ``(clean_df, count)``: ``clean_df`` has the columns
        ``datetime``, ``link_endpoints`` and ``net_ticket``; ``count`` is its
        number of rows. ``link_endpoints`` is the text between
        ``Link Down |`` and the next ``|`` (or the end of the message) with
        surrounding whitespace removed, or NaN when the message does not
        follow that layout.
    """
    # A month without any link downs may arrive as a column-less empty frame;
    # give it the expected schema instead of failing on the column lookup.
    if results_df.empty and 'net_ticket' not in results_df.columns:
        results_df = pd.DataFrame(columns=['datetime', 'link_down_message', 'net_ticket'])

    # Filter only records that have a NET ticket (not 'Not found')
    links_with_tickets = results_df[results_df['net_ticket'] != 'Not found'].copy()

    # expand=False returns a Series for the single capture group; strip()
    # drops the padding left in front of the next "|" separator.
    links_with_tickets['link_endpoints'] = (
        links_with_tickets['link_down_message']
        .str.extract(r'[Ll]ink [Dd]own \| ([^|]+)', expand=False)
        .str.strip()
    )

    # Create a clean version with only essential columns
    clean_df = links_with_tickets[['datetime', 'link_endpoints', 'net_ticket']]

    # Save to CSV
    filename = f'{month_name}_escalated_links.csv'
    clean_df.to_csv(filename, index=False)
    return clean_df, len(clean_df)

# Process each month
march_clean, march_count = clean_and_save_link_data(march_results, 'march')
april_clean, april_count = clean_and_save_link_data(april_results, 'april')
may_clean, may_count = clean_and_save_link_data(may_results, 'may')

# Display summary
display(Markdown("## Summary of Escalated Link Downs"))
print(f"March 2025: {march_count} escalated link downs with NET tickets")
print(f"April 2025: {april_count} escalated link downs with NET tickets")
print(f"May 2025: {may_count} escalated link downs with NET tickets")
print(f"Total: {march_count + april_count + may_count} escalated link downs with NET tickets")

# Create a combined version. Empty months are left out of the concat, and the
# all-empty case falls back to an empty frame with the expected columns.
non_empty_clean = [frame for frame in (march_clean, april_clean, may_clean) if not frame.empty]
if non_empty_clean:
    all_clean = pd.concat(non_empty_clean, ignore_index=True)
else:
    all_clean = pd.DataFrame(columns=['datetime', 'link_endpoints', 'net_ticket'])

# month_name() returns English names by default, whereas strftime('%B')
# follows the process locale and would not line up with the English labels
# used for the reindex below. to_datetime() is a no-op for real timestamps and
# keeps the .dt accessor working when the combined frame is empty.
all_clean['month'] = pd.to_datetime(all_clean['datetime']).dt.month_name()

# Display sample of the clean data
display(Markdown("## Sample of Escalated Link Down Messages with NET Tickets"))
display(all_clean.head(10))

# Create monthly visualization. fill_value=0 makes a month without tickets
# show up as a zero count instead of NaN.
tickets_by_month = (
    all_clean.groupby('month').size().reindex(['March', 'April', 'May'], fill_value=0)
)

fig, ax = plt.subplots(figsize=(10, 6))
tickets_by_month.plot(kind='bar', color='green', ax=ax)
ax.set_title('Number of Escalated Link Downs with NET Tickets by Month')
ax.set_ylabel('Count')
ax.set_xlabel('Month')
ax.grid(axis='y', linestyle='--', alpha=0.7)
fig.tight_layout()
plt.show()
