In [None]:
# Enable IPython's autoreload extension in mode 2, which re-imports modules
# before each cell executes, so edits to imported modules (e.g. whatsapp_parser)
# are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

import re
import pandas as pd
from typing import Tuple, Optional
from pathlib import Path
from datetime import timedelta, datetime
from whatsapp_parser import extract_messages, cleanup, WhatsAppGroupAnalysis

In [None]:
# Path to your exported WhatsApp chat; edit the filename to point at your own export.
file_path = Path("_chat 3.txt")
# Fail fast with the resolved location so a wrong working directory or filename
# is obvious from the error instead of a bare AssertionError.
assert file_path.exists(), f"Chat export not found: {file_path.resolve()}"

In [None]:
# Load the previously saved, pipe-separated message snapshot and convert its
# "Datetime" column from text to pandas timestamps in one chained expression.
previous_df = pd.read_csv("../20231120_Messages.csv", sep="|").assign(
    Datetime=lambda frame: pd.to_datetime(frame["Datetime"])
)
previous_df

In [None]:
def parse_chat_line(line: str) -> Optional[Tuple[datetime, str, str]]:
    """
    Parse one line of an exported chat of the form ``[timestamp] sender: message``.

    Parameters:
        line (str): A single raw line from the chat log.

    Returns:
        tuple | None: ``(date_time, sender, message)`` when the line starts with a
        bracketed timestamp in one of the supported formats, otherwise ``None``.
        Lines that do not match the pattern, or whose bracketed prefix is not a
        recognised timestamp, yield ``None`` rather than raising.
    """
    match = re.match(r"\[(.*?)\] (.*?): (.*)", line)
    if not match:
        return None

    date_time_str, sender, message = match.groups()
    # Try each known timestamp layout in turn. Previously the second attempt ran
    # inside the except handler, so a bracketed prefix matching neither layout
    # raised an uncaught ValueError and aborted parsing of the whole file.
    for timestamp_format in ('%Y-%m-%d, %H:%M:%S', '%m/%d/%y, %H:%M:%S'):
        try:
            date_time = datetime.strptime(date_time_str, timestamp_format)
        except ValueError:
            continue
        return date_time, sender, message
    return None

def parse_chat(file_path: str) -> pd.DataFrame:
    """
    Parses a WhatsApp chat log into a DataFrame.

    Every line of the file is passed to `parse_chat_line`; lines for which it
    returns a falsy value (None) are skipped.

    Parameters:
        file_path (str): Path to the chat log file. Any path-like value accepted
            by the built-in `open` (for example a `pathlib.Path`) also works.

    Returns:
        pd.DataFrame: DataFrame containing the parsed chat with columns
        'Datetime', 'Sender', 'Message' (in that order), one row per parsed line.
    """
    # Decode explicitly as UTF-8 instead of relying on the platform's default
    # encoding, which differs between systems and can fail or garble non-ASCII
    # text. NOTE(review): assumes the export is UTF-8 encoded — confirm.
    with open(file_path, 'r', encoding='utf-8') as file:
        parsed_data = [
            parsed_line
            for parsed_line in map(parse_chat_line, file)
            if parsed_line
        ]

    return pd.DataFrame(parsed_data, columns=['Datetime', 'Sender', 'Message'])

# Parse the export, then hand the parsed frame to cleanup(); the row counts
# before and after are printed so the effect of the cleanup step is visible.
parsed_df = parse_chat(file_path=file_path)
print(f"Before cleanup: {len(parsed_df)}")

df = cleanup(parsed_df)
print(f"After cleanup: {len(df)}")

df

In [None]:
# Stack the freshly parsed messages on top of the previously saved ones and
# renumber the index. Because df is rebound to the combined frame, re-running
# this cell appends previous_df again.
# NOTE(review): if the new export overlaps the previous snapshot in time, the
# overlapping messages would appear twice — confirm whether deduplication is needed.
df = pd.concat((df, previous_df), ignore_index=True)
(df["Datetime"].min(), df["Datetime"].max())

In [None]:
# Name the snapshot after the date (YYYYMMDD) of the newest message in df.
latest_date = df['Datetime'].max().strftime("%Y%m%d")
file_name = f"../{latest_date}_Messages.csv"
# Save the dataframe as a pipe-separated csv file with {latest_date}_Messages.csv
# as the filename, in the parent directory and without the index column.
df.to_csv(file_name, index=False, encoding='utf-8', header=True, sep="|")

In [None]:
# Create an instance of the analysis class from the combined in-memory
# message DataFrame (df), not from a file on disk.
analysis = WhatsAppGroupAnalysis(df)

# Call the methods and keep their results; nothing is displayed by this cell.
current_users_df = analysis.get_current_users()
# NOTE(review): the meaning and units of the argument 10 are defined in
# whatsapp_parser and are not visible here — confirm, and consider a named constant.
message_count_in_window_df = analysis.get_message_count_in_window(10)
# message_count_in_window_df

In [None]:
# Ask the analysis object for the inactive users and display the result.
# NOTE(review): exclude_contacts=False presumably keeps saved contacts in the
# result — confirm against WhatsAppGroupAnalysis.get_inactive_users.
inactive_users_to_remove = analysis.get_inactive_users(exclude_contacts=False)
inactive_users_to_remove

In [None]:
# Order the inactive users by total messages sent, breaking ties by joining
# date; both keys ascending. The sorted frame is shown as the cell output.
inactive_users = inactive_users_to_remove.sort_values(
    by=['Total_Messages_Sent', 'Joining_Date'],
    ascending=True,
)
inactive_users

In [None]:
# Write the sorted inactive-user table to inactive_users.csv in the current
# working directory, omitting the DataFrame index column.
inactive_users.to_csv('inactive_users.csv', index=False)