In [1]:
%load_ext autoreload
%autoreload 2

import pandas as pd
from pathlib import Path
import zipfile

import click
from loguru import logger
from whatsapp_parser import extract_dataframe
from whatsapp_parser import WhatsAppGroupAnalysis

Path.ls = lambda x: list(x.iterdir())

In [2]:
source_folder_path = Path("../Community_Chat_Exports")
chat_text_files_path = Path("../chat_text_files")
chat_text_files_path.mkdir(parents=True, exist_ok=True)
# Extract all zip files in the folder while retaining the name from zip file after "_"
def extract_and_rename_zip_files(source_folder_path: Path, export_path: Path):
    for file in source_folder_path.glob("*.zip"):
        logger.info(f"Extracting {file}")
        with zipfile.ZipFile(file, "r") as zip_ref:
            # Rename the extracted txt file with the name of the zip file after "_"
            new_file_name = chat_text_files_path / f"{file.stem.split('_')[1]}.txt"
            zip_ref.extractall(source_folder_path)
            for extracted_file in source_folder_path.glob("*.txt"):
                logger.info(f"Renaming {extracted_file} to {new_file_name}")
                extracted_file.rename(new_file_name)

extract_and_rename_zip_files(source_folder_path, chat_text_files_path)

In [3]:
chat_csv_files = Path("../chat_csv_files")
chat_csv_files.mkdir(parents=True, exist_ok=True)

In [4]:
def process_file(input_file):
    input_path = Path(input_file)
    if not input_path.is_file() or input_path.suffix != ".txt":
        click.echo(f"Error: {input_file} is not a valid .txt file")
        return

    logger.info(f"Processing {input_path.name}")
    df = extract_dataframe(input_path)
    df["group_name"] = input_path.stem
    output_file = chat_csv_files / f"{input_path.stem}.csv"
    df.to_csv(output_file, index=False)
    logger.info(f"Processed {len(df)} rows. Output saved to {output_file}")


for file in chat_text_files_path.ls():
    process_file(file)

In [5]:
chat_csv_files = Path("../chat_csv_files")

combined_df = pd.concat(
    [
        pd.read_csv(file)
        for file in chat_csv_files.ls()
        if file.is_file() and file.suffix == ".csv"
    ]
)
combined_df.head()

ValueError: No objects to concatenate

In [56]:
# Create an instance of the class using the uploaded CSV file
analysis = WhatsAppGroupAnalysis(combined_df)

# Test the methods
current_users_df = analysis.get_current_users()
message_count_in_window_df = analysis.get_message_count_in_window(60)
# message_count_in_window_df

In [57]:
current_users_df

(                                   User
 0                        ~ Apurva Bhatt
 1                       ~ Shaurya Gupta
 2                               ~ Tapan
 3                    Nirmal GenAI group
 4     Dr. Ashith Generative AI WA Group
 ..                                  ...
 760                    ~ Prasanna/Vinay
 761                      ~ Shreya Mandi
 762                  ~ Amritansh Mishra
 763                            ~ Palash
 764                          ~ Anuruddh
 
 [765 rows x 1 columns],
 765)

In [58]:
inactive_users_to_remove = analysis.get_inactive_users(exclude_contacts=False)

In [59]:
# Sort by total messages sent and then by joining date
inactive_users = inactive_users_to_remove.sort_values(
    by=["Total_Messages_Sent", "Joining_Date"], ascending=[True, True]
)
# Display the DataFrame
inactive_users[:201]

Unnamed: 0,User,Message_Count_In_Window,Joining_Date,Total_Messages_Sent,Most_Recent_Message_Date
57,~ Charlie,0.0,2023-04-19 01:38:54,1,2023-04-19 01:38:54
58,Saurab Paruthi,0.0,2023-04-30 12:58:57,1,2023-04-30 12:58:57
62,~ PARITOSH,0.0,2023-05-26 12:13:40,1,2023-05-26 12:13:40
63,~ Chandan,0.0,2023-05-27 11:21:30,1,2023-05-27 11:21:30
64,~ Ugam Kamat,0.0,2023-05-28 09:12:28,1,2023-05-28 09:12:28
...,...,...,...,...,...
230,~ Divyansh Tripathi,0.0,2024-03-09 13:47:33,5,2024-04-05 23:34:55
231,~ Rohit Joshi,0.0,2024-03-18 09:20:46,5,2024-03-22 12:06:42
248,~ Sreedevi,0.0,2024-03-31 16:55:53,5,2024-05-03 18:45:52
252,~ Charu,0.0,2024-04-07 11:53:09,5,2024-08-04 14:03:09


In [60]:
inactive_users.to_csv("inactive_users.csv", index=False)