# Part 1: Function to Parse WhatsApp Chat for Task Completion

In [1]:
import re
from datetime import datetime, date  # Importing date to use date.today()

def parse_task_completion_and_additions(file_path, start_date, end_date):
    """
    Read the WhatsApp file path, handle the date format, convert start and end dates to determine object,
    use regex to capture the date, time, user, and message. Additionally, capture user additions.
    """
    # Set to store users who have sent 'done', 'dn', or 'تم'
    done_users = set()
    
    # Set to store users who were added or who added someone
    added_users = set()
    added_by_others = set()

    # Read the WhatsApp file
    with open(file_path, 'r', encoding='utf-8') as f:
        lines = f.readlines()

    # format (dd.mm.yyyy)
    date_format = '%d.%m.%Y'

    # Convert the start and end dates to datetime objects
    start_date = datetime.strptime(start_date, date_format).date()
    end_date = datetime.strptime(end_date, date_format).date()

    # Regex patterns
    # message_pattern = re.compile(r'\[(\d{1,2}\.\d{1,2}\.\d{4}) (\d{2}:\d{2}:\d{2})\] (?:~\s*|)(.*?): (.*)')
    # added_pattern = re.compile(r'~\s*(.*?) was added')
    # added_by_pattern = re.compile(r'~\s*(.*?) added ~\s*(.*)')
    message_pattern = re.compile(r'\[(\d{1,2}\.\d{1,2}\.\d{4})(?:,)? (\d{2}:\d{2}:\d{2})\] (?:~\s*|)(.*?): (.*)')
    added_pattern = re.compile(r'~\s*(.*?) was added')
    added_by_pattern = re.compile(r'~\s*(.*?) added ~\s*(.*)')
    join_from_community_pattern = re.compile(r'\[(\d{1,2}\.\d{1,2}\.\d{4})(?:,)? (\d{2}:\d{2}:\d{2})\] (?:~\s*|)(.*?): (.*) joined from the community')



    current_user = None
    current_message = ""

    for line in lines:
        match = message_pattern.match(line)
        join_match = join_from_community_pattern.match(line)

        if match:
            # If there's a new match, process the previous message before resetting
            if current_user and current_message:
                # Process the previous message for task completion
                cleaned_message = current_message.strip().lower()
                if ('done' in cleaned_message or 
                    'dn' in cleaned_message or 
                    '✅' in cleaned_message or
                    'done✅' in cleaned_message or
                    'done' in cleaned_message or
                    'تم ' in cleaned_message or
                    'ضن' in cleaned_message or
                    re.search(r'\bتم\b', cleaned_message) or 
                    cleaned_message == 'تم' or 
                    'تم' in cleaned_message):
                    done_users.add(current_user)

            # Start a new message
            date_str, time_str, user, message = match.groups()
            current_user = user.strip()
            current_message = message.strip()

            # Convert message date to a date object and check if within date range
            message_date = datetime.strptime(date_str, date_format).date()

            if not (start_date <= message_date <= end_date):
                current_user = None  # Ignore this message if it's not in the date range
                current_message = ""
                continue

            # Check for added users or users added by someone
            added_match = added_pattern.search(current_message)
            added_by_match = added_by_pattern.search(current_message)

            if added_match:
                added_user = added_match.group(1).strip()
                added_users.add(added_user)

            if added_by_match:
                added_by = added_by_match.group(1).strip()
                added_user = added_by_match.group(2).strip()
                added_users.add(added_user)
                added_by_others.add(added_by)
                
            elif join_match:
                # Process the "joined from the community" message
                date_str, time_str, user, _ = join_match.groups()
                user = user.strip()
                added_users.add(user)

        else:
            # If no match, it means this is a continuation of the current message (multi-line)
            current_message += " " + line.strip()

    # Process the last message after the loop ends
    if current_user and current_message:
        cleaned_message = current_message.strip().lower()
        if ('done' in cleaned_message or 
            'dn' in cleaned_message or 
            '✅' in cleaned_message or
            'done✅' in cleaned_message or
            'done' in cleaned_message or
            'تم ' in cleaned_message or
            'ضن' in cleaned_message or
            re.search(r'\bتم\b', cleaned_message) or 
            cleaned_message == 'تم' or 
            'تم' in cleaned_message):
            done_users.add(current_user)

    return done_users, added_users, added_by_others

# Part 2: Compare Group Members with Task Completion

In [5]:
def compare_group_with_done_users_and_added(done_users, added_users, added_by_others):
    """
    Comparing the list of groups that I got from 18.08.2024 (after the deadline one) until now.
    And find the user that didn't match with the existing user that send from 26.08.2024 (deadline 3)
    Also prints added users and who added them.
    """
    # Full list of group members provided
    group_members = {
"* PS",
".",
"..",
"3bnaby",
"Abdelfattah Mohamed",
"Abdelrahman Adel PS",
"Abdulrhman Ahmed PS",
"Ahmad Zanaty",
"Ahmed Elkassrawy PS",
"Ahmed Mahmoud Dabour",
"Ahmed Mohamed PS",
"Ahmed Mounir PS",
"Ahmed Salman PS",
"Ali Ismail",
"Ali PS",
"Amany Mohammed PS",
"Amina",
"Amina PS",
"Aysha",
"Big_O PS",
"David PS",
"Elknz PS",
"Eman Ashraf PS",
"Eng Mo _ Atia",
"Esraa Elzehery",
"Esraa Salah",
"Gehad",
"Habiba Gamal PS",
"Hajer",
"Hasnaa PS",
"Hesham Elsayed PS",
"Hesham Hatem",
"Hüsam",
"Ibrahim Atef Eltahlawy 🇵🇸",
"Islam PS",
"Jehad Amjad",
"KHALED",
"Karema PS",
"Karim PS",
"Kholoud",
"MS",
"Mahmoud Salem",
"Mahmoud 🔴🔵⚽",
"Mariam Elgazzar",
"Mariam Osman PS",
"Marwa PS",
"Menna Seif",
"Merna",
"Minno PS",
"Mo7 Shdw PS",
"Mo7amed Galal PS",
"Mohamed",
"Mohamed PS",
"Mohamed Rabee PS",
"Mostafa Medhat PS",
"Mostafa Sayed",
"Nada Abdelghaffar PS",
"Nariman Awny",
"Ola",
"Omar",
"Omar Nabil PS",
"Omar Sayed",
"Omnia Ashraf PS",
"Omnia PS",
"Os!Rabe3 PS",
"PS",
"PS-StudyingDS 📔",
"Professor 👤",
"Radwan",
"Rawan",
"Reham Galal",
"Salah Mohamed Twtr",
"Salma",
"Salma Abdelkarim",
"Salma PS",
"Seif",
"Shady",
"Shahd Hamdi",
"Shahd Tamer PS",
"Shaimaa",
"Shaimaa Fikry",
"TAREK PS",
"Taha Shaban",
"Tarek PS",
"Tasneem",
"Xo_Sh3r11f",
"Yos S3D Twtr",
"Younes PS",
"Zeyad PS",
"Ziad Nasser PS",
"Zoe💜",
"alaafahiem",
"amenasaad83",
"hager ashraf",
"hussien",
"mahmoudasal",
"~ Ahmed",
"إسراء",
"رَحْمَة أَمِينٍ",
"رُفَيده سَعْد PS",
"غادة PS",
"محمد بن محمود الهواري",
"محمود محمد PS",
"ميخا PS",
"ّّّّّّّّّّّّّّّّّّّّّّّّّ",
"‎You",
"›› 라니아 ‹‹",
"☆아말☆أمل PS",
"𓂆 PS",
"𝑫𝑶𝑯𝑨",
}


    # Find users in the group who have not completed the task
    not_done_users = group_members - done_users

    # Output users who did NOT complete the task
    print("\nUsers who did NOT complete the task:")
    if not_done_users:
        for user in sorted(not_done_users):
            print(user)
    else:
        print("All users completed the task.")

    print("\n==================================")
    print("\nUsers who completed the task:")
    for user in sorted(done_users):
        print(user)

    # Output users who were added to the group
    print("\n==================================")
    print("\nUsers who were added to the group:")
    for user in sorted(added_users):
        print(user)

    # Output users who added others to the group
    print("\n==================================")
    print("\nUsers who added someone to the group:")
    for user in sorted(added_by_others):
        print(user)


# Part 3: Main Function

In [6]:
def main():
    """Main Function to apply all the functions written above"""
    # File path of exported WhatsApp chat (change it to your file's location)
    file_path = '_chat_9_DS.txt'

    # Specify the date range (start date and end date)
    start_date = '01.12.2024'
    end_date = date.today().strftime('%d.%m.%Y')  # Use today's date

    # Extract users who completed the task between the start and end date
    done_users, added_users, added_by_others = parse_task_completion_and_additions(file_path, start_date, end_date)

    # Compare the task completion with the full list of group members and output added users
    compare_group_with_done_users_and_added(done_users, added_users, added_by_others)

if __name__ == '__main__':
    main()


Users who did NOT complete the task:
.
..
Abdelfattah Mohamed
Abdulrhman Ahmed PS
Amina
Amina PS
David PS
Eng Mo _ Atia
Esraa Elzehery
Esraa Salah
Gehad
Habiba Gamal PS
Hajer
Hasnaa PS
Hesham Elsayed PS
Ibrahim Atef Eltahlawy 🇵🇸
Islam PS
Jehad Amjad
Kholoud
Mahmoud 🔴🔵⚽
Marwa PS
Menna Seif
Merna
Mohamed
Nariman Awny
Ola
Omar
PS-StudyingDS 📔
Professor 👤
Rawan
Salma Abdelkarim
Shaimaa Fikry
Taha Shaban
Tarek PS
Tasneem
Xo_Sh3r11f
Younes PS
Ziad Nasser PS
alaafahiem
إسراء
رَحْمَة أَمِينٍ
غادة PS
ّّّّّّّّّّّّّّّّّّّّّّّّّ
‎You


Users who completed the task:
* PS
3bnaby
Abdelrahman Adel PS
Ahmad Zanaty
Ahmed Elkassrawy PS
Ahmed Mahmoud Dabour
Ahmed Mohamed PS
Ahmed Mounir PS
Ahmed Salman PS
Ali Ismail
Ali PS
Amany Mohammed PS
Aysha
Big_O PS
Elknz PS
Eman Ashraf PS
Hesham Hatem
Hüsam
KHALED
Karema PS
Karim PS
MS
Mahmoud Salem
Mariam Elgazzar
Mariam Osman PS
Minno PS
Mo7 Shdw PS
Mo7amed Galal PS
Mohamed PS
Mohamed Rabee PS
Mostafa Medhat PS
Mostafa Sayed
Nada Abdelghaffar PS
Omar Nabil PS
Om

# PART4: Generate a list of Group-members 
### `(with a specific date until now)`

In [4]:
def extract_group_members(file_path, start_date, end_date):
    # Set to store unique users
    group_members = set()

    # Read the WhatsApp export file
    with open(file_path, 'r', encoding='utf-8') as f:
        lines = f.readlines()

    # Specify the date format that matches your WhatsApp's export format (dd.mm.yyyy)
    date_format = '%d.%m.%Y'

    # Convert the start and end dates to datetime objects
    start_date = datetime.strptime(start_date, date_format).date()
    end_date = datetime.strptime(end_date, date_format).date()

    # Regex pattern to capture WhatsApp message with date, time, user, and message
    # pattern = re.compile(r'\[(\d{1,2}\.\d{1,2}\.\d{4}) (\d{2}:\d{2}:\d{2})\] (?:~\s*|)(.*?): (.*)')
    pattern = re.compile(r'\[(\d{1,2}\.\d{1,2}\.\d{4})(?:,)? (\d{2}:\d{2}:\d{2})\] (?:~\s*|)(.*?): (.*)')

    for line in lines:
        match = pattern.match(line)
        if match:
            date_str, time_str, user, message = match.groups()

            # Convert message date to a date object
            message_date = datetime.strptime(date_str, date_format).date()

            # Check if the message date is within the specified date range
            if start_date <= message_date <= end_date:
                # Clean up the user name by stripping extra spaces or non-visible characters
                user = user.strip()

                # Debugging: Print the users being processed
                print(f"Processing message from: {user} at {date_str}")

                group_members.add(user)
        else:
            # Log lines that don't match to debug why certain messages are missed
            print(f"Line not matched: {line.strip()}")

    return group_members

def main():
    # File path of exported WhatsApp chat (change it to your file's location)
    file_path = '_chat_9_DS.txt'

    # Specify the date range (start date and end date)
    start_date = '10.11.2024'
    end_date = date.today().strftime('%d.%m.%Y')  # Use today's date

    # Extract group members who sent messages between the start and end date
    group_members = extract_group_members(file_path, start_date, end_date)

    # Output the group members
    print(f"\nGroup members who sent messages between {start_date} and {end_date}:")
    if group_members:
        for member in sorted(group_members):
            print(f'"{member}",')
    else:
        print("No messages found in the given date range.")

if __name__ == '__main__':
    main()


Line not matched: ‎[2.05.2024, 20:42:12] Kholoud: ‎sticker omitted
Line not matched: مش عارف هنبدأ امتى
Line not matched: بس هنبدأ
Line not matched: ما تيلا يعم يوسف الاسبوع قرب يخلص والناس مستنيالي غلطة
Line not matched: 
Line not matched: هنبدأ من انهاردة ان شاء الله كتاب cracking the code interview question او كما يُقال خُلاصة LeetCode لان الشركات بتجيب منه اسئلة الانترڤيوز
Line not matched: الاول soft skills وتعمل ايه ف الانترڤيو ومرحلة قبل او بعد الانترڤيو وتعمل ايه فال cv وال FAANG ال process بتاعتها عاملة ازاي وبجد مفيد جدا بس انا مهتم بالقسم التاني، ف بصو بصة عليه هيفيدكم
Line not matched: القسم التاني ال Topics فيها عن ال ps ومتقسم topics وشرح واسئلة عليهم
Line not matched: هنشوف شرح الكتاب واسئلته ونذاكر من ال handbook،، الاتنين مع بعض هيخلوك تنين مجنح
Line not matched: - اول شابتر ف الكتاب arrays - strings كلام بسيط بس الكتاب فيه معلومات انا متاكد ان الاغلب ميعرفهاش
Line not matched: - ⁠مذاكرة ال arrays و ال strings من ال handbook
Line not matched: 
Line not matched: ال dead