In [1]:
import re
import typing
import os
import io
from datetime import date

In [11]:
def extract_message(message:str, my_age_lower:int, my_age_upper:int, my_gender:typing.Literal['F', 'M']) -> bool:
    """
    Decide whether a casting notice fits the given age range and gender.

    Every age (single value or range) and every gender keyword found anywhere
    in the message is collected; the two criteria are evaluated independently
    of each other, they are not paired per role.

    Parameters
        message: str, the message we want to check
        my_age_lower: int, the lower bound of age we want to check in message
        my_age_upper: int, the upper bound of age we want to check in message
        my_gender: 'F' or 'M', the gender we want to check in message.
    Return
        bool: True when at least one criterion is mentioned and every
        mentioned criterion has at least one match. False when a mentioned
        criterion has no match, or when the message mentions neither age
        nor gender.
    Side effects
        Prints the extracted ages and every matching age / gender mention.
    """
    # Tri-state flags: -1 = not mentioned, 0 = mentioned without a match, 1 = matched.
    age_qualified = 0
    gender_qualified = 0

    # Accepted age spellings: "(18-35)" / "（18～35）", "25-30歲" / "25~30y", "30歲" / "100y".
    character_age_pattern = r"[\(（]\d{2}[-~～]\d{2}[\)）]|\d{2}[-~～]\d{2}\s*[yY歲]|\d{2,3}\s*[yY歲]"
    # Searched case-insensitively: a case-sensitive search finds the substring
    # "male" inside "Female"/"FEMALE" and would read a female role as male.
    character_gender_pattern = r"(男|女|female|male)"
    character_ages = re.findall(character_age_pattern, message)
    character_genders = re.findall(character_gender_pattern, message, flags=re.IGNORECASE)
    print(f"Character ages: {character_ages}")

    # Check "Age" data
    if not character_ages:
        age_qualified = -1          # message didn't mention age
    else:
        for ch_age in character_ages:
            # One number is a single age, two numbers are a range; sorting
            # lets both be treated as the closed interval [bounds[0], bounds[-1]].
            bounds = sorted(int(number) for number in re.findall(r"\d+", ch_age))
            if bounds[0] <= my_age_upper and my_age_lower <= bounds[-1]:
                print(ch_age)
                age_qualified = 1

    # Check "Gender" data
    wanted_keywords = {'F': ("female", "女"), 'M': ("male", "男")}.get(my_gender, ())
    if not character_genders:
        gender_qualified = -1        # message didn't mention gender
    else:
        for gender in character_genders:
            if gender.lower() in wanted_keywords:
                print(gender)
                gender_qualified = 1

    # A mentioned-but-unmatched criterion disqualifies the notice outright.
    if age_qualified == 0 or gender_qualified == 0:
        return False
    # Otherwise at least one criterion must actually have matched.
    return age_qualified == 1 or gender_qualified == 1

In [46]:
# Run extract_message over every notice of the sample export and print the verdict.
# newline='\r\n' makes only '\r\n' end a record, so the bare '\n' line breaks
# inside a notice stay part of that notice.
# The encoding is explicit so decoding does not depend on the locale default.
with open('/Users/chi/Downloads/extract.txt', newline='\r\n', encoding='utf-8') as notices:
    for ctr, line in enumerate(notices):
        print(f"message: {ctr}")
        print(extract_message(line, 18, 28, 'F'))

message: 0
Character ages: ['25-50y', '25-50y', '40-50y']
25-50y
25-50y
女
女
女
True
message: 1
Character ages: ['30歲', '25-30歲']
25-30歲
女
True
message: 2
Character ages: ['45~50y', '30~40y', '30~35y', '50~55y', '45~50y', '30~40y']
女
False
message: 3
Character ages: ['（18～35）', '23-25歲']
（18～35）
23-25歲
女
女
True
message: 4
Character ages: ['（18～35）', '23-25歲']
（18～35）
23-25歲
女
女
True
message: 5
Character ages: ['35-55y']
女
False


In [12]:
# Read the export from the end of the file, one carriage-return-separated
# message at a time (newest first), and report whether each notice is worth
# applying to. The scan stops at the first message that starts with search_date.

# Date header ("YYYY/MM/DD") at which the scan stops. Pinned to a fixed day;
# use date.today().strftime("%Y/%m/%d") to stop at today's header instead.
search_date = "2023/03/12"
age_lower = 18
age_upper = 28
gender = 'F'
search_file = '/Users/chi/Downloads/extract_date.txt'


def _iter_chunks_from_end(binary_file, block_size=4096):
    """
    Yield the carriage-return-separated byte chunks of an open binary file,
    last chunk first, reading the file backwards one block at a time.
    """
    binary_file.seek(0, os.SEEK_END)
    position = binary_file.tell()
    pending = b""                   # start of a chunk that continues into the previous block
    while position > 0:
        step = min(block_size, position)
        position -= step
        binary_file.seek(position)
        pieces = (binary_file.read(step) + pending).split(b'\r')
        pending = pieces[0]         # may still be incomplete, keep it for the next block
        yield from reversed(pieces[1:])
    yield pending                   # whatever precedes the first carriage return


with open(search_file, 'rb') as f:
    if os.path.getsize(search_file) > 2:
        for raw_chunk in _iter_chunks_from_end(f):
            # Bytes are decoded per complete chunk, so multi-byte characters are never split.
            buff_decode = raw_chunk.decode().strip()
            print(buff_decode)
            if buff_decode.startswith(search_date):   # Stop when the message is published in assigned search date
                break                                 # (avoid look up messages before the assigned search date)
            # check if the user is qualified for the notice
            if extract_message(buff_decode, age_lower, age_upper, gender):
                print("APPLY!")
            else:
                print("Oops")
        else:
            # Every chunk was processed without meeting the search date.
            print("It's the begining of file: 0")

04:41 AM	4-4139祁煜恒	"3/14善良影集
早上06:00-14:00/北市

客人男女28-45歲
費用：500+100早車/8小時
———————————
中午12:00-18:00/北市

情侶25-35歲
商務男女30-50歲
費用：700/8小時
來過不能來
❤️‍🔥秒回是我的職責，顏值是我的特色，歡迎來找我🥹"
Character ages: ['28-45歲', '25-35歲', '30-50歲']
28-45歲
25-35歲
女
女
APPLY!
12:58 AM	🌟李小凡⭐	"3/12惡靈世界影集

1.🗿
角色：記者*2
視覺20-35歲
時間：10：00（暫定）
費用：700
略有動作跟情境戲
地點：台北市

8h/班 如遇超班 每小時+100

有意願報名的朋友 請給我
姓名/身高/體重/年紀/
五官清晰照片

拍攝時長浮動大 
不接受臨時早退

🗿不可以拍過的喔
💢費用收工現領
Line: leechaofan 💢"
Character ages: ['20-35歲']
20-35歲
APPLY!
12:26 AM	❤️桂ᴸᴵᴺᴳ	"3/12惡靈
早上8:00 西裝便衣1男
身高172以上
25-35歲左右

記者3男
25-35歲左右


費用700/8小時
時間費用可以請跟我報名"
Character ages: ['25-35歲', '25-35歲']
25-35歲
25-35歲
Oops
12:22 AM	4-4139祁煜恒	"免疫-偶像劇   3/12（日）
❗很容易上❗
時間： 09：00
地點：新北市三峽區大學路

女大學生
視覺年齡 25內
費用：700+100（800含車）
8h/班 如遇超班 每小時+100
收工現領
❤️‍🔥秒回是我的職責，顏值是我的特色，歡迎來找我🥹"
Character ages: []
女
APPLY!
2023/03/12, Sun
