In [7]:
# NOTE(review): the literal has no placeholders, so the `f` prefix is a no-op.
f"https://www.dafont.com/forum/change.php?f=2&p=317"

'https://www.dafont.com/forum/change.php?f=2&p=317'

In [None]:
# https://www.dafont.com/forum/?f=1&p=317

In [None]:
import dataclasses
import datetime as dt
import json
import logging
import os
import re
import typing
import urllib.parse

import bs4
import requests


@dataclasses.dataclass
class Task:
    """A forum thread ("task") as extracted from its thread page.

    Instances are created by ``parse_task_from_task_url`` and converted with
    ``dataclasses.asdict`` when they are saved to JSON.
    """
    # Thread id: element 3 of the thread URL's path split on "/".
    task_id: str
    # URL of the thread page the task was parsed from.
    url: str
    # ``alt`` text of the attached image ("" when the attribute is missing).
    title: str
    # Absolute URL of the first image whose src starts with /forum/attach/orig.
    img_url: str
    # Link text found in the "Identified font:" block, or None when there is none.
    identified_font: str | None


@dataclasses.dataclass
class TaskThumb:
    """One thread entry as it appears on a forum listing page.

    Attributes:
        task_id: Thread id taken from the thumbnail link (a string such as
            "575706").
        url: Absolute URL of the thread page.
        updated_at: Update time parsed from the listing entry.
    """
    # Was annotated as ``StopAsyncIteration``; the value stored is the string id.
    task_id: str
    url: str
    updated_at: dt.datetime


def parse_task_from_task_url(task_url: str, session: requests.Session) -> Task:
    """Fetch a forum thread page and extract its task data.

    Args:
        task_url: Absolute URL of the thread. The task id is element 3 of its
            path split on "/" (e.g. "/forum/read/<id>/<slug>").
        session: Session used to perform the GET request.

    Returns:
        A ``Task`` with the id, the URL, the image ``alt`` text as title, the
        absolute image URL and the identified font (``None`` when the page has
        no "Identified font:" block containing a link).

    Raises:
        requests.HTTPError: If the server answers with an error status.
        ValueError: If the page has no image under ``/forum/attach/orig``.
    """
    response = session.get(task_url)
    # Fail on HTTP errors instead of parsing an error page, as the listing
    # scraper already does.
    response.raise_for_status()
    soup = bs4.BeautifulSoup(response.text, "html.parser")

    user_img = next(
        (
            img
            for img in soup.find_all("img")
            if img.get("src", "").startswith("/forum/attach/orig")
        ),
        None,
    )
    if user_img is None:
        # A bare next() would leak StopIteration, which reads like normal
        # iterator exhaustion rather than a scraping failure.
        raise ValueError(f"No attached image found on {task_url}")
    user_img_url = urllib.parse.urljoin("https://www.dafont.com", user_img["src"])
    user_img_alt = user_img.get("alt", "")

    # First "Identified font:" block that actually contains a link; blocks
    # without a link are skipped.
    identified_font = None
    for div in soup.find_all("div"):
        if not div.text.startswith('Identified font:'):
            continue
        link = div.find('a')
        if link is not None:
            identified_font = link.text
            break

    return Task(
        task_id=urllib.parse.urlsplit(task_url).path.split('/')[3],
        url=task_url,
        title=user_img_alt,
        img_url=user_img_url,
        identified_font=identified_font
    )

def _resolve_update_time(stamp: str, now: dt.datetime) -> dt.datetime:
    """Turn a year-less stamp such as "Jun 02 at 17:13" into a datetime.

    The current year is tried first, then the previous one. A candidate is
    accepted only if it is not more than a day ahead of ``now``, so a December
    entry read in January is dated to the previous year.
    """
    # One day of slack; assumes the site clock and the local clock differ by
    # less than that (TODO confirm the site's timezone).
    latest_allowed = now + dt.timedelta(days=1)
    for year in (now.year, now.year - 1):
        try:
            candidate = dt.datetime.strptime(f"{year} {stamp}", "%Y %b %d at %H:%M")
        except ValueError:
            # e.g. "Feb 29" combined with a non-leap year
            continue
        if candidate <= latest_allowed:
            return candidate
    raise ValueError(f"Cannot resolve update time {stamp!r} relative to {now.isoformat()}")


def _loop_task_thumbs(page_no: int, session: requests.Session) -> typing.Generator[TaskThumb, None, None]:
    """Yield a ``TaskThumb`` for every thread thumbnail on one listing page.

    Args:
        page_no: Page number placed in the ``p`` query parameter.
        session: Session used to perform the GET request.

    Yields:
        One ``TaskThumb`` per ``div.thumb_cont`` element, in page order.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        ValueError: If an entry has no recognisable update time.
    """
    page_url = f"https://www.dafont.com/forum/?f=1&p={page_no}"
    response = session.get(page_url)
    response.raise_for_status()

    update_pattern = r"\b(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) \d{2} at \d{2}:\d{2}\b"
    # Taken once so every entry on the page is dated against the same instant.
    now = dt.datetime.now()
    soup = bs4.BeautifulSoup(response.text, 'html.parser')
    for thumb in soup.find_all('div', class_="thumb_cont"):
        path = thumb.find('div', class_='thumb_img').find('a').get('href')
        match = re.search(update_pattern, thumb.find('div', class_='nobr dfsmall tdh').text)
        if match is None:
            raise ValueError(f"No update time found for {path!r} on {page_url}")
        yield TaskThumb(
            # href looks like "read/<id>/<slug>", so element 1 is the id
            task_id=path.split('/')[1],
            url=urllib.parse.urljoin("https://www.dafont.com/forum/", path),
            updated_at=_resolve_update_time(match.group(), now)
        )


def loop_task_thumbs(session: requests.Session) -> typing.Generator[TaskThumb, None, None]:
    """Yield thumbnails from consecutive listing pages, starting at page 1.

    Iteration ends at the first page that yields no thumbnails. Without that
    check the generator would keep requesting further pages forever once the
    listing runs out.

    Args:
        session: Session passed on to ``_loop_task_thumbs`` for each page.
    """
    page_no = 1
    while True:
        logging.info(f"Checking page {page_no}")
        page_had_thumbs = False
        for task_thumb in _loop_task_thumbs(page_no, session):
            page_had_thumbs = True
            yield task_thumb
        if not page_had_thumbs:
            logging.info(f"Page {page_no} has no thumbnails, stopping")
            return
        page_no += 1


# Shared HTTP session; every request it sends carries a desktop Chrome
# User-Agent string.
session = requests.Session()
session.headers["User-Agent"] = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/114.0.0.0 Safari/537.36"
)

def save_tasks(tasks: "dict[str, Task]", filename: str = 'tasks.json') -> None:
    """Write every task to ``filename`` as an indented JSON list.

    The list is serialised in memory, written to ``<filename>.tmp`` and then
    renamed over ``filename``. An interrupted save therefore cannot leave a
    truncated file behind for the next run to load.

    Args:
        tasks: Tasks keyed by task id. Only the values are written, in the
            mapping's iteration order.
        filename: Destination path of the JSON file.
    """
    # Serialise before touching the disk so a serialisation error leaves the
    # existing file untouched.
    payload = json.dumps(
        [dataclasses.asdict(task) for task in tasks.values()],
        indent=4
    )
    tmp_filename = f"{filename}.tmp"
    with open(tmp_filename, 'w') as f:
        f.write(payload)
    # Rename within the same directory replaces the old file in one step.
    os.replace(tmp_filename, filename)
    logging.info(f"Saved tasks to {filename}")

# Reload the tasks saved by earlier runs, indexed by their task id.
with open('tasks.json') as f:
    tasks = {record['task_id']: Task(**record) for record in json.load(f)}

# Timestamp stored by the previous run, read back from its ISO form.
with open('last_checked_at.txt') as f:
    last_checked_at = dt.datetime.fromisoformat(f.read().strip())
# Starts at the stored timestamp; the crawl raises it as newer entries are seen.
new_last_checked_at = last_checked_at

logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s', level=logging.INFO)

# Walk the listing from its first entry and stop at the first entry older
# than the previous run; threads not yet in `tasks` are fetched and added.
added_count = 0
try:
    for task_thumb in loop_task_thumbs(session):

        if task_thumb.updated_at < last_checked_at:
            break
        new_last_checked_at = max(new_last_checked_at, task_thumb.updated_at)

        if task_thumb.task_id in tasks:
            continue

        task = parse_task_from_task_url(
            task_url=task_thumb.url,
            session=session
        )
        tasks[task.task_id] = task
        added_count += 1

        logging.info(f"Added task {task.task_id} ~ {task.url}")

        # Checkpoint after every 10 additions. Counting additions rather than
        # listing positions keeps the interval regular.
        if added_count % 10 == 0:
            save_tasks(tasks)
finally:
    # Runs on failure too, so tasks gathered before an error are not lost.
    save_tasks(tasks)

# Only reached when the crawl finished. After a failure the old timestamp is
# kept, so the next run revisits the same range.
with open('last_checked_at.txt', 'w') as f:
    f.write(new_last_checked_at.isoformat())

2025-06-02 18:49:46,145 - INFO - Checking page 1
2025-06-02 18:49:46,241 - INFO - Saved tasks to tasks.json


In [None]:
# Scratch copy of the body of parse_task_from_task_url, run against the last
# thumbnail seen by the crawl loop.
task_url = task_thumb.url
response = session.get(task_url)
soup = bs4.BeautifulSoup(response.text, "html.parser")
user_img = next(img for img in soup.find_all("img") if img.get("src", "").startswith("/forum/attach/orig"))
user_img_url = urllib.parse.urljoin("https://www.dafont.com", user_img["src"])
user_img_alt = user_img.get("alt", "")
identified_font = next(
    (
        div.find('a').text
        for div in soup.find_all("div")
        # Same guard as in parse_task_from_task_url: a matching div without a
        # link would make div.find('a') return None and the .text access fail.
        if div.text.startswith('Identified font:') and div.find('a')
    ),
    None
)

TypeError: 'NoneType' object is not callable

In [175]:
# Climb four parent levels from the image and show the text of that ancestor's first child.
user_img.parent.parent.parent.parent.contents[0].text

''

<div class="thumb_img" style="height:120px;line-height:120px"><a href="read/575706/qual-o-nome-dessa-font" style="height:120px"> <img height="120" src="/forum/attach/thumb/1/2/1216445.jpg" width="113"/> </a></div>

In [None]:
import re
import datetime as dt

# Scratch version of the listing parser: prints the update time of every
# thumbnail found in `response`.
# NOTE(review): `response` comes from an earlier cell and must hold a listing page.
update_pattern = r"\b(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) \d{2} at \d{2}:\d{2}\b"
soup = bs4.BeautifulSoup(response.text, 'html.parser')
for thumb in soup.find_all('div', class_="thumb_cont"):
    url = thumb.find('div', class_='thumb_img').find('a').get('href')
    at_str = re.search(update_pattern, thumb.find('div', class_='nobr dfsmall tdh').text)
    # The listing shows no year, so the current one is assumed.
    year = dt.datetime.now().year
    at_dt = dt.datetime.strptime(f"{year} {at_str.group()}", "%Y %b %d at %H:%M")
    # `yield` is only valid inside a function; at cell level it is a
    # SyntaxError. Printing matches the output recorded for this cell.
    print(at_dt)

2025-06-02 17:13:00
2025-06-02 17:08:00
2025-06-02 16:43:00
2025-06-02 16:24:00
2025-06-02 15:43:00
2025-06-02 15:41:00
2025-06-02 15:13:00
2025-06-02 15:05:00
2025-06-02 14:25:00
2025-06-02 14:19:00
2025-06-02 14:19:00
2025-06-02 13:44:00
2025-06-02 13:36:00
2025-06-02 13:25:00
2025-06-02 11:34:00
2025-06-02 11:33:00
2025-06-02 11:31:00
2025-06-02 10:31:00
2025-06-02 10:31:00
2025-06-02 10:30:00
2025-06-02 09:43:00
2025-06-02 08:32:00
2025-06-02 08:24:00
2025-06-02 08:03:00
2025-06-02 06:23:00
2025-06-02 06:23:00
2025-06-02 05:46:00
2025-06-02 05:46:00
2025-06-02 05:43:00
2025-06-02 05:23:00
2025-06-02 03:51:00
2025-06-02 03:11:00
2025-06-02 01:41:00
2025-06-02 00:55:00
2025-06-02 00:21:00
2025-06-02 00:07:00
2025-06-01 22:08:00
2025-06-01 21:17:00
2025-06-01 21:15:00
2025-06-01 20:10:00
2025-06-01 20:04:00
2025-06-01 20:00:00
2025-06-01 19:57:00
2025-06-01 19:43:00
2025-06-01 18:27:00
2025-06-01 16:38:00
2025-06-01 16:16:00
2025-06-01 15:50:00
2025-06-01 14:47:00
2025-06-01 13:36:00


In [None]:
# NOTE(review): leftover scratch input; `th` is not defined in the visible cells (presumably an unfinished `thumb`). Confirm and remove.
th

In [95]:
import requests
import urllib.parse
import bs4

# Thread page used to prototype the scraping logic.
url = "https://www.dafont.com/forum/read/953/i-need-the-exact-font-on-this-file-please"

# Send a desktop Chrome User-Agent with the request.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/114.0.0.0 Safari/537.36"
    )
}

# Fetch the page with those headers and parse it.
# NOTE(review): the HTTP status is not checked before parsing.
response = requests.get(url, headers=headers)
soup = bs4.BeautifulSoup(response.text, "html.parser")
# First <img> whose src starts with /forum/attach/orig; next() raises
# StopIteration when the page has none.
user_img = next(img for img in soup.find_all("img") if img.get("src", "").startswith("/forum/attach/orig"))
# Turn the site-relative image path into an absolute URL.
user_img_url = urllib.parse.urljoin("https://www.dafont.com", user_img["src"])
# alt text of the image, "" when the attribute is missing.
user_img_alt = user_img.get("alt", "")
# Four levels above the image, take the first child and strip it. Assumes
# that child is a text node holding the post body (TODO confirm).
user_text = str(user_img.parent.parent.parent.parent.contents[0].strip())

print(user_img_alt)
print(user_text)
print(user_img_url)

I need the exact font on this file please....
I need to recreate the exact text using the same font, please anyne who knows the font... thanks
https://www.dafont.com/forum/attach/orig/3/6/3615.jpg


In [104]:
# Link text of the first <div> whose text starts with 'Identified font:'.
# NOTE(review): next() has no default here, so a page without such a div raises StopIteration.
next(
    div.find('a').text
    for div in soup.find_all("div")
    if div.text.startswith('Identified font:')
)

'Kredit'

In [96]:
# Echo the whole parsed document as the cell output (useful for inspection, but very large).
soup

<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html><head><title>I need the exact font on this file please.... - forum | dafont.com</title><meta content="Archive of freely downloadable fonts. Browse by alphabetical listing, by style, by author or by popularity." name="description"/><meta content="font, download, ttf, freeware, typefaces, typography, typeface, fonts, free, true type, dingbats" name="keywords"/><meta content="https://www.dafont.com/forum/attach/thumb/3/6/3615.jpg" name="thumbnail"/><meta content="max-image-preview:large" name="robots"/><script src="https://cache.consentframework.com/js/pa/24639/c/yGbKH/stub" type="text/javascript"></script>
<script async="" src="https://choices.consentframework.com/js/pa/24639/c/yGbKH/cmp" type="text/javascript"></script>
<script src="/js/df5.js" type="text/javascript"></script><script type="text/javascript">if(ht>"618"){var skypos="fixed";window.onscroll=scrollpos;}else var skypos=