In [2]:
import json
import requests
from newspaper import Article
from bs4 import BeautifulSoup
import re
import os

#from scrape_and_extract import *
#from scrape_and_extract import extract_pdf_pages, extract_chapter, scrape_article

In [10]:
import json

# Load one JSON document from the scraped_data folder to inspect its structure.
# NOTE(review): the raw string uses backslash separators, so this path only resolves on Windows.
with open(r'processed_syllabi\Human_computer_interaction\scraped_data\dark_patterns_at_scale_findings_from_a_crawl_of_11k_shopping_websites.json', encoding='utf-8') as f:
    data = json.load(f)

# Bare expression: the notebook renders the loaded object as the cell output.
data

{'document_type': 'research_paper',
 'title': 'Dark Patterns at Scale: Findings from a Crawl of 11K Shopping Websites',
 'author': 'Arunesh Mathur, Gunes Acar, Michael J. Friedman, Elena Lucherini, Jonathan Mayer, Marshini Chetty, and Arvind Narayanan',
 'source': 'raw_syllabi\\master_courses\\Human_computer_interaction\\pdf_material\\Mathur-2019-Dark-patterns-at-scale.pdf',
 'date_published': '2019-09-23',
 'keywords': 'Dark Patterns; Consumer Protection; Deceptive Content; Nudging; Manipulation',
 'flag': '',

In [33]:

def get_html_soup(url, timeout=10):
    """Fetches a page and returns its parsed HTML, or None when the fetch fails.

    Args:
        url (str): Address of the page to download.
        timeout (float): Seconds to wait for the server before giving up.

    Returns:
        BeautifulSoup | None: Parsed document for a 200 response; None for any
        other status code or for a network-level failure (DNS error, timeout, ...).
    """
    try:
        # Without a timeout, requests can block indefinitely on an unresponsive host.
        response = requests.get(url, headers={"User-Agent": "Mozilla/5.0"}, timeout=timeout)
    except requests.RequestException as exc:
        # Keep the "return None on failure" contract instead of aborting the caller's loop.
        print(f"⚠️ Failed to fetch page: {url} ({exc})")
        return None

    if response.status_code == 200:
        return BeautifulSoup(response.text, "html.parser")

    print(f"⚠️ Failed to fetch page: {url}")
    return None

def extract_author(article, soup):
    """Returns the first author found for an article, or "Unknown".

    Lookup order:
      1. ``article.authors`` (filled in by Newspaper3k),
      2. ``<meta name="author" content="...">`` in the page,
      3. the text of the first ``<div>`` whose class contains "author".

    Args:
        article: Parsed article object exposing an ``authors`` list.
        soup: Parsed page (BeautifulSoup) or None when the page could not be fetched.

    Returns:
        str: The author name, or "Unknown" when no non-empty candidate exists.
    """
    authors = list(article.authors) if article.authors else []

    # If Newspaper3k found nothing, fall back to manual extraction from the HTML.
    if not authors and soup:
        meta_tag = soup.find("meta", attrs={"name": "author"})
        # .get() tolerates a <meta name="author"> that has no content attribute.
        meta_author = (meta_tag.get("content") or "").strip() if meta_tag is not None else ""

        if meta_author:
            authors = [meta_author]
        else:
            author_div = soup.find("div", class_=re.compile(r"author", re.IGNORECASE))
            div_author = author_div.text.strip() if author_div is not None else ""
            # An empty div must not be reported as an (empty) author name.
            if div_author:
                authors = [div_author]

    return authors[0] if authors else "Unknown"

def extract_date(article, soup):
    """Returns the publication date of an article as a string, or "Unknown".

    Lookup order:
      1. ``article.publish_date`` (filled in by Newspaper3k), converted with ``str()``,
      2. the visible text of the first ``<time>`` tag, then its ``datetime`` attribute,
      3. ``<meta property="article:published_time" content="...">`` (OpenGraph).

    Args:
        article: Parsed article object exposing ``publish_date``.
        soup: Parsed page (BeautifulSoup) or None when the page could not be fetched.

    Returns:
        str: The first non-empty candidate, or "Unknown".
    """
    if article.publish_date:
        return str(article.publish_date)

    if not soup:
        return "Unknown"

    time_tag = soup.find("time")  # Many sites use a <time> tag
    if time_tag is not None:
        # Some sites render <time datetime="..."></time> with no visible text.
        candidate = time_tag.text.strip() or (time_tag.get("datetime") or "").strip()
        if candidate:
            return candidate

    # Reached when there is no <time> tag *or* it carried no usable value.
    date_meta = soup.find("meta", attrs={"property": "article:published_time"})  # OpenGraph format
    if date_meta is not None:
        # .get() tolerates a meta tag that lacks the content attribute.
        candidate = (date_meta.get("content") or "").strip()
        if candidate:
            return candidate

    return "Unknown"

def _split_into_sections(paragraphs, heading_texts):
    """Groups paragraphs into sections, opening a new section at every line that is a known heading."""
    sections = []
    subheading, body = "Introduction", []

    for paragraph in paragraphs:
        stripped = paragraph.strip()
        if not stripped:
            continue  # blank lines left over from splitting on "\n"

        # Compare whitespace-normalised text so line wrapping inside a heading does not matter.
        if " ".join(stripped.split()) in heading_texts:
            sections.append({"subheading": subheading, "text": " ".join(body)})
            subheading, body = stripped, []
        else:
            body.append(stripped)

    sections.append({"subheading": subheading, "text": " ".join(body)})
    return sections


def scrape_article(url):
    """Scrapes an online article and structures its metadata and text as a dict.

    The article text comes from Newspaper3k; the page's <h2>/<h3> headings are used
    to split that text into sections. Text before the first heading is stored under
    the subheading "Introduction".

    Args:
        url (str): Address of the article.

    Returns:
        dict | None: Dict with the keys document_type, title, author, source_url,
        date_published and sections (list of {"subheading", "text"}), or None when
        the article could not be downloaded or parsed.
    """
    soup = get_html_soup(url)
    article = Article(url)
    try:
        article.download()
        article.parse()
    except Exception as e:
        # Report and return None so a caller iterating over many URLs can skip this one.
        print(f"⚠️ Failed to download or parse article: {url} ({e})")
        return None

    # Extract metadata
    author = extract_author(article, soup)
    date_published = extract_date(article, soup)

    # Collect heading texts. Empty headings are dropped: with the previous substring
    # test an empty <h2> matched every paragraph, and a short heading matched any
    # body paragraph that merely contained the same words.
    heading_texts = set()
    if soup:
        for heading in soup.find_all(["h2", "h3"]):
            normalized = " ".join(heading.text.split())
            if normalized:
                heading_texts.add(normalized)

    return {
        "document_type": "blog_post",
        "title": article.title if article.title else "Untitled",
        "author": author,
        "source_url": url,
        "date_published": date_published,
        "sections": _split_into_sections(article.text.split("\n"), heading_texts),
    }



In [5]:
# Example usage: scrape a single article; the value returned by scrape_article
# is the last expression of the cell and is therefore shown as the cell output.
url = "https://www.interaction-design.org/literature/book/the-encyclopedia-of-human-computer-interaction-2nd-ed/human-computer-interaction-brief-intro"
scrape_article(url)

{'document_type': 'blog_post',
 'title': 'Human Computer Interaction - brief intro',
 'author': 'Unknown',
 'source_url': 'https://www.interaction-design.org/literature/book/the-encyclopedia-of-human-computer-interaction-2nd-ed/human-computer-interaction-brief-intro',
 'date_published': 'Unknown',
 'sections': [{'subheading': 'Introduction',
   'text': 'Human-computer interaction (HCI) is an area of research and practice that emerged in the early 1980s, initially as a specialty area in computer science embracing cognitive science and human factors engineering. HCI has expanded rapidly and steadily for three decades, attracting professionals from many other disciplines and incorporating diverse concepts and approaches. To a considerable extent, HCI now aggregates a collection of semi-autonomous fields of research and practice in human-centered informatics. However, the continuing synthesis of disparate conceptions and approaches to science and practice in HCI has produced a dramatic exa

In [None]:
# JSON file that maps "<course>/<file name>" to manually correctable metadata.
CORRECTIONS_FILE = "processed_syllabi/metadata_corrections.json"

def sanitize_filename(title):
    """Turns a document title into a safe file name stem.

    Removes the characters that are invalid in file names (``< > : " / \\ | ? *``)
    as well as control characters such as newlines and tabs, then replaces spaces
    with underscores.

    Args:
        title (str): Human-readable title.

    Returns:
        str: The sanitized stem, or "Untitled" when nothing usable is left
        (otherwise the caller would write a file named just ".json").
    """
    cleaned = re.sub(r'[<>:"/\\|?*\x00-\x1f]', '', title).replace(' ', '_')
    return cleaned or "Untitled"

def load_metadata_corrections():
    """Loads the manually corrected metadata stored in CORRECTIONS_FILE.

    When the file does not exist yet, it is created (together with any missing
    parent directory) holding an empty JSON object, so that it can be edited by hand.

    Returns:
        dict: The parsed content of CORRECTIONS_FILE; ``{}`` for a freshly created file.
    """
    if not os.path.exists(CORRECTIONS_FILE):
        # open(..., "w") does not create directories, so make sure the folder exists first.
        parent = os.path.dirname(CORRECTIONS_FILE)
        if parent:
            os.makedirs(parent, exist_ok=True)
        with open(CORRECTIONS_FILE, "w", encoding="utf-8") as f:
            json.dump({}, f, indent=4, ensure_ascii=False)  # Initialize empty JSON
        return {}

    with open(CORRECTIONS_FILE, "r", encoding="utf-8") as f:
        return json.load(f)

def save_metadata_corrections(corrections):
    """Writes the full corrections mapping to CORRECTIONS_FILE.

    The data is written to a temporary sibling file first and then moved into
    place, so an interrupted run cannot leave a half-written file behind. The file
    also holds hand-made edits, which makes a truncated copy costly.

    Args:
        corrections (dict): JSON-serialisable mapping to store.
    """
    parent = os.path.dirname(CORRECTIONS_FILE)
    if parent:
        os.makedirs(parent, exist_ok=True)

    tmp_path = f"{CORRECTIONS_FILE}.tmp"
    with open(tmp_path, "w", encoding="utf-8") as f:
        json.dump(corrections, f, indent=4, ensure_ascii=False)
    # os.replace overwrites the destination in a single step.
    os.replace(tmp_path, CORRECTIONS_FILE)

def process_course_syllabi(course_name):
    """Scrapes every URL listed for a course and stores each result as a JSON file.

    Reads ``raw_syllabi/master_courses/<course_name>/materials_paths_test.txt`` line
    by line (blank lines and lines starting with "#" are ignored). Lines starting
    with "http" are scraped with scrape_article; every other line is currently
    skipped. Each scraped document is written to
    ``processed_syllabi/<course_name>/<sanitized title>.json``.

    Manual corrections: when CORRECTIONS_FILE already has an entry for the output
    file, its fields override the scraped ones before saving. Otherwise the scraped
    course/title/author/date are recorded there so they can be reviewed and edited.

    Args:
        course_name (str): Folder name of the course.
    """
    raw_path = f"raw_syllabi/master_courses/{course_name}/materials_paths_test.txt" # test file
    output_folder = f"processed_syllabi/{course_name}/"

    os.makedirs(output_folder, exist_ok=True)

    # Load existing metadata corrections
    metadata_corrections = load_metadata_corrections()

    with open(raw_path, "r", encoding="utf-8") as file:
        for line in file:
            line = line.strip()
            if not line or line.startswith("#"):
                continue

            article_data = None

            # Process online articles
            if line.startswith("http"):
                print(f"🔗 Scraping article: {line}")
                try:
                    article_data = scrape_article(line)
                except Exception as e:
                    # One unreachable or unparsable page must not abort the remaining entries.
                    print(f"⚠️ Error while scraping {line}: {e}")

            # TODO: PDF entries with page ranges are not processed yet. Draft:
            # elif "pages=" in line:  # Process PDFs with page ranges
            #     pdf_path, pages = line.split(" pages=")
            #     page_range = range(*map(int, pages.split("-")))
            #     output_pdf = f"{output_folder}{os.path.basename(pdf_path).replace('.pdf', '_extracted.pdf')}"
            #     extract_pdf_pages(pdf_path.strip(), output_pdf, page_range)

            if not article_data:
                print(f"⚠️ Skipping {line} (Extraction failed)")
                continue

            # specify title and course name for meta data
            title = sanitize_filename(article_data.get("title", "Untitled"))
            article_data["course"] = course_name
            filename = f"{output_folder}{title}.json"

            # Check if manual corrections exist and apply them
            file_key = f"{course_name}/{os.path.basename(filename)}"
            print(file_key)
            is_known = file_key in metadata_corrections
            if is_known:
                article_data.update(metadata_corrections[file_key])  # Merge corrected fields

            # Save extracted data as JSON
            with open(filename, "w", encoding="utf-8") as f:
                json.dump(article_data, f, indent=4, ensure_ascii=False)

            # Store extracted metadata in corrections log if it's missing
            if not is_known:
                metadata_corrections[file_key] = {
                    "course": course_name,
                    "title": article_data.get("title", "Untitled"),
                    "author": article_data.get("author", "Unknown"),
                    "date_published": article_data.get("date_published", "Unknown")
                }
                # Saved after every new entry so progress survives an interrupted run.
                save_metadata_corrections(metadata_corrections)

            



In [20]:
# Example usage: process every entry of the course's materials list; progress and
# skipped entries are reported through the printed messages.
process_course_syllabi("Human_computer_interaction")

🔗 Scraping article: https://www.nngroup.com/articles/minimize-cognitive-load/
Human_computer_interaction/Minimize_Cognitive_Load_to_Maximize_Usability.json
🔗 Scraping article: https://www.nngroup.com/articles/mental-models/
Human_computer_interaction/Mental_Models_and_User_Experience_Design.json
🔗 Scraping article: https://imotions.com/blog/learning/best-practice/eye-tracking/
Human_computer_interaction/Eye_Tracking_The_Complete_Pocket_Guide.json
🔗 Scraping article: https://maitraudit.medium.com/a-complete-guide-for-eye-tracking-testing-in-ux-research-a3f95d617590
Human_computer_interaction/A_complete_guide_for_Eye-Tracking_testing_in_UX_Research.json
⚠️ Skipping raw_syllabi\master_courses\Human_computer_interaction\pdf_material\Laws of UX.pdf pages=1-12 (Extraction failed)
⚠️ Skipping raw_syllabi\master_courses\Human_computer_interaction\pdf_material\Laws of UX.pdf pages=13-22 (Extraction failed)
⚠️ Skipping raw_syllabi\master_courses\Human_computer_interaction\pdf_material\Measuring 

In [18]:
# Case 1: the syllabus lists specific pages, so exactly those pages are copied.

import fitz  # PyMuPDF

def extract_pdf_pages(pdf_path, output_path, page_range):
    """Copies selected pages of a PDF into a new PDF file.

    Args:
        pdf_path (str): Source PDF.
        output_path (str): Where the new PDF is written.
        page_range (iterable of int): Zero-based page indices to copy (viewer page 1
            is index 0). Indices outside the document are ignored.

    Nothing is written when no requested index exists in the document.
    """
    doc = fitz.open(pdf_path)
    try:
        # Negative indices are rejected as well: insert_pdf gives -1 a special meaning,
        # so passing them through would copy far more than the one page intended.
        valid_pages = [page_num for page_num in page_range if 0 <= page_num < len(doc)]
        if not valid_pages:
            print(f"⚠️ No requested pages exist in {pdf_path}; nothing saved")
            return

        new_pdf = fitz.open()
        try:
            for page_num in valid_pages:
                new_pdf.insert_pdf(doc, from_page=page_num, to_page=page_num)
            new_pdf.save(output_path)
        finally:
            new_pdf.close()
    finally:
        doc.close()  # release the file handle even when saving fails

    print(f"✅ Extracted pages saved to {output_path}")


# Case 2: the syllabus lists chapters and the PDF has structured headings.

def extract_chapter(pdf_path, keyword, output_path, chapter_span=5):
    """Copies the pages around every occurrence of a chapter keyword into a new PDF.

    For each page whose text contains ``keyword`` (case-insensitive), that page and
    the following ``chapter_span`` pages are selected. Every selected page is copied
    exactly once, in document order, even when the keyword appears on neighbouring
    pages and the windows overlap.

    Args:
        pdf_path (str): Source PDF.
        keyword (str): Chapter title (or part of it) to search for.
        output_path (str): Where the new PDF is written.
        chapter_span (int): Number of pages to include after each matching page
            (chapters are assumed to span about this many pages).

    Nothing is written when the keyword does not occur in the document.
    """
    needle = keyword.lower()
    doc = fitz.open(pdf_path)
    try:
        last_page = len(doc) - 1
        selected = set()
        for page_num in range(len(doc)):
            text = doc[page_num].get_text("text")
            if needle in text.lower():  # Check if chapter keyword is in the page
                window_end = min(page_num + chapter_span, last_page)
                selected.update(range(page_num, window_end + 1))

        if not selected:
            print(f"⚠️ Keyword '{keyword}' not found in {pdf_path}; nothing saved")
            return

        new_pdf = fitz.open()
        try:
            for page_num in sorted(selected):
                new_pdf.insert_pdf(doc, from_page=page_num, to_page=page_num)
            new_pdf.save(output_path)
        finally:
            new_pdf.close()
    finally:
        doc.close()

    print(f"✅ Extracted chapter saved to {output_path}")



In [32]:
# Import only the helpers used below; a star import would hide where names come from
# and silently shadow functions defined in this notebook.
from scrape_and_extract import extract_pdf_pages, extract_chapter, scrape_article

# Example usage: copy viewer pages 13-22. The page indices handed to PyMuPDF are
# zero-based, so viewer page 13 is index 12 and range(12, 22) covers indices 12..21.
extract_pdf_pages("raw_syllabi/master_courses/Human_computer_interaction/pdf_material/Laws of UX.pdf", "processed_syllabi/Human_computer_interaction/clipped_pdfs/page13-22.pdf", range(12, 22))


✅ Extracted pages saved to processed_syllabi/Human_computer_interaction/clipped_pdfs/page13-22.pdf


In [33]:
# Example usage: pull the "Jakob's Law" chapter out of the book into its own PDF.
extract_chapter("raw_syllabi/master_courses/Human_computer_interaction/pdf_material/Laws of UX.pdf", "Jakob's Law", "processed_syllabi/Human_computer_interaction/clipped_pdfs/page_chapter.pdf")


✅ Extracted chapter saved to processed_syllabi/Human_computer_interaction/clipped_pdfs/page_chapter.pdf


In [36]:
from pypdf import PdfReader

# Alternative sample file (kept for quick switching; it used to be parsed and then immediately overwritten):
# reader = PdfReader(r"raw_syllabi\master_courses\Human_computer_interaction\pdf_material\Laws of UX.pdf")
reader = PdfReader(r"raw_syllabi\master_courses\Human_computer_interaction\pdf_material\Mathur-2019-Dark-patterns-at-scale.pdf")
meta = reader.metadata

# reader.metadata itself is None when the PDF has no document-information
# dictionary, and every individual field could be None as well.
if meta is None:
    print("No metadata found")
else:
    for field in ("title", "author", "subject", "creator", "producer",
                  "creation_date", "modification_date"):
        print(getattr(meta, field))

Dark Patterns at Scale: Findings from a Crawl of 11K Shopping Websites
Arunesh Mathur, Gunes Acar, Michael J. Friedman, Elena Lucherini, Jonathan Mayer, Marshini Chetty, and Arvind Narayanan
-  Human-centered computing  ->  Empirical studies in HCI.HCI theory, concepts and models.-  Social and professional topics  ->  Consumer products policy.-  Information systems  ->  Browsers.
LaTeX with acmart 2019/04/22 v1.60 Typesetting articles for the Association for Computing Machinery and hyperref 2016/06/24 v6.83q Hypertext links for LaTeX
pdfTeX-1.40.17
2019-09-23 00:22:01+00:00
2019-09-23 00:22:01+00:00


In [None]:
import pymupdf  # PyMuPDF
import pypdf
import re
import os
from pathlib import Path

def extract_pdf_metadata(pdf_path):
    """Extracts title, author, creation date and keywords from a PDF using pypdf.

    Args:
        pdf_path (str): Path of the PDF file.

    Returns:
        tuple[str, str, str, str]: ``(title, author, date_published, keywords)``.
        Missing, empty or unreadable values are reported as "Unknown" (keywords:
        "Unavailable"). A PDF date such as ``D:20190923002201Z`` is converted to
        ``2019-09-23``; month and day are left out when the PDF omits them.
    """
    def _clean(value, default):
        # Metadata values may be None, blank or non-str objects; normalise them all.
        text = str(value).strip() if value is not None else ""
        return text or default

    # All four defaults are set up front so the return below also works when
    # opening or parsing the file fails part-way through.
    title, author, date_published, keywords = "Unknown", "Unknown", "Unknown", "Unavailable"

    try:
        with open(pdf_path, "rb") as f:
            pdf_reader = pypdf.PdfReader(f)
            metadata = pdf_reader.metadata or {}

            # Print all metadata keys for debugging
            for key, value in metadata.items():
                print(f"   pypdf* {key}: {value}")

            title = _clean(metadata.get("/Title"), "Unknown")
            author = _clean(metadata.get("/Author"), "Unknown")
            date_published = _clean(metadata.get("/CreationDate"), "Unknown")
            keywords = _clean(metadata.get("/Keywords"), "Unavailable")

            # format date correctly - YYYY-MM-DD format (only the parts that are present)
            date_match = re.match(r"D:(\d{4})(\d{2})?(\d{2})?", date_published)
            if date_match:
                date_published = "-".join(part for part in date_match.groups() if part)
    except Exception as e:
        print(f"Error extracting metadata with PyPDF: {e}")

    return title, author, date_published, keywords


def pymupdf_extract_pdf_metadata(pdf_path):
    """Extracts title, author, creation date and keywords from a PDF using PyMuPDF.

    Args:
        pdf_path (str): Path of the PDF file.

    Returns:
        tuple[str, str, str, str]: ``(title, author, date_published, keywords)``.
        Missing, empty or unreadable values are reported as "Unknown" (keywords:
        "Unavailable"). A PDF date such as ``D:20190923002201Z`` is converted to
        ``2019-09-23``; month and day are left out when the PDF omits them.
    """
    def _clean(value, default):
        # Metadata values may be None or blank; normalise them all.
        text = str(value).strip() if value is not None else ""
        return text or default

    # All four defaults are set up front so the return below also works when
    # opening the document fails.
    title, author, date_published, keywords = "Unknown", "Unknown", "Unknown", "Unavailable"

    try:
        pdf_reader = pymupdf.open(pdf_path)
        try:
            metadata = pdf_reader.metadata or {}
        finally:
            pdf_reader.close()

        # Print all metadata keys for debugging
        for key, value in metadata.items():
            print(f"   pymupdf* {key}: {value}")

        # PyMuPDF exposes plain keys ("title", "creationDate", ...) without the
        # leading "/" used by pypdf, so "/title" never matched anything.
        title = _clean(metadata.get("title"), "Unknown")
        author = _clean(metadata.get("author"), "Unknown")
        date_published = _clean(metadata.get("creationDate"), "Unknown")
        keywords = _clean(metadata.get("keywords"), "Unavailable")

        # format date correctly - YYYY-MM-DD format (only the parts that are present)
        date_match = re.match(r"D:(\d{4})(\d{2})?(\d{2})?", date_published)
        if date_match:
            date_published = "-".join(part for part in date_match.groups() if part)
    except Exception as e:
        print(f"Error extracting metadata with PyMuPDF: {e}")

    return title, author, date_published, keywords

def adjust_page_range(page_range, true_page_1):
    """
    Converts a requested page range into zero-based PDF page indices.

    ``page_range`` is given in the document's own numbering (the page numbers
    printed in the book); ``true_page_1`` is the viewer page on which that
    numbering starts.
    - If true_page_1 = 15 and the requested range is (1, 12), the viewer pages are
      15-26, i.e. the zero-based indices (14, 25).
    - If true_page_1 is None, the numbering is assumed to start on the first PDF
      page, so (1, 12) becomes (0, 11) instead of the range being dropped.

    Returns None when no page range was requested (callers then use every page).
    """
    if not page_range:
        return None  # No range requested

    start, end = page_range
    offset = (true_page_1 if true_page_1 is not None else 1) - 1

    # Compute actual PDF pages (zero-indexed for fitz)
    return (start - 1 + offset, end - 1 + offset)

def extract_pdf_text(pdf_path, page_range=None, true_page_1=None):
    """Extracts text from a PDF and groups it into sections.

    A text block is treated as a heading when it is written in capitals or is
    shorter than 40 characters; every other block is appended to the current
    section. Text before the first heading goes into "Introduction". Sections
    without body text are dropped.

    Args:
        pdf_path (str): Path of the PDF file.
        page_range (tuple | None): Requested (start, end) pages; None means all pages.
        true_page_1 (int | None): Viewer page on which the page numbering starts.

    Returns:
        list[dict]: Dicts with the keys "subheading" and "text"; an empty list
        when the PDF cannot be read.
    """
    doc = None
    try:
        doc = pymupdf.open(pdf_path)
        extracted_sections = []
        adjusted_range = adjust_page_range(page_range, true_page_1)

        # Get pages in adjusted range (or full doc if no range). The range is clamped
        # to the document: one index past the end used to raise inside the loop and
        # discard everything extracted so far.
        if adjusted_range is None:
            page_numbers = range(len(doc))
        else:
            first_page = max(adjusted_range[0], 0)
            last_page = min(adjusted_range[1], len(doc) - 1)
            page_numbers = range(first_page, last_page + 1)

        subheading, paragraphs = "Introduction", []

        for page_num in page_numbers:
            page = doc[page_num]
            blocks = page.get_text("blocks")  # Get text as blocks to detect headings

            for block in sorted(blocks, key=lambda b: b[1]):  # Sort by y-coordinate (top to bottom)
                # Item 6 is the block type; non-zero marks an image block whose
                # "text" is only a placeholder description of the image.
                if len(block) > 6 and block[6] != 0:
                    continue

                text = block[4].strip()
                if not text:
                    continue  # Skip empty blocks

                # Detect headings based on text formatting
                if text.isupper() or len(text) < 40:
                    # If previous section has content, save it
                    if paragraphs:
                        extracted_sections.append({"subheading": subheading, "text": " ".join(paragraphs)})

                    # Start new section
                    subheading, paragraphs = text, []
                else:
                    paragraphs.append(text)  # Append text to current section

        # Append the last section
        if paragraphs:
            extracted_sections.append({"subheading": subheading, "text": " ".join(paragraphs)})

        return extracted_sections

    except Exception as e:
        print(f"Error extracting text from {pdf_path}: {e}")
        return []
    finally:
        if doc is not None:
            doc.close()


def scrape_pdf(pdf_path, page_range=None, true_page_1=None):
    """
    Extracts metadata and structured text from a PDF.
    - Books: Uses true page 1 and page range
    - Research Papers: Extracts full text as sections

    Args:
        pdf_path (str or Path): Path of the PDF file.
        page_range (tuple | None): Requested (start, end) pages. When given, the
            document is labelled "book", otherwise "research_paper".
        true_page_1 (int | None): Viewer page on which the page numbering starts.

    Returns:
        dict: Keys document_type, title, author, source, date_published, keywords
        and sections.
    """
    title, author, date_published, keywords = extract_pdf_metadata(pdf_path)
    #title, author, date_published, keywords = pymupdf_extract_pdf_metadata(pdf_path)
    sections = extract_pdf_text(pdf_path, page_range, true_page_1)

    # The title is used for naming, so it must never be unknown or blank:
    # fall back to the file name.
    if not title or not title.strip() or title == 'Unknown':
        title = Path(pdf_path).stem

    return {
        "document_type": "book" if page_range else "research_paper",
        "title": title,
        "author": author,
        "source": str(pdf_path),  # str() keeps the dict JSON-serialisable for Path inputs
        "date_published": date_published,
        "keywords": keywords,
        "sections": sections,
    }



In [39]:
import fitz  # PyMuPDF

def extract_pdf_sections(pdf_path, page_range=None, true_page_1=None,
                           header_footer_margin=50, heading_factor=1.2, heading_length_max=80,
                           min_subheaders=3, debug=False):
    """
    Extracts sections from a PDF using a two-pass approach.

    First pass uses strict font size criteria: a block is a heading if its text is short (<= heading_length_max)
    and its average font size is at least (page average * heading_factor).

    If fewer than min_subheaders are detected, a second pass is run that treats any block as a heading if its average
    font size is greater than the page average and its text is short.

    Processing stops at the bibliography heading: a block that consists of "References"
    (optionally numbered, any capitalisation) or that starts with it directly followed by a
    "[" citation marker. Body text that merely contains the word (or "preferences") does not
    stop processing.

    Text before the first heading is stored under "Introduction"; headings that are not
    followed by any body text are dropped.

    Parameters:
      pdf_path (str): Path to the PDF.
      page_range (tuple): (start, end) page numbers in the document's own numbering. Pages outside
        the document are ignored.
      true_page_1 (int): The viewer page number corresponding to the first "real" content page.
        When omitted, the numbering is assumed to start on the first PDF page.
      header_footer_margin (float): Margin in points to ignore blocks near the top/bottom.
      heading_factor (float): In the first pass, required multiplier of page average font size.
      heading_length_max (int): Maximum length of text to be considered a heading.
      min_subheaders (int): Minimum number of subheaders expected.
      debug (bool): If True, prints debug info.

    Returns:
      List[Dict]: Each dict has "subheading" and "text" keys.
    """

    # Anchored at the start of the block so that prose mentioning "references" does not match.
    references_heading = re.compile(r"(?:\d+(?:\.\d+)*\.?\s+)?references\s*(?:$|\[)", re.IGNORECASE)

    def adjust_page_range(page_range, true_page_1):
        # Convert user page numbers (starting at 1) into zero-based PDF indices.
        if not page_range:
            return None
        start, end = page_range
        offset = (true_page_1 or 1) - 1
        return (start - 1 + offset, end - 1 + offset)

    def in_margin(bbox, page_height):
        # True for blocks that reach into the header or footer band of the page.
        return bbox[1] < header_footer_margin or bbox[3] > (page_height - header_footer_margin)

    def run_extraction(use_relaxed_heading=False):
        sections = []
        current_section = {"subheading": "Introduction", "text": ""}
        subheader_count = 0

        doc = fitz.open(pdf_path)
        try:
            adjusted = adjust_page_range(page_range, true_page_1)
            if adjusted:
                # Clamp to the document so a range running past the last page cannot raise.
                page_nums = range(max(adjusted[0], 0), min(adjusted[1], len(doc) - 1) + 1)
            else:
                page_nums = range(len(doc))

            stop_processing = False
            for page_num in page_nums:
                if stop_processing:
                    break
                page = doc[page_num]
                page_dict = page.get_text("dict")
                page_height = page.rect.height

                # Compute average font size for body text (ignoring blocks in header/footer regions)
                body_font_sizes = []
                for block in page_dict["blocks"]:
                    if in_margin(block["bbox"], page_height):
                        continue
                    for line in block.get("lines", []):
                        for span in line.get("spans", []):
                            size = span.get("size", 0)
                            if size:
                                body_font_sizes.append(size)
                # 10 is the fallback size for pages without any measurable text.
                page_avg_font = sum(body_font_sizes)/len(body_font_sizes) if body_font_sizes else 10

                if debug:
                    print(f"Page {page_num}: average font size = {page_avg_font:.2f}")

                for block in page_dict["blocks"]:
                    if in_margin(block["bbox"], page_height):
                        continue  # Skip header/footer blocks

                    # Combine text from all spans in the block.
                    block_text = ""
                    span_sizes = []
                    for line in block.get("lines", []):
                        for span in line.get("spans", []):
                            text = span.get("text", "").strip()
                            if text:
                                block_text += text + " "
                                span_sizes.append(span.get("size", 0))
                    block_text = block_text.strip()
                    if not block_text:
                        continue

                    # Stop processing if the "References" heading is encountered.
                    if references_heading.match(block_text):
                        if debug:
                            print(f"Encountered 'References' in block: {block_text}. Stopping processing.")
                        stop_processing = True
                        break

                    block_avg_size = sum(span_sizes)/len(span_sizes) if span_sizes else page_avg_font
                    if debug:
                        print(f"Block text: {block_text}")
                        print(f"Block avg size: {block_avg_size:.2f}, Page avg: {page_avg_font:.2f}")

                    is_heading = False
                    if not use_relaxed_heading:
                        # Strict criteria: text length is within limit and average size >= page_avg * heading_factor.
                        if len(block_text) <= heading_length_max and block_avg_size >= page_avg_font * heading_factor:
                            is_heading = True
                            if debug:
                                print(f"--> Strict criteria met for heading: '{block_text}'")
                    else:
                        # Relaxed criteria: text length is within limit and average size is just greater than page_avg.
                        if len(block_text) <= heading_length_max and block_avg_size > page_avg_font:
                            is_heading = True
                            if debug:
                                print(f"--> Relaxed criteria met for heading: '{block_text}'")

                    if is_heading:
                        if current_section["text"]:
                            sections.append(current_section)
                        current_section = {"subheading": block_text, "text": ""}
                        subheader_count += 1
                    else:
                        if current_section["text"]:
                            current_section["text"] += " "
                        current_section["text"] += block_text
                # End of page loop.
        finally:
            doc.close()  # each pass opens the file again, so release it every time

        if current_section["text"]:
            sections.append(current_section)
        return sections, subheader_count

    # First pass: strict criteria.
    sections, count = run_extraction(use_relaxed_heading=False)
    if debug:
        print(f"Strict pass subheader count: {count}")
    # If too few subheaders, run second pass with relaxed criteria.
    if count < min_subheaders:
        print(f"Only {count} subheaders found with strict criteria. Retrying with relaxed criteria.")
        sections, count = run_extraction(use_relaxed_heading=True)
        if debug:
            print(f"Relaxed pass subheader count: {count}")

    return sections

# Example usage with debug prints:

# NOTE(review): the raw string uses backslash separators, so this path only resolves on Windows.
pdf_path = r"raw_syllabi\master_courses\Human_computer_interaction\pdf_material\Mathur-2019-Dark-patterns-at-scale.pdf"  # Change to your actual PDF path.
# For example, if your viewer shows pages 1-12 and the "true" page 1 is 15:
# (the remaining arguments repeat the function's default values; only debug is switched on)
sections = extract_pdf_sections(pdf_path, page_range=(1, 12), true_page_1=15,
                                header_footer_margin=50, heading_factor=1.2,
                                heading_length_max=80, min_subheaders=3, debug=True)
# Print every extracted section in full, separated by a divider line.
print("\nExtracted Sections:")
for sec in sections:
    print("Subheading:", sec["subheading"])
    print("Text:", sec["text"])
    print("-----")


Page 14: average font size = 9.66
Block text: Dark Patterns at Scale 81:15
Block avg size: 7.97, Page avg: 9.66
Block text: (a) Countdown Timer on mattressfirm.com . The header displays a Flash Sale where the majority of discounted products remain the same on a day-to-day basis.
Block avg size: 8.99, Page avg: 9.66
Block text: (b) Countdown Timer on justfab.com . The offer is available even after the timer expires.
Block avg size: 8.97, Page avg: 9.66
Block text: (c) Limited-time Message on chicwish.com . The website claims the sale will end ‘soon’ without stating a deadline.
Block avg size: 8.90, Page avg: 9.66
Block text: Fig. 4. Two types of the Urgency category of dark patterns.
Block avg size: 8.97, Page avg: 9.66
Block text: Countdown Timers . The ‘Countdown Timer’ dark pattern is a dynamic indicator of a deadline, counting down until the deadline expires. Figures 4a and 4b show the Countdown Timer dark pattern on mattressfirm.com and justfab.com , respectively. One indicates the

In [40]:
# Bare expression: shows the list of section dicts assigned to `sections` by the example cell.
sections

[{'subheading': 'Introduction',
  'text': 'Dark Patterns at Scale 81:15 (a) Countdown Timer on mattressfirm.com . The header displays a Flash Sale where the majority of discounted products remain the same on a day-to-day basis. (b) Countdown Timer on justfab.com . The offer is available even after the timer expires. (c) Limited-time Message on chicwish.com . The website claims the sale will end ‘soon’ without stating a deadline. Fig. 4. Two types of the Urgency category of dark patterns. Countdown Timers . The ‘Countdown Timer’ dark pattern is a dynamic indicator of a deadline, counting down until the deadline expires. Figures 4a and 4b show the Countdown Timer dark pattern on mattressfirm.com and justfab.com , respectively. One indicates the deadline for a recurring Flash Sale , the other a Member Exclusive . In our data set, we found a total of 393 instances of the Countdown Timer dark pattern. Deceptive Countdown Timers. Using the visit-and-record method described in Section 4.4, we

In [None]:
sec