IMPORT LIBRARIES

In [None]:
# pip install requests python-dotenv beautifulsoup4 lxml

import os
import requests
import time
from bs4 import BeautifulSoup as bs

MY STACK EXCHANGE API KEY

In [None]:
# Prefer a key supplied through the STACK_EXCHANGE_API_KEY environment variable;
# fall back to the key that used to be hard-coded here so existing runs keep working.
# NOTE(review): consider dropping the literal fallback before sharing this notebook.
STACK_EXCHANGE_API_KEY = os.environ.get('STACK_EXCHANGE_API_KEY') or 'rl_2AK3hrozTLChbcAFfTMnbu2km'

In [None]:
# CSS property names used as substring keywords when filtering answers and
# comments, one list per layout-failure category.
element_protrusion_key_properties = [
    'position', 'float', 'height', 'min-height', 'width', 'min-width',
    'display', 'margin', 'padding', 'font-size', 'white-space',
]
element_collision_key_properties = [
    'position', 'float', 'margin', 'height', 'min-height', 'width',
    'min-width', 'display', 'padding', 'flex-wrap', 'max-height', 'max-width',
]
# 'font-size' and 'white-space' must be separate entries: without the comma
# Python concatenates the adjacent literals into 'font-sizewhite-space'.
viewport_protrusion_key_properties = [
    'position', 'float', 'height', 'min-height', 'width', 'min-width',
    'margin', 'padding', 'font-size', 'white-space',
]
wrapping_key_properties = [
    'display', 'flex', 'float', 'width', 'min-width', 'margin', 'padding',
    'font-size',
]

In [None]:
def keyword_search(properties_set, body):
    """Report whether any keyword occurs as a substring of ``body``.

    ``body`` is lower-cased before matching; the keywords themselves are
    compared exactly as given.

    Args:
        properties_set: Iterable of keyword strings to look for.
        body: Text to search.

    Returns:
        True if at least one keyword is found in the lower-cased text,
        otherwise False.
    """
    haystack = body.lower()
    return any(term in haystack for term in properties_set)

In [None]:
from bs4 import NavigableString
import html

def clean_html_content(html_text):
    """Flatten an HTML fragment into text while keeping code snippets marked.

    Every string node of the parsed document is visited. Strings whose direct
    parent is a ``<code>`` tag are HTML-unescaped and re-wrapped in
    ``<code>...</code>``; other strings (except those attached directly to the
    document root) have their whitespace collapsed to single spaces.

    Args:
        html_text: HTML markup to clean.

    Returns:
        The collected pieces joined with spaces, with blank lines removed.
    """
    fragments = []

    for node in bs(html_text, "lxml").descendants:
        if not isinstance(node, NavigableString):
            continue

        parent_tag = node.parent.name
        if parent_tag == 'code':
            # Keep code content verbatim (apart from entity decoding).
            fragments.append('<code>' + html.unescape(str(node)) + '</code>')
        elif parent_tag != '[document]':
            # str.split() with no arguments already discards leading/trailing
            # whitespace, so this collapses every whitespace run to one space.
            fragments.append(' '.join(node.split()))

    merged = ' '.join(fragments)
    return '\n'.join(line for line in merged.split('\n') if line.strip())

In [None]:
def _sleep_for_backoff(payload):
    """Pause when a decoded API response carries a ``backoff`` value."""
    backoff = payload.get("backoff") if isinstance(payload, dict) else None
    if backoff:
        # One extra second of slack on top of the requested wait.
        time.sleep(backoff + 1)


def fetch_stackoverflow_threads(tags, question, key_set, min_answer_count=1, max_pages=1):
    """Search Stack Overflow and collect threads whose discussion mentions a keyword.

    For every search hit with at least ``min_answer_count`` answers, the
    question body is cleaned, its comments are filtered by ``key_set``, and
    (when the question is marked answered) its answers are fetched; answers
    with a positive score whose cleaned body contains a keyword are kept.
    Threads that end up with neither a matching answer nor a matching comment
    are dropped.

    Args:
        tags: Tag names; joined with ';' for the ``tagged`` parameter.
        question: Free-text query passed as ``q``.
        key_set: Keywords handed to ``keyword_search`` to filter comments and
            answers.
        min_answer_count: Minimum ``answer_count`` a question needs to be kept.
        max_pages: Maximum number of search result pages to request. The
            default of 1 matches the previous behaviour of reading only the
            first page.

    Returns:
        A list of thread dicts (title, question_body, link, score, tags,
        question_id, answer_count, view_count, comment_count, comments,
        answers). An empty list is returned if any HTTP request fails.
    """
    api_key = STACK_EXCHANGE_API_KEY
    search_url = "https://api.stackexchange.com/2.3/search/advanced"
    request_timeout = 30  # seconds; keeps a stalled connection from hanging the run

    threads = []
    page = 1
    has_more = True

    while has_more and page <= max_pages:
        search_params = {
            "page": page,
            "pagesize": 100,
            "order": "desc",
            "sort": "votes",
            "q": question,
            "tagged": ";".join(tags),
            "site": "stackoverflow",
            "key": api_key,
            # Custom API filter; presumably adds bodies and comments to the
            # returned questions -- verify against the filter definition.
            "filter": "!0WRXThY5-I_TB(WxIdhBDpR5e"
        }

        try:
            search_response = requests.get(search_url, params=search_params, timeout=request_timeout)
            # NOTE: the printed URL contains the API key query parameter.
            print(search_response.url)
            search_response.raise_for_status()
            data = search_response.json()
            _sleep_for_backoff(data)

            for item in data.get("items", []):
                # Skip early so a filtered-out question can never reuse the
                # previous iteration's thread (or hit an undefined name).
                if item["answer_count"] < min_answer_count:
                    continue

                thread = {
                    "title": item["title"],
                    "question_body": clean_html_content(item["body"]),
                    "link": item["link"],
                    "score": item["score"],
                    "tags": item["tags"],
                    "question_id": item["question_id"],
                    "answer_count": item["answer_count"],
                    "view_count": item["view_count"],
                    "comment_count": item["comment_count"],
                    "comments": [],
                    "answers": []
                }

                if item["comment_count"] > 0:
                    for comment in item.get("comments", []):
                        cleaned_text = clean_html_content(comment["body"])
                        if keyword_search(key_set, cleaned_text):
                            thread["comments"].append({"body": cleaned_text})

                if item["is_answered"]:
                    answer_url = f'https://api.stackexchange.com/2.3/questions/{item["question_id"]}/answers'
                    # Separate names keep the search request's parameters and
                    # response intact while the answers are being fetched.
                    answer_params = {
                        "key": api_key,
                        'order': 'asc',
                        'sort': 'votes',
                        'site': 'stackoverflow',
                        'filter': '!)rlHR4RcMJ3iyACK5j)T',
                    }
                    answers_response = requests.get(answer_url, params=answer_params, timeout=request_timeout)
                    print(answers_response.url)
                    answers_response.raise_for_status()
                    answers = answers_response.json()
                    _sleep_for_backoff(answers)

                    for answer in answers.get('items', []):
                        if answer['score'] > 0:
                            cleaned_answer = clean_html_content(answer["body"])
                            if keyword_search(key_set, cleaned_answer):
                                thread["answers"].append({
                                    "score": answer["score"],
                                    "body": cleaned_answer
                                })

                # Keep only threads that produced at least one relevant answer
                # or comment.
                if thread["answers"] or thread["comments"]:
                    threads.append(thread)

        except requests.exceptions.RequestException as e:
            print(f"API request failed: {e}")
            return []

        has_more = bool(data.get("has_more", False))
        page += 1

    return threads

In [None]:
# First collision query: threads about overlapping elements.
collision_tags = ['css', 'html']
question = "element overlap fix"
collision_qa_threads = fetch_stackoverflow_threads(
    collision_tags,
    question,
    element_collision_key_properties,
    min_answer_count=1,
)

In [None]:
# Second collision query; its threads are merged into the main collision list.
collision_tags = ['css', 'html']
question = "div overlap fix"
collision_qa_threads_v2 = fetch_stackoverflow_threads(
    tags=collision_tags,
    question=question,
    key_set=element_collision_key_properties,
    min_answer_count=1
)
# extend() adds each thread individually; append() would nest the whole
# result list as a single element of collision_qa_threads.
collision_qa_threads.extend(collision_qa_threads_v2)

In [None]:
# Third collision query; its threads are merged into the main collision list.
collision_tags = ['css', 'html']
question = "element collision fix"
collision_qa_threads_v2 = fetch_stackoverflow_threads(
    tags=collision_tags,
    question=question,
    key_set=element_collision_key_properties,
    min_answer_count=1
)
# extend() adds each thread individually; append() would nest the whole
# result list as a single element of collision_qa_threads.
collision_qa_threads.extend(collision_qa_threads_v2)

In [None]:
# First protrusion query: threads about overflowing content.
protrusion_tags = ['css', 'html']
question = "overflow fix"
protrusion_qa_threads = fetch_stackoverflow_threads(
    protrusion_tags,
    question,
    element_protrusion_key_properties,
    min_answer_count=1,
)

In [None]:
# Second protrusion query; its threads are merged into the main protrusion list.
protrusion_tags = ['css', 'html']
question = "element protrude fix"
protrusion_qa_threads_v2 = fetch_stackoverflow_threads(
    tags=protrusion_tags,
    question=question,
    key_set=element_protrusion_key_properties,
    min_answer_count=1
)
# extend() adds each thread individually; append() would nest the whole
# result list as a single element of protrusion_qa_threads.
protrusion_qa_threads.extend(protrusion_qa_threads_v2)

In [None]:
# Third protrusion query; its threads are merged into the main protrusion list.
protrusion_tags = ['css', 'html']
question = "getting outside screen fix"
protrusion_qa_threads_v2 = fetch_stackoverflow_threads(
    tags=protrusion_tags,
    question=question,
    key_set=element_protrusion_key_properties,
    min_answer_count=1
)
# extend() adds each thread individually; append() would nest the whole
# result list as a single element of protrusion_qa_threads.
protrusion_qa_threads.extend(protrusion_qa_threads_v2)

In [None]:
# Wrapping query: threads about elements dropping onto the next line.
wrapping_tags = ['css', 'html']
question = "element next line"
wrapping_qa_threads = fetch_stackoverflow_threads(
    wrapping_tags,
    question,
    wrapping_key_properties,
    min_answer_count=1,
)

In [None]:
import json

def save_to_json(data, filename):
    """Serialise ``data`` as JSON into ``filename``.

    The file is written as UTF-8 with a 4-space indent, and non-ASCII
    characters are written as-is rather than escaped.

    Args:
        data: JSON-serialisable object to store.
        filename: Path of the file to create or overwrite.
    """
    with open(filename, mode="w", encoding="utf-8") as out_file:
        json.dump(data, out_file, indent=4, ensure_ascii=False)
        
def save_to_file(threads, file_name):
    """Write ``threads`` to ``file_name`` as JSON and print a short summary.

    Args:
        threads: Collection of threads to store; its length is reported.
        file_name: Destination path handed to ``save_to_json``.
    """
    save_to_json(data=threads, filename=file_name)
    thread_total = len(threads)
    print(f"Saved {thread_total} threads to {file_name}")

In [None]:
# Persist the collected collision threads.
save_to_file(threads=collision_qa_threads, file_name="stackoverflow_collision_threads.json")

In [None]:
# Persist the collected protrusion threads.
save_to_file(threads=protrusion_qa_threads, file_name="stackoverflow_protrusion_threads.json")

In [None]:
# Persist the collected wrapping threads.
save_to_file(threads=wrapping_qa_threads, file_name="stackoverflow_wrapping_threads.json")

In [None]:
import json

def count_ans_comments(file_path):
    """Print how many entries, answers and comments a saved JSON file holds.

    The file is expected to contain a JSON list. The number of top-level
    entries is printed first, followed by the summed lengths of the
    ``answers`` and ``comments`` lists of every entry that is a dict.
    Entries that are not dicts (for example a nested list left in an older
    output file) are counted as entries but contribute no answers or comments.

    Args:
        file_path: Path of the UTF-8 encoded JSON file to read.
    """
    with open(file_path, encoding='utf-8') as file:
        data = json.load(file)

    print(len(data))

    grand_sum_answers = 0
    grand_sum_comments = 0
    for thread in data:
        if not isinstance(thread, dict):
            continue
        grand_sum_answers += len(thread.get('answers', []))
        grand_sum_comments += len(thread.get('comments', []))

    print("total answers: ", grand_sum_answers)
    print("total comments: ", grand_sum_comments)

In [None]:
# Summarise the saved collision threads.
file_path = "stackoverflow_collision_threads.json"
count_ans_comments(file_path=file_path)

In [None]:
# Summarise the saved protrusion threads.
file_path = "stackoverflow_protrusion_threads.json"
count_ans_comments(file_path=file_path)

In [None]:
# Summarise the saved wrapping threads.
file_path = "stackoverflow_wrapping_threads.json"
count_ans_comments(file_path=file_path)

In [None]:
# pip install rake-nltk
import nltk

# Download named resources instead of calling nltk.download() with no
# arguments, which opens the interactive downloader and blocks the run.
# NOTE(review): rake-nltk is expected to need the stopword list and the punkt
# sentence tokenizer ('punkt_tab' on newer NLTK releases) -- confirm against
# the installed versions.
for resource in ('stopwords', 'punkt', 'punkt_tab'):
    nltk.download(resource)

In [None]:
from rake_nltk import Rake

# Plain-language descriptions of the four layout failures, used as input text
# for keyword extraction: element collision (ec), element protrusion (ep),
# viewport protrusion (vp) and wrapping elements (we).
ec = "Elements collide into one another due to insufficient accommodation space when viewport width reduces"
ep = "When the child element is contained within its container, but as the viewport width decreases, it lacks sufficient space to fit within its parent. As a result, the child element protrudes out of its container."
vp = "As the viewport size decreases, elements may not only overflow their containers but also protrude out of the viewable area of the webpage (i.e., the <BODY> tag), causing them to appear outside the horizontally visible portion of the page."
we = "When the container is not wide enough but has a flexible height, horizontally aligned elements contained within it no longer fit side by side, causing “wrap” to a new line on the page."

# One extractor instance is shared by the cells that follow.
r = Rake()

In [None]:
# Feed the element-collision description (ec) to the shared Rake instance.
r.extract_keywords_from_text(ec)
# Left as a bare trailing expression so the notebook displays its value.
r.get_ranked_phrases_with_scores()

In [None]:
# Feed the element-protrusion description (ep) to the shared Rake instance.
r.extract_keywords_from_text(ep)
# Left as a bare trailing expression so the notebook displays its value.
r.get_ranked_phrases_with_scores()

In [None]:
# Feed the viewport-protrusion description (vp) to the shared Rake instance.
r.extract_keywords_from_text(vp)
# Left as a bare trailing expression so the notebook displays its value.
r.get_ranked_phrases_with_scores()

In [None]:
# Feed the wrapping-elements description (we) to the shared Rake instance.
r.extract_keywords_from_text(we)
# Left as a bare trailing expression so the notebook displays its value.
r.get_ranked_phrases_with_scores()