In [1]:
!pip install requests beautifulsoup4 lxml --quiet


In [6]:
import requests, re, time, sys, os, json
from bs4 import BeautifulSoup
from urllib.parse import urlparse
from typing import Dict, List, Optional
from IPython.display import display, Markdown


class ArticleFetcher:
    def __init__(self):
        self.session = requests.Session()
        self.session.headers.update({
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)'
        })

    def fetch_article(self, url: str) -> Optional[Dict[str, str]]:
        try:
            print(f"fetching: {url}")
            r = self.session.get(url, timeout=30)
            r.raise_for_status()
            soup = BeautifulSoup(r.content, 'html.parser')
            return {
                'url': url,
                'title': self._extract_title(soup),
                'content': self._extract_content(soup),
                'author': self._extract_author(soup),
                'date': self._extract_date(soup),
                'domain': urlparse(url).netloc
            }
        except Exception as e:
            print(f"err: {e}")
            return self._read_from_file()

    def _extract_title(self, soup):
        for sel in ['h1','title','[property="og:title"]','.headline','.article-title']:
            el = soup.select_one(sel)
            if el:
                txt = el.get_text(strip=True)
                if len(txt) > 10: return txt
        return "No title"

    def _extract_content(self, soup):
        for el in soup(['script','style','nav','header','footer','aside']):
            el.decompose()
        content = ""
        for sel in ['article','.article-body','.content','.entry-content','main','[role="main"]']:
            els = soup.select(sel)
            if els:
                content = ' '.join(el.get_text(strip=True) for el in els)
                if len(content) > 200: break
        if len(content) < 200:
            content = ' '.join(p.get_text(strip=True) for p in soup.find_all('p'))
        return re.sub(r'\s+', ' ', content).strip()

    def _extract_author(self, soup):
        for sel in ['[rel="author"]','.author','[property="author"]','.byline']:
            el = soup.select_one(sel)
            if el: return el.get_text(strip=True)
        return "Unknown"

    def _extract_date(self, soup):
        for sel in ['[property="article:published_time"]','time','.date','.published']:
            el = soup.select_one(sel)
            if el: return el.get('datetime') or el.get_text(strip=True)
        return "Unknown"

    def _read_from_file(self):
        try:
            with open('article.txt','r',encoding='utf-8') as f:
                content = f.read().strip()
            return {'url':'local','title':'Local Article','content':content,
                    'author':'Unknown','date':'Unknown','domain':'local'}
        except: return None


class SkepticalAnalyzer:
    def analyze(self, data: Dict[str,str]) -> Dict[str,str]:
        return {
            'core_claims': self._claims(data['content']),
            'language_tone': self._tone(data['content']),
            'red_flags': self._red_flags(data),
            'verification_questions': self._questions(data),
            'key_entities': self._entities(data['content']),
            'counter_perspective': self._counter(data['content'])
        }

    def _claims(self, text):
        sents = text.split('.')
        claims, indicators = [], [
            'according to','data shows','study found','research','report shows',
            'experts say','announced','confirmed','revealed'
        ]
        for s in sents[:20]:
            s = s.strip()
            if len(s) > 30 and any(x in s.lower() for x in indicators):
                claims.append(s+'.')
                if len(claims) >= 5: break
        if not claims:
            claims = [s.strip()+'.' for s in sents if len(s.strip()) > 50][:3]
        return claims

    def _tone(self, text):
        emot = ['devastating','shocking','incredible','amazing','terrible','horrific','unprecedented']
        loaded = ['activist','regime','radical','scandal','crisis','exclusive']
        low = text.lower()
        ecount = sum(1 for w in emot if w in low)
        lcount = sum(1 for w in loaded if w in low)
        ratio = (ecount/ max(len(text.split()),1))*100
        if ratio > 0.5 or lcount > 3:
            return "Heavy emotional/loaded terms."
        elif ratio > 0.2 or lcount > 1:
            return "Some emotional/loaded terms."
        return "Mostly neutral."

    def _red_flags(self, d):
        out, txt = [], d['content'].lower()
        if 'anonymous source' in txt: out.append("Relies on anonymous sources")
        if ('study' in txt or 'research' in txt) and 'http' not in d['content']:
            out.append("Mentions research/data w/o links")
        if not any(x in txt for x in ['however','but','although','critics','opponents']):
            out.append("No counter-views included")
        if any(x in d['title'].lower() for x in ['shocking','incredible','stunning']):
            out.append("Sensational headline")
        if len(d['content'].split()) < 200:
            out.append("Too short, lacks depth")
        return out

    def _questions(self, d):
        qs, text = [], d['content']
        base = [
            f"Who funds {d['domain']}?",
            f"Can claims on {self._topic(text)} be checked elsewhere?",
            "Who are sources and their motives?",
            "What do opponents say?"
        ]
        if 'study' in text.lower(): qs.append("Where is the actual study?")
        if 'expert' in text.lower(): qs.append("What are expert credentials?")
        return (qs + base)[:4]

    def _entities(self, text):
        words, ents = text.split(), {'people':[],'organizations':[],'locations':[]}
        for i,w in enumerate(words):
            if w[0].isupper() and len(w)>2 and i < len(words)-1 and words[i+1][0].isupper():
                ent = f"{w} {words[i+1]}"
                if any(x in ent for x in ['Inc.','Corp.','Institute']):
                    ents['organizations'].append(ent)
                else: ents['people'].append(ent)
        return {k:list(set(v)) for k,v in ents.items()}

    def _counter(self,text):
        return f"A skeptic might argue it's one-sided, ignoring nuance about '{self._topic(text)}'"

    def _topic(self,text):
        words = text.split()[:100]; freq = {}
        stop = {'the','a','and','or','but','in','on','at','to','for','of','with','by','is','are','was','were'}
        for w in words:
            w = w.lower().strip('.,!?":;')
            if len(w)>3 and w not in stop: freq[w] = freq.get(w,0)+1
        return max(freq,key=freq.get) if freq else "subject"


class ReportGenerator:
    def report(self, data:Dict[str,str], a:Dict[str,str]) -> str:
        rep = f"# Report for: {data['title']}\n\n"
        rep += f"**Source:** {data['url']}  \n**Author:** {data['author']}  \n**Date:** {data['date']}  \n**Domain:** {data['domain']}\n\n---\n\n"
        rep += "## Core Claims\n" + self._lst(a['core_claims'])
        rep += "\n\n## Tone\n" + a['language_tone']
        rep += "\n\n## Red Flags\n" + self._lst(a['red_flags'])
        rep += "\n\n## Verification Qs\n" + self._lst(a['verification_questions'],num=True)
        rep += "\n\n---\n\n### Entities\n" + self._ents(a['key_entities'])
        rep += "\n\n### Counter\n" + a['counter_perspective']
        return rep

    def _lst(self,items,num=False):
        if not items: return "- none"
        if num: return '\n'.join(f"{i+1}. {x}" for i,x in enumerate(items))
        return '\n'.join(f"- {x}" for x in items)

    def _ents(self, entities):
        out=[]
        for k,v in entities.items():
            if v: out.append(f"**{k.title()}:** {', '.join(v[:3])}")
        return '\n'.join(out) if out else "none"


In [8]:
# quick runner for Digital Skeptic AI
article_url = "https://medium.com/predict/just-a-random-article-13fbe8bfc768"

def run_analysis(url: str):
    print("== Digital Skeptic AI ==")
    print("thinking critically about articles...\n")

    try:
        fetcher = ArticleFetcher()
        analyzer = SkepticalAnalyzer()
        generator = ReportGenerator()

        data = fetcher.fetch_article(url) if url else fetcher._read_from_file()
        if not data:
            print("couldn't fetch anything"); return
        if len(data.get('content', "")) < 100:
            print("warn: article looks too short")

        a = analyzer.analyze(data)
        report = generator.report(data, a)

        fname = f"analysis_{int(time.time())}.md"
        with open(fname,'w',encoding='utf-8') as f: f.write(report)

        print(f"\nreport saved -> {fname}\n")
        display(Markdown(report))

    except KeyboardInterrupt:
        print("\nuser stopped it.")
    except Exception as e:
        print(f"error: {e}")

# run
run_analysis(article_url)


== Digital Skeptic AI ==
thinking critically about articles...

fetching: /content/article.txt
err: Invalid URL '/content/article.txt': No scheme supplied. Perhaps you meant https:///content/article.txt?

report saved -> analysis_1756380489.md



# Report for: Local Article

**Source:** local  
**Author:** Unknown  
**Date:** Unknown  
**Domain:** local

---

## Core Claims
- Breaking: Revolutionary Study Reveals Shocking Truth About Coffee Consumption

A groundbreaking new study by the Institute for Advanced Nutritional Research has discovered that drinking coffee may be linked to unprecedented health benefits that experts are calling "absolutely remarkable.
- Sarah Johnson, lead researcher on the project, "Our data shows that people who drink 3-4 cups of coffee daily experience a 45% reduction in serious health issues compared to non-coffee drinkers.
- The research, funded by an anonymous benefactor, utilized cutting-edge methodology that previous studies have failed to implement.
- Some critics argue that the study's methodology may be flawed, though they have not provided specific objections to the research design.
- "

The Institute for Advanced Nutritional Research plans to publish their full findings next month, though the complete dataset will not be made publicly available due to privacy concerns regarding participant information.

## Tone
Heavy emotional/loaded terms.

## Red Flags
- Mentions research/data w/o links

## Verification Qs
1. Where is the actual study?
2. What are expert credentials?
3. Who funds local?
4. Can claims on coffee be checked elsewhere?

---

### Entities
**People:** Mark Thompson,, Study Reveals, Revolutionary Study
**Organizations:** The Institute

### Counter
A skeptic might argue it's one-sided, ignoring nuance about 'coffee'