<a href="https://colab.research.google.com/github/Tanzaniav0825/CS667/blob/main/Project_1_deliverables_3_ip.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
from getpass import getpass

# Prompt without echoing the typed characters, so the key itself does not
# appear in the cell output.
SERPAPI_KEY = getpass(prompt="Enter your SerpAPI key:")

Enter your SerpAPI key:··········


In [4]:
!pip install tldextract
!pip install joblib



Collecting tldextract
  Downloading tldextract-5.1.3-py3-none-any.whl.metadata (11 kB)
Collecting requests-file>=1.4 (from tldextract)
  Downloading requests_file-2.1.0-py2.py3-none-any.whl.metadata (1.7 kB)
Downloading tldextract-5.1.3-py3-none-any.whl (104 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m104.9/104.9 kB[0m [31m4.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading requests_file-2.1.0-py2.py3-none-any.whl (4.2 kB)
Installing collected packages: requests-file, tldextract
Successfully installed requests-file-2.1.0 tldextract-5.1.3


In [5]:
!pip install google-search-results


Collecting google-search-results
  Downloading google_search_results-2.4.2.tar.gz (18 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: google-search-results
  Building wheel for google-search-results (setup.py) ... [?25l[?25hdone
  Created wheel for google-search-results: filename=google_search_results-2.4.2-py3-none-any.whl size=32010 sha256=08c75074a568f08f518b7ebe8ccadcd8d345aa4f0155ab8b5a2463229cb4f8fa
  Stored in directory: /root/.cache/pip/wheels/6e/42/3e/aeb691b02cb7175ec70e2da04b5658d4739d2b41e5f73cd06f
Successfully built google-search-results
Installing collected packages: google-search-results
Successfully installed google-search-results-2.4.2


In [9]:
import streamlit as st
import tldextract
import re
import random
from serpapi import GoogleSearch

class CredibilityScorer:
    """Heuristic credibility scorer for a single URL.

    Each check adjusts the running ``score`` and appends a human-readable
    note to ``explanation_parts``. ``evaluate()`` runs every check and
    returns a summary dict.

    Args:
        url: The URL to evaluate.
        serpapi_key: Optional SerpAPI key. When falsy, the SERP-based check
            is skipped and only a note is added to the explanation.
    """

    # Substrings searched for in the matching search result's text.
    _SCHOLARLY_KEYWORDS = ("study", "doi", "research", "journal")
    _CLICKBAIT_KEYWORDS = ("miracle", "shocking", "click here")

    def __init__(self, url, serpapi_key=None):
        self.url = url
        self.serpapi_key = serpapi_key
        self.domain_info = tldextract.extract(url)
        # Only the domain and suffix fields are kept, e.g. "example.com".
        self.domain = f"{self.domain_info.domain}.{self.domain_info.suffix}"
        self.score = 0.0
        self.explanation_parts = []

    # Rule-based checks. Their bodies are placeholders (``...``) in this cell,
    # so as written they leave ``score`` and ``explanation_parts`` untouched.
    def domain_suffix_score(self): ...
    def trusted_domain_score(self): ...
    def recent_year_score(self): ...
    def https_check(self): ...
    def url_length_score(self): ...
    def keyword_check(self): ...
    def spam_word_check(self): ...
    def ml_model_score(self): ...

    def serpapi_credibility_score(self):
        """Adjust the score based on where the URL's domain ranks on Google.

        Queries SerpAPI with the URL itself as the search term and looks for
        the first organic result whose domain equals ``self.domain``. Every
        failure mode is reported through ``explanation_parts`` rather than
        raised, so a SERP outage never aborts an evaluation.
        """
        if not self.serpapi_key:
            self.explanation_parts.append("🔍 SERP analysis skipped (no API key provided).")
            return

        try:
            search = GoogleSearch({
                "q": self.url,
                "api_key": self.serpapi_key,
                "num": 5,
            })
            results = search.get_dict()

            # SerpAPI reports problems (bad key, quota, ...) in an "error"
            # field; surface that instead of claiming the domain was not found.
            api_error = results.get("error")
            if api_error:
                self.explanation_parts.append(f"⚠️ SERP fetch failed: {api_error}")
                return

            for idx, result in enumerate(results.get("organic_results") or []):
                # `or ""` also covers a key that is present but None.
                serp_domain = tldextract.extract(result.get("link") or "")
                if f"{serp_domain.domain}.{serp_domain.suffix}" != self.domain:
                    continue
                # Only the first (highest-ranked) match is scored.
                self._score_serp_match(idx, result)
                return

            self.explanation_parts.append("⚠️ Domain not matched in top Google search results.")

        except Exception as e:
            # Best-effort check: report the failure and keep the other scores.
            self.explanation_parts.append(f"⚠️ SERP fetch failed: {str(e)}")

    def _score_serp_match(self, idx, result):
        """Apply rank and wording adjustments for the matching SERP entry.

        Args:
            idx: Zero-based position of the entry in the organic results.
            result: The organic result dict; ``title`` and ``snippet`` may be
                missing or None.
        """
        if idx == 0:
            self.score += 0.2
            self.explanation_parts.append("🔝 URL appears as the top Google result.")
        elif idx <= 2:
            self.score += 0.1
            self.explanation_parts.append("⬆️ URL ranks within top 3 Google results.")
        else:
            self.explanation_parts.append("🔍 URL found in lower Google results.")

        title = (result.get("title") or "").lower()
        snippet = (result.get("snippet") or "").lower()
        # Joined with a space so a keyword cannot match across the
        # title/snippet boundary.
        combined = f"{title} {snippet}"
        if any(kw in combined for kw in self._SCHOLARLY_KEYWORDS):
            self.score += 0.1
            self.explanation_parts.append("📚 SERP snippet contains scholarly language.")
        if any(kw in title for kw in self._CLICKBAIT_KEYWORDS):
            self.score -= 0.2
            self.explanation_parts.append("🚫 Clickbait detected in SERP title.")

    def compute_star_rating(self):
        """Map the current ``score`` to a string of one to five stars."""
        if self.score >= 0.9:
            return "⭐⭐⭐⭐⭐"
        elif self.score >= 0.75:
            return "⭐⭐⭐⭐"
        elif self.score >= 0.6:
            return "⭐⭐⭐"
        elif self.score >= 0.4:
            return "⭐⭐"
        else:
            return "⭐"

    def evaluate(self):
        """Run every check and summarise the outcome.

        Returns:
            dict with keys ``url``, ``score`` (clamped to [0.0, 1.0] and
            rounded to two decimals), ``stars`` and ``explanation`` (the
            notes joined by single spaces).
        """
        # Start from a clean slate so that calling evaluate() again on the
        # same instance does not double-count scores or repeat explanations.
        self.score = 0.0
        self.explanation_parts = []

        self.domain_suffix_score()
        self.trusted_domain_score()
        self.recent_year_score()
        self.https_check()
        self.url_length_score()
        self.keyword_check()
        self.spam_word_check()
        self.ml_model_score()
        self.serpapi_credibility_score()

        # Penalties can push the raw score below zero; report within 0..1.
        bounded_score = max(0.0, min(self.score, 1.0))
        return {
            "url": self.url,
            "score": round(bounded_score, 2),
            "stars": self.compute_star_rating(),
            "explanation": " ".join(self.explanation_parts)
        }

# -------------------------------
# 🌐 Streamlit App UI
# -------------------------------
# NOTE: Streamlit widgets only render when this code is launched with
# `streamlit run <file>.py`; executing the cell directly in a notebook kernel
# will not show the UI.
st.set_page_config(page_title="Credibility Scorer", page_icon="🔍")
st.title("🔍 URL Credibility Scorer")
st.markdown("Evaluate the trustworthiness of any URL using rule-based and SERP-based analysis.")

# Strip surrounding whitespace so a pasted value with stray spaces/newlines is
# not rejected (URL) or sent to the API verbatim (key).
url = st.text_input("🔗 Enter a URL to evaluate:").strip()
serpapi_key = st.text_input("🔐 SerpAPI Key (optional for deeper analysis):", type="password").strip()

if st.button("Evaluate"):
    # Require the full scheme prefix, matching the warning text below; a bare
    # "http" prefix would also accept inputs such as "httpfoo".
    if not url.startswith(("http://", "https://")):
        st.warning("⚠️ Please enter a full URL starting with http:// or https://")
    else:
        # An empty key is passed as None so the scorer skips the SERP check.
        scorer = CredibilityScorer(url, serpapi_key or None)
        result = scorer.evaluate()

        st.markdown(f"### ⭐ Credibility Score: `{result['stars']}` ({result['score']} / 1.0)")
        st.subheader("📘 Explanation")
        st.markdown(result["explanation"])


