# In [1]:
import requests
from bs4 import BeautifulSoup
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

def fetch_web_content(url, timeout=10):
    """Download *url* and return the text of its ``<p>`` elements.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The paragraph texts joined by single spaces, or ``""`` when the
        request fails or the server answers with an HTTP error status
        (an error message is printed in that case).
    """
    try:
        # Without a timeout requests may block indefinitely on a stalled host.
        response = requests.get(url, timeout=timeout)
        # Treat 4xx/5xx as failures so an error page is not scored as content.
        response.raise_for_status()
    except requests.RequestException as e:
        print(f"Error fetching content from {url}: {e}")
        return ""

    soup = BeautifulSoup(response.text, 'html.parser')
    return ' '.join(par.get_text() for par in soup.find_all('p'))

def preprocess_text(text):
    """Normalise *text* for comparison; currently this only lowercases it."""
    # Hook for further cleanup steps -- none are applied yet.
    normalised = text.lower()
    return normalised

def calculate_similarity(text1, text2):
    """Return the TF-IDF cosine similarity between two texts.

    Both texts are vectorised together by a fresh ``TfidfVectorizer`` and
    the cosine similarity of the two resulting rows is returned.

    Args:
        text1: First document.
        text2: Second document.

    Returns:
        The similarity as a ``float``; ``0.0`` when neither text yields a
        token the vectoriser can use.
    """
    tfidf_vectorizer = TfidfVectorizer()
    try:
        tfidf_matrix = tfidf_vectorizer.fit_transform([text1, text2])
    except ValueError as exc:
        # scikit-learn signals "no usable tokens in any document" with a
        # ValueError; texts with nothing to compare share nothing.
        if "empty vocabulary" not in str(exc):
            raise
        return 0.0

    # Compare row 0 with row 1 only; the self-similarity cell is not needed.
    similarity_matrix = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])
    return float(similarity_matrix[0][0])

def check_plagiarism(original_text, url_to_check):
    """Compare *original_text* with the page at *url_to_check* and report it.

    The page text is fetched with ``fetch_web_content``, both texts are
    passed through ``preprocess_text``, and their similarity (from
    ``calculate_similarity``) is printed as a percentage.

    Args:
        original_text: The text to check.
        url_to_check: URL of the page to compare against.

    Returns:
        The similarity as a percentage, or ``None`` when no content could
        be fetched from the URL.
    """
    web_content = fetch_web_content(url_to_check)

    if not web_content:
        print(f"No content fetched from {url_to_check}")
        return None

    original_text = preprocess_text(original_text)
    web_content = preprocess_text(web_content)

    similarity = calculate_similarity(original_text, web_content) * 100
    print(f"Similarity between the original text and content from {url_to_check}: {similarity:.2f}%")
    # Hand the score back so callers can threshold or aggregate it instead of
    # having to parse the printed line.
    return similarity
