# In [None]:
import requests
from bs4 import BeautifulSoup
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
import pandas as pd

# Workbook holding the labelled URLs; point this at the file's real location.
file_path = 'Keyword search complete.xlsx'  # Update with the correct path

# Read the sheet, keep only the three columns used further down, and discard
# any row that has no URL to scrape.
df = (
    pd.read_excel(file_path)
    .loc[:, ['Ticker', 'Label', 'URL']]
    .dropna(subset=['URL'])
)

# Download each listed page and reduce it to its paragraph text for training
def scrape_statements(df, timeout=10):
    """Fetch every URL in ``df['URL']`` and return each page's paragraph text.

    Args:
        df: Object whose ``'URL'`` entry is an iterable of page addresses
            (the script passes a DataFrame).
        timeout: Seconds to wait on each request before giving up. Without a
            timeout a single unresponsive host would stall the whole run.

    Returns:
        A list with one string per URL, in input order: the stripped,
        space-joined text of the page's ``<p>`` elements, or ``""`` when the
        page could not be fetched or parsed.
    """
    statements = []
    for url in df['URL']:
        try:
            response = requests.get(url, timeout=timeout)
            # A 4xx/5xx reply carries an error page, not the statement; treat
            # it as a failure instead of keeping its text.
            response.raise_for_status()
            soup = BeautifulSoup(response.text, 'html.parser')
            text = ' '.join(p.get_text() for p in soup.find_all('p'))
            statements.append(text.strip())
        except Exception as e:
            # Best effort: report the problem and keep the output aligned
            # with the input rows by recording an empty statement.
            print(f"Error fetching {url}: {e}")
            statements.append("")
    return statements

# Filter for training data (valid labels) BEFORE scraping, so no request is
# spent on rows that would be thrown away afterwards. The copy gives the new
# column an independent frame to live in.
df = df[df['Label'].isin(['Positive', 'Negative', 'Neutral'])].copy()

# Scrape statements and add them to the DataFrame
df['Statement'] = scrape_statements(df)

# scrape_statements records "" for pages it could not fetch (or that had no
# paragraph text); such rows carry a label but no content, so they are left
# out of training.
df = df[df['Statement'] != ""]

if df.empty:
    print("No valid data available for training.")
elif df['Label'].nunique() < 2:
    # LogisticRegression cannot be fitted on a single class.
    print("Training data must contain at least two different labels.")
else:
    # Create a pipeline with TF-IDF and Logistic Regression
    model = make_pipeline(TfidfVectorizer(), LogisticRegression(max_iter=1000))
    model.fit(df['Statement'], df['Label'])

    # Prompt the user for a URL
    user_url = input("Please enter a URL to predict its sentiment: ")

    try:
        # Scrape the content from the user's URL; the timeout keeps an
        # unresponsive host from hanging the script, and raise_for_status
        # stops an HTTP error page from being classified as content.
        user_response = requests.get(user_url, timeout=10)
        user_response.raise_for_status()
        user_soup = BeautifulSoup(user_response.text, 'html.parser')
        user_text = ' '.join(p.get_text() for p in user_soup.find_all('p')).strip()
    except Exception as e:
        print(f"Error fetching the URL: {e}")
    else:
        if not user_text:
            # Nothing was scraped, so there is nothing to classify.
            print("No paragraph text found at the provided URL; cannot predict its sentiment.")
        else:
            # Make a prediction based on the scraped content
            prediction = model.predict([user_text])
            print(f"The predicted sentiment for the provided URL is: {prediction[0]}")