In [1]:
import os
import time
import requests
import pandas as pd

# Configuration
# The Alpha Vantage key can be supplied through the ALPHAVANTAGE_API_KEY
# environment variable so it does not have to live in the notebook.
# NOTE(review): the literal fallback is kept only so existing runs keep
# working; a key committed to a shared notebook should be rotated and removed.
API_KEY = os.environ.get('ALPHAVANTAGE_API_KEY') or 'D247GM10LRQVLMZZ'
BASE_URL = 'https://www.alphavantage.co/query'
TICKERS = ['AAPL', 'GOOGL', 'MSFT', 'AMD', 'AMZN','IBM','ORCL', 'ACN', 'INFY', 'SAP']
OUTPUT_FOLDER = 'stockdata'  # CSV files are written here, one per ticker
FUNCTION = 'TIME_SERIES_DAILY'  # Sent as the `function` query parameter; change as needed

# Create output directory if it doesn't exist
os.makedirs(OUTPUT_FOLDER, exist_ok=True)

def fetch_stock_data(symbol, timeout=30):
    """Download the daily time series for one ticker from the configured API.

    Parameters
    ----------
    symbol : str
        Ticker symbol sent as the ``symbol`` query parameter.
    timeout : float, optional
        Seconds to wait for the HTTP request before giving up (default 30).
        Without a timeout a stalled connection would block the notebook.

    Returns
    -------
    pandas.DataFrame or None
        Frame indexed by the date strings from the response (index name
        ``date``, sorted ascending) with the numeric prefix stripped from the
        column names. ``None`` is returned, after printing a diagnostic, when
        the request fails, the body is not JSON, or the payload has no
        ``"Time Series (Daily)"`` entry.
    """
    params = {
        'function': FUNCTION,
        'symbol': symbol,
        'apikey': API_KEY,
        'outputsize': 'full',
        'datatype': 'json'
    }

    try:
        response = requests.get(BASE_URL, params=params, timeout=timeout)
    except requests.RequestException as exc:
        # A network problem for one ticker should not abort the caller's loop.
        print(f"Request failed for {symbol}: {exc}")
        return None

    try:
        data = response.json()
    except ValueError:
        # requests raises a ValueError subclass when the body is not valid JSON.
        print(f"Failed to decode JSON for {symbol}. Response content:")
        print(response.text)
        return None

    # Guard against non-dict payloads (e.g. JSON null) before the key lookup.
    if isinstance(data, dict) and "Time Series (Daily)" in data:
        df = pd.DataFrame.from_dict(data["Time Series (Daily)"], orient='index')
        # Drop the first three characters of each column name (e.g. "1. open" -> "open").
        df = df.rename(columns=lambda x: x[3:].strip())
        df.index.name = 'date'
        return df.sort_index()

    print(f"Error fetching data for {symbol}. Full response:")
    print(data)
    return None


def save_to_csv(df, symbol):
    """Write ``df`` to ``<OUTPUT_FOLDER>/<symbol>.csv`` and print the destination.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to persist; its index is written as the first CSV column.
    symbol : str
        Ticker symbol used as the file name stem.
    """
    csv_name = f"{symbol}.csv"
    file_path = os.path.join(OUTPUT_FOLDER, csv_name)

    df.to_csv(file_path)

    print(f"Saved {symbol} data to {file_path}")

def main():
    """Fetch and save the price history for every symbol in ``TICKERS``.

    Tickers whose download fails (``fetch_stock_data`` returns ``None``) are
    skipped. A 12-second pause separates consecutive requests.
    """
    last_index = len(TICKERS) - 1
    for index, ticker in enumerate(TICKERS):
        print(f"Fetching data for {ticker}...")
        df = fetch_stock_data(ticker)
        if df is not None:
            save_to_csv(df, ticker)
        if index < last_index:
            # Alpha Vantage free tier allows 5 requests/minute; there is
            # nothing to throttle after the final request, so skip that wait.
            time.sleep(12)

# Start the download when this code is executed directly. The guard also
# passes inside the notebook kernel, where __name__ is "__main__".
if __name__ == "__main__":
    main()


Fetching data for AAPL...
Saved AAPL data to stockdata\AAPL.csv
Fetching data for GOOGL...
Saved GOOGL data to stockdata\GOOGL.csv
Fetching data for MSFT...
Saved MSFT data to stockdata\MSFT.csv
Fetching data for AMD...
Saved AMD data to stockdata\AMD.csv
Fetching data for AMZN...
Saved AMZN data to stockdata\AMZN.csv
Fetching data for IBM...
Saved IBM data to stockdata\IBM.csv
Fetching data for ORCL...
Saved ORCL data to stockdata\ORCL.csv
Fetching data for ACN...
Saved ACN data to stockdata\ACN.csv
Fetching data for INFY...
Saved INFY data to stockdata\INFY.csv
Fetching data for SAP...
Saved SAP data to stockdata\SAP.csv


In [3]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from datetime import datetime

# Search scope: the symbols to query and the publication window, which
# scrape_rss compares inclusively against each article's timestamp.
tickers = [
    'AAPL', 'GOOGL', 'MSFT', 'AMD', 'AMZN',
    'IBM', 'ORCL', 'ACN', 'INFY', 'SAP',
]
start_date, end_date = datetime(1999, 11, 1), datetime(2025, 4, 3)

# Accumulates one dict per matching article across all tickers.
all_news = list()

def scrape_rss(ticker, timeout=30):
    """Collect Google News RSS items for ``ticker`` into the global ``all_news``.

    Parameters
    ----------
    ticker : str
        Symbol interpolated into the RSS search query (``"<ticker>+stock"``).
    timeout : float, optional
        Seconds to wait for the HTTP request before giving up (default 30).

    Returns
    -------
    None
        Matching articles (publication time between ``start_date`` and
        ``end_date``, inclusive) are appended to ``all_news`` as dicts with
        the keys ``Ticker``, ``Date``, ``Title``, ``Article`` and ``Link``.
        Request failures and malformed items are reported and skipped so one
        bad feed entry does not stop the remaining tickers.
    """
    print(f"\n🔎 Scraping RSS for {ticker}...")
    rss_url = f"https://news.google.com/rss/search?q={ticker}+stock"
    try:
        response = requests.get(rss_url, timeout=timeout)
    except requests.RequestException as exc:
        print(f"❌ Failed to fetch RSS for {ticker}: {exc}")
        return

    if response.status_code != 200:
        print(f"❌ Failed to fetch RSS for {ticker}")
        return

    soup = BeautifulSoup(response.content, features='xml')
    items = soup.find_all('item')

    if not items:
        print(f"⚠️ No articles found for {ticker}")
        return

    for item in items:
        try:
            title = item.title.text
            link = item.link.text
            pub_date = datetime.strptime(item.pubDate.text, '%a, %d %b %Y %H:%M:%S %Z')
        except (AttributeError, ValueError):
            # AttributeError: a required child tag is absent (lookup gave None).
            # ValueError: pubDate does not match the expected format.
            print(f"⚠️ Skipping malformed item for {ticker}")
            continue

        if start_date <= pub_date <= end_date:
            # The description is optional here: store an empty string if the tag is missing.
            description = item.description.text if item.description is not None else ''
            print(f"  📰 {pub_date.date()} - {title}")
            all_news.append({
                'Ticker': ticker,
                'Date': pub_date.date(),
                'Title': title,
                'Article': description,
                'Link': link
            })

# Scrape all tickers
for ticker in tickers:
    scrape_rss(ticker)

# Save to CSV
# Naming the columns explicitly keeps the header row even when no article
# matched, so the file can still be loaded with pd.read_csv afterwards.
df = pd.DataFrame(all_news, columns=['Ticker', 'Date', 'Title', 'Article', 'Link'])
df.to_csv("google_news_rss_articles.csv", index=False)
print("\n✅ News saved to google_news_rss_articles.csv")



🔎 Scraping RSS for AAPL...
  📰 2025-04-01 - Apple (AAPL) Laps the Stock Market: Here's Why - Nasdaq
  📰 2025-03-30 - Is Apple Inc. (AAPL) Best Stock to Buy for the Next 3 Months? - Yahoo Finance
  📰 2025-03-07 - Apple (AAPL) Beats Stock Market Upswing: What Investors Need to Know - Nasdaq
  📰 2025-03-18 - Apple stock price today: What is AAPL stock prediction for 2025? - Markets.com
  📰 2025-02-04 - Apple (AAPL) stock price forecast: third-party price target - Capital.com
  📰 2025-04-01 - Jim Cramer on Apple Inc. (NASDAQ:AAPL): “Apple is a great stock but. . .” - Yahoo Finance
  📰 2025-02-24 - Apple Makes $500 Billion Bet: Is AAPL Stock Ready To Shine Or Stumble? - Benzinga
  📰 2025-03-12 - Watch These Apple Stock Price Levels as Sell-Off Continues - Investopedia
  📰 2025-02-11 - Why Apple (AAPL) Stock Is Trading Up Today - Yahoo Finance
  📰 2025-03-29 - Is Apple Inc. (AAPL) The Mega Cap Stock Gaining Bullish Momentum This Week? - Yahoo Finance
  📰 2025-03-13 - Apple Stock Bulls Are L

In [4]:
import pandas as pd
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import nltk

# Download the VADER lexicon (only needs to happen once per machine)
nltk.download('vader_lexicon')

# Load the articles CSV. The path is relative to the working directory, which
# is where the scraping cell writes google_news_rss_articles.csv; a hardcoded
# drive letter would tie the notebook to one machine.
df = pd.read_csv("google_news_rss_articles.csv")

# Initialize VADER sentiment analyzer
sid = SentimentIntensityAnalyzer()

# Classify a piece of text with the module-level VADER analyzer `sid`
def analyze_sentiment(text):
    """Return ``pd.Series([label, compound])`` for ``text``.

    ``text`` is converted with ``str()`` before scoring. The label is
    ``'Positive'`` when the compound score is at least 0.05, ``'Negative'``
    when it is at most -0.05, and ``'Neutral'`` otherwise.
    """
    compound = sid.polarity_scores(str(text))['compound']

    if compound >= 0.05:
        return pd.Series(['Positive', compound])
    if compound <= -0.05:
        return pd.Series(['Negative', compound])
    return pd.Series(['Neutral', compound])

# Score every headline; each row of the result holds (label, compound score)
title_sentiment = df['Title'].apply(analyze_sentiment)
df[['Sentiment', 'Sentiment_Score']] = title_sentiment

# Keep only the columns written to the output file
report_columns = ['Ticker', 'Date', 'Title', 'Sentiment', 'Sentiment_Score']
output_df = df[report_columns]

# Write the classified headlines to a new CSV
output_df.to_csv("classified_news_sentiment.csv", index=False)

print("Sentiment classification complete. Output saved as 'classified_news_sentiment.csv'.")


[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\lohith\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


Sentiment classification complete. Output saved as 'classified_news_sentiment.csv'.
