In [1]:
from alpha_vantage.timeseries import TimeSeries  # Import Alpha Vantage API for time series data
import pandas as pd  # Import pandas for data manipulation
import os  # Import os to handle file and directory operations

# Directory that receives the per-symbol CSV files. One constant drives both
# the directory creation and the file paths below, so the folder that is
# created is always the folder that is written to.
DATA_DIR = "Data"
os.makedirs(DATA_DIR, exist_ok=True)

# API key for Alpha Vantage. It is read from the environment so the secret is
# never stored in the notebook; export ALPHAVANTAGE_API_KEY before running.
API_KEY = os.environ.get("ALPHAVANTAGE_API_KEY", "")
if not API_KEY:
    # Fail fast with an actionable message instead of a remote API error later.
    raise ValueError(
        "Alpha Vantage API key not found: set the ALPHAVANTAGE_API_KEY environment variable."
    )

# Initialize the Alpha Vantage client; results come back as pandas DataFrames
ts = TimeSeries(key=API_KEY, output_format="pandas")

# List of top 5 companies in India (symbols for BSE stock market)
symbols = ["RELIANCE.BSE", "TCS.BSE", "HDFCBANK.BSE", "INFY.BSE", "ICICIBANK.BSE"]

# Inclusive date window kept for every symbol
START_DATE = "2020-01-01"
END_DATE = "2024-12-31"

# Fetch, filter and save the daily series of each company in turn
for symbol in symbols:
    print(f"Fetching stock data for {symbol}...")  # Progress message per symbol

    # Fetch the full daily history for the current symbol
    data, meta_data = ts.get_daily(symbol=symbol, outputsize="full")

    # Make sure the index holds datetimes so it can be compared against date bounds
    data.index = pd.to_datetime(data.index)

    # Keep only the rows whose date falls inside the 2020-2024 window
    data_filtered = data[(data.index >= START_DATE) & (data.index <= END_DATE)]

    # Output file for this symbol, inside the directory created above
    file_path = f"{DATA_DIR}/{symbol}_Stock_2020_2024.csv"

    # Save the filtered data to a CSV file
    data_filtered.to_csv(file_path)

    # Confirm where the data was written
    print(f"✅ {symbol} 2020–2024 stock data saved to {file_path}")

# Preview the filtered data of the last symbol processed by the loop
data_filtered.head()

Fetching stock data for RELIANCE.BSE...
✅ RELIANCE.BSE 2020–2024 stock data saved to ../Data/RELIANCE.BSE_Stock_2020_2024.csv
Fetching stock data for TCS.BSE...
✅ TCS.BSE 2020–2024 stock data saved to ../Data/TCS.BSE_Stock_2020_2024.csv
Fetching stock data for HDFCBANK.BSE...
✅ HDFCBANK.BSE 2020–2024 stock data saved to ../Data/HDFCBANK.BSE_Stock_2020_2024.csv
Fetching stock data for INFY.BSE...
✅ INFY.BSE 2020–2024 stock data saved to ../Data/INFY.BSE_Stock_2020_2024.csv
Fetching stock data for ICICIBANK.BSE...
✅ ICICIBANK.BSE 2020–2024 stock data saved to ../Data/ICICIBANK.BSE_Stock_2020_2024.csv


Unnamed: 0_level_0,1. open,2. high,3. low,4. close,5. volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2024-12-31,1287.95,1292.0,1280.4,1282.0,44526.0
2024-12-30,1300.05,1326.9,1289.55,1293.9,209890.0
2024-12-27,1298.0,1315.55,1298.0,1307.15,261601.0
2024-12-26,1300.05,1312.5,1289.2,1298.2,272298.0
2024-12-24,1298.9,1301.4,1290.15,1297.65,205370.0


In [3]:
import feedparser  # Parses RSS feeds
import pandas as pd  # For handling the data
from datetime import datetime, timedelta  # Handle date ranges month by month

# Function to get RSS feed headlines for a specific month and company
def fetch_monthly_news(company, year, month):
    """Fetch Google News RSS headlines for one company and one calendar month.

    Args:
        company: Company name used as the search query.
        year: Four-digit year of the month to query.
        month: Month number (1-12).

    Returns:
        A list of dicts with the keys "Company", "Headline" and
        "Published Date", one per entry of the parsed feed. The list is empty
        when the feed contains no entries.
    """
    # Local import keeps this function self-contained within the cell.
    from urllib.parse import quote_plus

    # First day of the month, used as the lower bound of the search window
    start_date = f"{year}-{month:02d}-01"

    # Adding 31 days to the 1st always lands in the following month; snapping
    # back to day 1 and subtracting one day yields the last day of this month.
    first_of_next_month = (datetime(year, month, 1) + timedelta(days=31)).replace(day=1)
    end_date_str = (first_of_next_month - timedelta(days=1)).strftime("%Y-%m-%d")

    # URL-encode the company name so characters such as "&" or "#" cannot
    # break the query string (spaces still become "+", as before).
    query = quote_plus(company)
    rss_url = (
        f"https://news.google.com/rss/search?q={query}"
        f"+after:{start_date}+before:{end_date_str}&hl=en-IN&gl=IN&ceid=IN:en"
    )

    # Parse the RSS feed
    feed = feedparser.parse(rss_url)

    # One record per feed entry; fall back to the month's first day when an
    # entry carries no publication date.
    return [
        {
            "Company": company,
            "Headline": entry.title,
            "Published Date": getattr(entry, "published", start_date),
        }
        for entry in feed.entries
    ]

import os  # needed here to create the output directory before writing

# List of top 5 companies in India
companies = ["Reliance Industries", "Tata Consultancy Services", "HDFC Bank", "Infosys", "ICICI Bank"]

# Destination of the combined headlines file
OUTPUT_CSV = "Data/Top5_Companies_GoogleNews_Monthly_2020_2024.csv"

# Collect headlines for every company and every month from Jan 2020 to Dec 2024
all_news = []
for company in companies:
    for year in range(2020, 2025):
        for month in range(1, 13):
            print(f"📅 Fetching news for {company} ({year}-{month:02d})")
            headlines = fetch_monthly_news(company, year, month)
            all_news.extend(headlines)  # Accumulate all articles in one flat list

# Build the DataFrame with explicit columns so the schema is preserved even
# when no headline was collected at all.
df = pd.DataFrame(all_news, columns=["Company", "Headline", "Published Date"])

# Make sure the target directory exists; to_csv does not create it.
os.makedirs(os.path.dirname(OUTPUT_CSV), exist_ok=True)

# Save the DataFrame to a CSV file
df.to_csv(OUTPUT_CSV, index=False)

# Print the number of headlines collected
print(f"\nCollected {len(df)} headlines from 2020 to 2024.")

# Display the first few rows of the DataFrame
df.head()

📅 Fetching news for Reliance Industries (2020-01)
📅 Fetching news for Reliance Industries (2020-02)
📅 Fetching news for Reliance Industries (2020-03)
📅 Fetching news for Reliance Industries (2020-04)
📅 Fetching news for Reliance Industries (2020-05)
📅 Fetching news for Reliance Industries (2020-06)
📅 Fetching news for Reliance Industries (2020-07)
📅 Fetching news for Reliance Industries (2020-08)
📅 Fetching news for Reliance Industries (2020-09)
📅 Fetching news for Reliance Industries (2020-10)
📅 Fetching news for Reliance Industries (2020-11)
📅 Fetching news for Reliance Industries (2020-12)
📅 Fetching news for Reliance Industries (2021-01)
📅 Fetching news for Reliance Industries (2021-02)
📅 Fetching news for Reliance Industries (2021-03)
📅 Fetching news for Reliance Industries (2021-04)
📅 Fetching news for Reliance Industries (2021-05)
📅 Fetching news for Reliance Industries (2021-06)
📅 Fetching news for Reliance Industries (2021-07)
📅 Fetching news for Reliance Industries (2021-08)


Unnamed: 0,Company,Headline,Published Date
0,Reliance Industries,Reliance Industries posts record Q3 profit at ...,"Fri, 17 Jan 2020 08:00:00 GMT"
1,Reliance Industries,Building the new Reliance - Fortune India,"Fri, 03 Jan 2020 08:00:00 GMT"
2,Reliance Industries,"Reliance outpaces industry in petrol, diesel s...","Sun, 19 Jan 2020 08:00:00 GMT"
3,Reliance Industries,Reliance launches new road project to counter ...,"Wed, 29 Jan 2020 08:00:00 GMT"
4,Reliance Industries,Reliance to enter restaurant business with Arm...,"Fri, 31 Jan 2020 08:00:00 GMT"
