# Testing for Alpha Vantage api

In [14]:
import requests
import json

# replace the "demo" apikey below with your own key from https://www.alphavantage.co/support/#api-key
url = 'https://www.alphavantage.co/query?function=NEWS_SENTIMENT&tickers=AAPL&apikey=1RZHEEKZF7GJSSST&time_from=20250101T0000&time_to=20250131T2359&sort=EARLIEST&limit=1000'
r = requests.get(url)
data = r.json()
pretty_data = json.dumps(data,indent=5)
print(pretty_data)
with open('data.json','w',encoding='utf-8') as f:
    json.dump(data,f,ensure_ascii=False,indent=5)

{
     "items": "470",
     "sentiment_score_definition": "x <= -0.35: Bearish; -0.35 < x <= -0.15: Somewhat-Bearish; -0.15 < x < 0.15: Neutral; 0.15 <= x < 0.35: Somewhat_Bullish; x >= 0.35: Bullish",
     "relevance_score_definition": "0 < x <= 1, with a higher score indicating higher relevance.",
     "feed": [
          {
               "title": "10 Stock Market Predictions for 2025",
               "url": "https://www.fool.com/investing/2025/01/01/10-stock-market-predictions-for-2025/",
               "time_published": "20250101T100600",
               "authors": [
                    "Sean Williams"
               ],
               "summary": "Here's what to expect from the stock market, along with some of Wall Street's hottest trends and most influential businesses, in the new year.",
               "banner_image": "https://g.foolcdn.com/image/?url=https%3A%2F%2Fg.foolcdn.com%2Feditorial%2Fimages%2F802285%2Fbear-market-stock-chart-quarter-report-financial-metrics-invest-getty.jp

# Real news data gathering starts from here. 
Pros: Gives title, summary, published date, stock symbol, news sentiment score, and news sentiment label

Cons: Can't provided data for year 2020 or before, (tentative I have to check for 21 and 22 also, provides data from 2023)

In [2]:
import requests
import pandas as pd
from datetime import datetime, timedelta
from dateutil.relativedelta import relativedelta

In [3]:
# Your Alpha Vantage API key
ALPHA_VANTAGE_API_KEY = '1RZHEEKZF7GJSSST'
BASE_URL = "https://www.alphavantage.co/query?function=NEWS_SENTIMENT"

In [4]:
# List of tickers for which to retrieve news
tickers = ['AAPL', 'TSLA', 'AMZN', 'NKE', 'NVDA']

# Overall date range for the news retrieval
overall_start = datetime(2025, 1, 1)
overall_end = datetime(2025, 3, 31)

In [5]:
def get_monthly_ranges(start_date, end_date):
    """
    Breaks the overall date range into monthly intervals.
    Returns a list of tuples, each with a (formatted_month_start, formatted_month_end) string,
    using the format 'YYYYMMDDTHHMM'.
    """
    ranges = []
    current = start_date
    while current <= end_date:
        month_start = current.replace(day=1)
        month_end = (month_start + relativedelta(months=1)) - timedelta(days=1)
        # Ensure the month_end does not exceed the overall end date
        if month_end > end_date:
            month_end = end_date
        # Format both start and end dates to 'YYYYMMDDTHHMM'
        start_str = month_start.strftime("%Y%m%dT%H%M")
        end_str = month_end.strftime("%Y%m%dT%H%M")
        ranges.append((start_str, end_str))
        current = month_end + timedelta(days=1)
    return ranges

monthly_ranges = get_monthly_ranges(overall_start, overall_end)
print(monthly_ranges)

[('20250101T0000', '20250131T0000'), ('20250201T0000', '20250228T0000'), ('20250301T0000', '20250331T0000')]


In [12]:
def get_url(ticker,start_date,end_date,api_key,limit=1000,sort='EARLIEST'):
    ticker_url = BASE_URL
    ticker_url += '&tickers=' + ticker
    ticker_url += '&apikey=' + api_key
    ticker_url += '&time_from=' + start_date
    ticker_url += '&time_to=' + end_date
    ticker_url += '&sort=' + sort
    ticker_url += '&limit=' + str(limit)
    return ticker_url

In [7]:
def get_news_for_ticker_alphavantage(ticker_url):
    """
    Retrieves news for a given ticker from the Alpha Vantage NEWS_SENTIMENT endpoint
    for the specified date range.
    
    Parameters:
      - ticker_url: API call url
      
    Returns:
      A list of dictionaries, each representing an article with:
        - published_date: Publication date (string format from API)
        - title: Article title
        - summary: Article summary
        - ticker: The current ticker symbol
        - ticker_sentiment_score: Sentiment score for the ticker (if available)
        - ticker_sentiment_label: Sentiment label for the ticker (if available)
    """
    response = requests.get(ticker_url)
    data = response.json()
    
    articles = []
    # Check if the response contains a news feed
    if 'feed' in data:
        for item in data['feed']:
            published_date = item.get("time_published", "")  # e.g., "20250101T100600"
            title = item.get("title", "")
            summary = item.get("summary", "")
            
            # Extract ticker sentiment details for the current ticker
            ticker_sentiments = item.get("ticker_sentiment", [])
            ticker_sentiment_score = None
            ticker_sentiment_label = None
            for sentiment in ticker_sentiments:
                if sentiment.get("ticker") == ticker:
                    ticker_sentiment_score = sentiment.get("ticker_sentiment_score")
                    ticker_sentiment_label = sentiment.get("ticker_sentiment_label")
                    break

            articles.append({
                "published_date": published_date,
                "title": title,
                "summary": summary,
                "ticker": ticker,
                "ticker_sentiment_score": ticker_sentiment_score,
                "ticker_sentiment_label": ticker_sentiment_label
            })
    else:
        print(f"No news feed returned for {ticker} between {start_date.strftime('%Y-%m-%d')} and {end_date.strftime('%Y-%m-%d')}. Response: {data}")
    return articles

In [None]:
for ticker in tickers:
    ticker_articles = []
    for start, end in monthly_ranges:
        print(f"Fetching {ticker} news from {start} to {end}")
        ticker_url = get_url(ticker,start,end,ALPHA_VANTAGE_API_KEY)
        print(ticker_url)
        articles = get_news_for_ticker_alphavantage(ticker_url)
        ticker_articles.extend(articles)
    
    # Optionally deduplicate articles based on title and published date to remove overlaps
    df = pd.DataFrame(ticker_articles).drop_duplicates(subset=["title", "published_date"])
    df.to_csv(f"../results/{ticker}_alpha_news_data.csv", index=False)
    #all_articles.append(df)
    
print("All News articles saved.")

Fetching AAPL news from 20250101T0000 to 20250131T0000
https://www.alphavantage.co/query?function=NEWS_SENTIMENT&tickers=AAPL&apikey=1RZHEEKZF7GJSSST&time_from=20250101T0000&time_to=20250131T0000&sort=EARLIEST&limit=1000
Fetching AAPL news from 20250201T0000 to 20250228T0000
https://www.alphavantage.co/query?function=NEWS_SENTIMENT&tickers=AAPL&apikey=1RZHEEKZF7GJSSST&time_from=20250201T0000&time_to=20250228T0000&sort=EARLIEST&limit=1000
Fetching AAPL news from 20250301T0000 to 20250331T0000
https://www.alphavantage.co/query?function=NEWS_SENTIMENT&tickers=AAPL&apikey=1RZHEEKZF7GJSSST&time_from=20250301T0000&time_to=20250331T0000&sort=EARLIEST&limit=1000
Fetching TSLA news from 20250101T0000 to 20250131T0000
https://www.alphavantage.co/query?function=NEWS_SENTIMENT&tickers=TSLA&apikey=1RZHEEKZF7GJSSST&time_from=20250101T0000&time_to=20250131T0000&sort=EARLIEST&limit=1000
Fetching TSLA news from 20250201T0000 to 20250228T0000
https://www.alphavantage.co/query?function=NEWS_SENTIMENT&tic