# Lets Fetch From Yfinance First

In [1]:
import yfinance as yf
import pandas as pd

In [2]:
def fetch_market_data(ticker, start_date, end_date):
    df = yf.download(ticker, auto_adjust=False, start=start_date, end=end_date)
    df = df.droplevel('Ticker', axis=1)
    return df

In [3]:
aapl_df = fetch_market_data('AAPL', '2020-01-01', '2024-12-31')
aapl_df

[*********************100%***********************]  1 of 1 completed


Price,Adj Close,Close,High,Low,Open,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-01-02,72.716080,75.087502,75.150002,73.797501,74.059998,135480400
2020-01-03,72.009117,74.357498,75.144997,74.125000,74.287498,146322800
2020-01-06,72.582893,74.949997,74.989998,73.187500,73.447502,118387200
2020-01-07,72.241539,74.597504,75.224998,74.370003,74.959999,108872000
2020-01-08,73.403633,75.797501,76.110001,74.290001,74.290001,132079200
...,...,...,...,...,...,...
2024-12-23,254.989655,255.270004,255.649994,253.449997,254.770004,40858800
2024-12-24,257.916443,258.200012,258.209991,255.289993,255.490005,23234700
2024-12-26,258.735504,259.019989,260.100006,257.630005,258.190002,27237100
2024-12-27,255.309296,255.589996,258.700012,253.059998,257.829987,42355300


# Fetching From Alpaca Api for News

In [5]:

import requests
from datetime import datetime, timedelta
def get_historical_data(start_date = '2025-01-01', symbol='AAPL'):

    def time_converter(time_str):
        """ Convert ISO 8601 timestamp to YYYY-MM-DD format. """
        # Handle 'Z' timezone indicator by replacing it with +00:00
        if time_str.endswith('Z'):
            time_str = time_str[:-1] + '+00:00'
        
        try:
            # Try to parse with fromisoformat
            timestamp_obj = datetime.fromisoformat(time_str)
        except ValueError:
            # Alternative parsing if fromisoformat fails
            timestamp_obj = datetime.strptime(time_str, "%Y-%m-%dT%H:%M:%S.%f%z")
        
        return timestamp_obj.strftime('%Y-%m-%d')

    start_date = start_date

    json_collector = []
    while True:
        end_date = datetime.today().strftime("%Y-%m-%d")
        
        # Stop condition to prevent infinite loop
        if start_date >= end_date:
            print("Reached the latest date. Stopping loop.")
            break

        symbol = symbol
        url = f"https://data.alpaca.markets/v1beta1/news?start={start_date}&sort=asc&symbols={symbol}&limit=50"

        headers = {
            "accept": "application/json",
            "APCA-API-KEY-ID": "PKY7ODAXYMWTX4JK7DVR",
            "APCA-API-SECRET-KEY": "yvQclRacekxCgwL0KX70SFkdbBAsVRvrBQnYaYGY"
        }

        response = requests.get(url, headers=headers)
        
        # Check if the response contains valid JSON data
        if response.status_code != 200:
            print("Error fetching data:", response.status_code)
            break
        
        data = response.json()
        
        # If no more news is returned, break the loop
        if 'news' not in data or not data['news']:
            print("No more news data available. Stopping loop.")
            break
        
        json_collector.extend(data['news'])
        
        # Extract last updated date from the latest fetched news
        last_news_time = data['news'][-1]['updated_at']
        new_start_date = time_converter(last_news_time)

        # Update the start_date more efficiently
        if new_start_date != start_date:
            start_date = new_start_date
            print(f"Same date as previous batch: {start_date}")
        else:
            # Add one day to the start date to avoid duplication
            new_start_date_dt = datetime.strptime(new_start_date, "%Y-%m-%d") + timedelta(days=1)
            start_date = new_start_date_dt.strftime("%Y-%m-%d")
        
        print("Fetching next batch starting from:", start_date)

    return json_collector

In [6]:
def preprocess_data(symbol, start_date):
    # Load the data
    json_data = get_historical_data(start_date=start_date,symbol=symbol)
    df = pd.DataFrame(json_data)
    df['Time'] = pd.DatetimeIndex(df['updated_at'])
    df =df.drop(columns=['created_at','content','images','source','summary','id']).set_index(['Time'])

    return df

In [7]:
data = preprocess_data(symbol='AAPL', start_date='2025-01-01')

Same date as previous batch: 2025-01-07
Fetching next batch starting from: 2025-01-07
Same date as previous batch: 2025-01-15
Fetching next batch starting from: 2025-01-15
Same date as previous batch: 2025-01-21
Fetching next batch starting from: 2025-01-21
Same date as previous batch: 2025-01-24
Fetching next batch starting from: 2025-01-24
Same date as previous batch: 2025-01-29
Fetching next batch starting from: 2025-01-29
Same date as previous batch: 2025-01-31
Fetching next batch starting from: 2025-01-31
Same date as previous batch: 2025-02-03
Fetching next batch starting from: 2025-02-03
Same date as previous batch: 2025-02-08
Fetching next batch starting from: 2025-02-08
Same date as previous batch: 2025-02-14
Fetching next batch starting from: 2025-02-14
Same date as previous batch: 2025-02-19
Fetching next batch starting from: 2025-02-19
Same date as previous batch: 2025-02-24
Fetching next batch starting from: 2025-02-24
Same date as previous batch: 2025-02-28
Fetching next 

In [8]:
data

Unnamed: 0_level_0,author,headline,symbols,updated_at,url
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2025-01-01 14:15:42+00:00,Chris Katje,EXCLUSIVE: Top 20 Most-Searched Tickers On Ben...,"[AAPL, AMD, AVGO, BTCUSD, CLSK, DELL, DJT, LAE...",2025-01-01T14:15:42Z,https://www.benzinga.com/trading-ideas/25/01/4...
2025-01-01 15:00:28+00:00,Benzinga Insights,Competitor Analysis: Evaluating Apple And Comp...,[AAPL],2025-01-01T15:00:28Z,https://www.benzinga.com/insights/news/25/01/4...
2025-01-01 17:35:15+00:00,Benzinga Insights,10 Information Technology Stocks With Whale Al...,"[AAPL, DELL, INTC, MSFT, MSTR, MU, NVDA, RGTI,...",2025-01-01T17:35:15Z,https://www.benzinga.com/insights/options/25/0...
2025-01-01 18:00:19+00:00,Bibhu Pattnaik,"Warren Buffett Says Buy S&P 500, But This Tech...","[AAPL, MSFT, NVDA]",2025-01-01T18:00:19Z,https://www.benzinga.com/markets/25/01/4275661...
2025-01-02 02:28:02+00:00,Ananya Gairola,"Tim Cook, Mark Zuckerberg, Elon Musk, And More...","[AAPL, GOOG, GOOGL, META, TSLA]",2025-01-02T02:28:02Z,https://www.benzinga.com/25/01/42758055/tim-co...
...,...,...,...,...,...
2025-03-16 14:25:18+00:00,Lekha Gupta,"Coinbase, Apple And Robinhood Are Among Top La...","[AAPL, ADBE, COIN, DAL, EXPE, GMAB, HOOD, IBKR...",2025-03-16T14:25:18Z,https://www.benzinga.com/news/large-cap/25/03/...
2025-03-16 15:38:05+00:00,Lekha Gupta,Consumer Tech News (Mar 10-Mar 14): Manus AI C...,"[AAPL, AMZN, BABA, BLNK, GOOG, META, MSFT, NTD...",2025-03-16T15:38:05Z,https://www.benzinga.com/news/large-cap/25/03/...
2025-03-16 19:40:19+00:00,Bibhu Pattnaik,Apple's iPhone 17 Air to Usher in New Era of S...,[AAPL],2025-03-16T19:40:19Z,https://www.benzinga.com/tech/25/03/44348175/a...
2025-03-17 03:24:42+00:00,Ananya Gairola,Trump Trade Policy A 'Big Headache' For Custom...,"[AAPL, AMZN, NVDA]",2025-03-17T03:24:42Z,https://www.benzinga.com/media/25/03/44349466/...


In [98]:
data.groupby(["Time",'url',"author"])["headline"].apply(lambda x: " | ".join(x)).reset_index()

Unnamed: 0,Time,url,author,headline
0,2025-01-01 14:15:42+00:00,https://www.benzinga.com/trading-ideas/25/01/4...,Chris Katje,EXCLUSIVE: Top 20 Most-Searched Tickers On Ben...
1,2025-01-01 15:00:28+00:00,https://www.benzinga.com/insights/news/25/01/4...,Benzinga Insights,Competitor Analysis: Evaluating Apple And Comp...
2,2025-01-01 17:35:15+00:00,https://www.benzinga.com/insights/options/25/0...,Benzinga Insights,10 Information Technology Stocks With Whale Al...
3,2025-01-01 18:00:19+00:00,https://www.benzinga.com/markets/25/01/4275661...,Bibhu Pattnaik,"Warren Buffett Says Buy S&P 500, But This Tech..."
4,2025-01-02 02:28:02+00:00,https://www.benzinga.com/25/01/42758055/tim-co...,Ananya Gairola,"Tim Cook, Mark Zuckerberg, Elon Musk, And More..."
...,...,...,...,...
621,2025-03-16 11:00:46+00:00,https://www.benzinga.com/25/03/44347352/apples...,Rounak Jain,"Apple's Secretive UK Court Hearing, Metallica ..."
622,2025-03-16 13:00:49+00:00,https://www.benzinga.com/tech/25/03/44347574/s...,Rounak Jain,"Softbank's AI Data Center, Block's AI Boost, A..."
623,2025-03-16 14:25:18+00:00,https://www.benzinga.com/news/large-cap/25/03/...,Lekha Gupta,"Coinbase, Apple And Robinhood Are Among Top La..."
624,2025-03-16 15:38:05+00:00,https://www.benzinga.com/news/large-cap/25/03/...,Lekha Gupta,Consumer Tech News (Mar 10-Mar 14): Manus AI C...


In [99]:
def fix_data_(df):
    df["Time"] = pd.to_datetime(df.index)
    df["Date"] = df["Time"].dt.date  # Extract only the date part

    # Group by 'Ticker' and 'Date' and merge headlines
    df_grouped = df.groupby(["Date"])["headline"].apply(lambda x: " | ".join(x)).reset_index()

    return df_grouped


In [100]:
news_df = fix_data_(data)

In [104]:
news_df['headline'][0]

"EXCLUSIVE: Top 20 Most-Searched Tickers On Benzinga Pro In December 2024 — Where Do Tesla, Nvidia, Quantum Computing Stock Rank? | Competitor Analysis: Evaluating Apple And Competitors In Technology Hardware, Storage &amp; Peripherals Industry | 10 Information Technology Stocks With Whale Alerts In Today's Session | Warren Buffett Says Buy S&P 500, But This Tech Investor Warns Of A 'Rude Awakening'"

In [101]:
def fix_data(df):
    df["Time"] = pd.to_datetime(df.index)
    df["Date"] = df["Time"].dt.date  # Extract only the date part

    # Group by 'Ticker' and 'Date' and merge headlines
    df_grouped = df.groupby(["Date",'url',"author"])["headline"].apply(lambda x: " | ".join(x)).reset_index()

    return df_grouped


In [102]:
data = fix_data(data)

In [103]:
data

Unnamed: 0,Date,url,author,headline
0,2025-01-01,https://www.benzinga.com/insights/news/25/01/4...,Benzinga Insights,Competitor Analysis: Evaluating Apple And Comp...
1,2025-01-01,https://www.benzinga.com/insights/options/25/0...,Benzinga Insights,10 Information Technology Stocks With Whale Al...
2,2025-01-01,https://www.benzinga.com/markets/25/01/4275661...,Bibhu Pattnaik,"Warren Buffett Says Buy S&P 500, But This Tech..."
3,2025-01-01,https://www.benzinga.com/trading-ideas/25/01/4...,Chris Katje,EXCLUSIVE: Top 20 Most-Searched Tickers On Ben...
4,2025-01-02,https://www.benzinga.com/25/01/42758055/tim-co...,Ananya Gairola,"Tim Cook, Mark Zuckerberg, Elon Musk, And More..."
...,...,...,...,...
621,2025-03-16,https://www.benzinga.com/25/03/44347352/apples...,Rounak Jain,"Apple's Secretive UK Court Hearing, Metallica ..."
622,2025-03-16,https://www.benzinga.com/news/large-cap/25/03/...,Lekha Gupta,"Coinbase, Apple And Robinhood Are Among Top La..."
623,2025-03-16,https://www.benzinga.com/news/large-cap/25/03/...,Lekha Gupta,Consumer Tech News (Mar 10-Mar 14): Manus AI C...
624,2025-03-16,https://www.benzinga.com/tech/25/03/44347574/s...,Rounak Jain,"Softbank's AI Data Center, Block's AI Boost, A..."
