# Daily Data Collection & Processing

### Install the `yfinance` Python package (Yahoo Finance client)

In [None]:
!pip install yfinance --upgrade --no-cache-dir

# Fetch Stock Data and Compute Sentiments

In [None]:
# Top 100 companies in the S&P, written as Yahoo Finance symbols.
# Yahoo Finance separates share classes with a hyphen, so Berkshire Hathaway
# class B is 'BRK-B' (the exchange-style 'BRK.B' is not a Yahoo symbol).
sp100_tickers = [
    'AAPL', 'ABBV', 'ABT', 'ACN', 'ADBE', 'AIG', 'AMD', 'AMGN', 'AMT', 'AMZN',
    'AVGO', 'AXP', 'BA', 'BAC', 'BK', 'BKNG', 'BLK', 'BRK-B', 'C',
    'CAT', 'CHTR', 'CL', 'CMCSA', 'COF', 'COP', 'COST', 'CRM', 'CSCO', 'CVS',
    'CVX', 'DE', 'DHR', 'DIS', 'DOW', 'DUK', 'EMR', 'F', 'FDX', 'GD', 'GE',
    'GILD', 'GM', 'GOOG', 'GOOGL', 'GS', 'HD', 'HON', 'IBM', 'INTC', 'INTU',
    'JNJ', 'JPM', 'KHC', 'KO', 'LIN', 'LLY', 'LMT', 'LOW', 'MA', 'MCD',
    'MDLZ', 'MDT', 'MET', 'META', 'MMM', 'MO', 'MRK', 'MS', 'MSFT', 'NEE',
    'NFLX', 'NKE', 'NVDA', 'ORCL', 'PEP', 'PFE', 'PG', 'PM', 'PYPL', 'QCOM',
    'RTX', 'SBUX', 'SCHW', 'SO', 'SPG', 'T', 'TGT', 'TMO', 'TMUS', 'TSLA',
    'TXN', 'UNH', 'UNP', 'UPS', 'USB', 'V', 'VZ', 'WFC', 'WMT', 'XOM'
]

In [None]:
from transformers import pipeline
from collections import defaultdict
import yfinance as yf
import pandas as pd
import warnings

# Ignore every RuntimeWarning raised from here on.
warnings.simplefilter(action="ignore", category=RuntimeWarning)

# NOTE(review): COUNT is not referenced by any cell visible here — confirm it is still needed.
COUNT = 0

# FinBERT text-classification pipeline used to score the news related to each stock.
sentiment_pipeline = pipeline(task="text-classification", model="ProsusAI/finbert")
# Numeric score for each label that get_sentiment looks up.
SENTIMENT_MAP = dict(positive=1, neutral=0, negative=-1)
sentiment_dict = dict()

def get_sentiment(text):
    """Score a piece of text as a numeric sentiment.

    Runs the module-level ``sentiment_pipeline`` on ``text``, takes the label
    of the first prediction it returns, and looks that label up in
    ``SENTIMENT_MAP``.

    Args:
        text: The text to classify (callers in this notebook pass news titles).

    Returns:
        The ``SENTIMENT_MAP`` value for the predicted label.

    Raises:
        KeyError: If the predicted label is not a key of ``SENTIMENT_MAP``.
    """
    predictions = sentiment_pipeline(text)
    # Only the first prediction is considered.
    label = predictions[0]['label']
    return SENTIMENT_MAP[label]

# Collect Stock Data & Related News
# For every ticker: score each news title, average the scores per publication
# day, then left-join those daily averages onto the last 20 daily price rows.
stock_sent_dfs = []
for symbol in sp100_tickers:
    # A single Ticker handle serves both the news and the price-history requests.
    ticker = yf.Ticker(symbol)
    news = ticker.get_news()
    if not news:
        # Symbols without any news are left out of the result entirely.
        continue

    # Get recent stock prices
    stock_df = ticker.history(period='20d', interval='1d').reset_index()
    if stock_df.empty:
        # No price rows came back, so there is nothing to attach sentiments to
        # (and the frame may lack a datetime 'Date' column — TODO confirm).
        continue

    # Get the result of sentiment analysis of the news, grouped by publication day
    stock_sent_dict = defaultdict(list)
    for n in news:
        content = n['content']
        pub_day = pd.to_datetime(content['pubDate']).strftime('%Y-%m-%d')
        stock_sent_dict[pub_day].append(get_sentiment(content['title']))
    # One row per day holding the mean score of that day's titles.
    data = [
        {'Date': pd.to_datetime(day), 'sentiment': sum(scores) / len(scores)}
        for day, scores in stock_sent_dict.items()
    ]
    stock_sent_df = pd.DataFrame(data)

    stock_df['Symbol'] = symbol
    # Reduce the price timestamps to plain calendar dates so they can be matched
    # against the per-day sentiment rows.
    # NOTE(review): news days and price days are each formatted from their own
    # timestamps; confirm both are expressed in the timezone you intend.
    stock_df['Date'] = pd.to_datetime(stock_df['Date'].dt.strftime('%Y-%m-%d'))
    # Left join keeps every price row; days without news get a missing sentiment.
    stock_sent_df = pd.merge(stock_df, stock_sent_df, on='Date', suffixes=('_yt', '_sentiments'), how='left')
    stock_sent_dfs.append(stock_sent_df)

if not stock_sent_dfs:
    # pd.concat would raise a ValueError on an empty list anyway; say why instead.
    raise ValueError("No stock/sentiment data was collected: no ticker returned both news and price history.")

ss_processed_df = pd.concat(stock_sent_dfs, ignore_index=True)
len(ss_processed_df)

# Save the latest stock prices and sentiment data

In [None]:
from datetime import datetime

# Write the combined frame to a CSV in the working directory; the file name is
# prefixed with today's date formatted as YYYY-MM-DD.
ss_processed_df.to_csv(f"{datetime.today():%Y-%m-%d}_stock&sentiments.csv")