In [1]:
from IPython.core.display import HTML
import requests
import time
import pandas as pd
import datetime
HTML("""
<style>
.container { width:100% !important; }
</style>
""")

### Go here to get API KEY
* https://www.alphavantage.co/support/#api-key

In [3]:
api_key = "2HJMUVOPKGO59FTA"

##### Helper Functions
* _get_data: get data from alpha vantage
* _get_label_sentiment: converts scores to numbers

In [4]:
def _get_data(symbols,time_from,time_to,api_key):
    url = f"https://www.alphavantage.co/query?function=NEWS_SENTIMENT&tickers={symbols}&time_from={time_from}&time_to={time_to}&limit=1000&apikey={api_key}"
    r = requests.get(url)
    data = r.json()
    return data

def _get_label_sentiment(x):
    if x <= -0.35:
        return 'Bearish','Bearish'
    elif -0.35 < x <= -0.15:
        return 'Somewhat-Bearish','Bearish'
    elif -0.15 < x < 0.15:
        return 'Neutral','Neutral'
    elif 0.15 <= x < 0.35:
        return 'Somewhat_Bullish','Bullish'
    else:  # x >= 0.35
        return 'Bullish','Bullish'

### Get dataset going backward in time
* Initially set time_to = '' meaning till current time
* Then set it to earliest time after each api call
* Set a time_from very far into past
* Eventually this won't work when we are done with all the data...

In [12]:
import time

In [13]:
current_time = time.localtime()

# Format the time as YYYYMMDDTHHMM
formatted_time = time.strftime("%Y%m%dT%H%M", current_time)

In [14]:
def get_dataset(time_from="20030410T0130",
                time_to= formatted_time,
                MAX_API_CALLS_PER_DAY = 25, # Free tier only allows 25 API calls per day
                MAX_API_CALLS_PER_MIN = 5 # Free tier only allows 5 api calls per minute
               ):
    data_list=[]
    for i in range(1,MAX_API_CALLS_PER_DAY+1): 
        if i%5==0: 
            time.sleep(60)
        
        data=_get_data('TSLA',time_from,time_to,api_key)
        if 'feed' not in data:break
        if len(data['feed'])==0: break
        data_list.append(data)
        time_to=data['feed'][-1]['time_published'][:-2] # Take all the way up to last 2 since api only takes minute level granularity
    df=pd.concat([pd.DataFrame(data['feed']) for data in data_list])
    # Extract TSLA specific relevance (we didn't use it in video)
    df['ticker_relevance_TSLA']=df['ticker_sentiment'].apply(lambda l:[el for el in l if el['ticker']=='TSLA'][0]['relevance_score']).astype(float)
    # Extract TSLA specific sentiment
    df['ticker_sentiment_TSLA']=df['ticker_sentiment'].apply(lambda l:[el for el in l if el['ticker']=='TSLA'][0]['ticker_sentiment_score']).astype(float)
    # Only take tickers with TSLA in headline 
    df=df[df.title.str.contains('tsla|tesla',case=False)]
    # Extract # of tickers
    df['num_tickers']=df.ticker_sentiment.apply(lambda l:len(l))
    # Only take when # of tickers = 1
    df = df[df.num_tickers==1]
    # Applying the function and creating two new columns
    df[['detailed_original_label','label']] = df.apply(lambda row: _get_label_sentiment(row['ticker_sentiment_TSLA']), axis=1, result_type='expand')
    # Drop duplicates..
    df.drop_duplicates(subset=['summary'],inplace=True,keep='first')
    # Set index to time published
    df.set_index('time_published',inplace=True)
    # Sort by time published
    df.sort_index(inplace=True)
    return df
    
        

### Run this to get dataset and save to CSV file
* Takes 5 mins LOL!
* You can stop here after saving the CSV and can run the colab notebook...

In [21]:
df = get_dataset(time_to='')
df.to_csv('tsla_sentiment.csv')

ValueError: No objects to concatenate

In [25]:
df

Unnamed: 0_level_0,title,url,authors,summary,banner_image,source,category_within_source,source_domain,topics,overall_sentiment_score,overall_sentiment_label,ticker_sentiment,ticker_relevance_TSLA,ticker_sentiment_TSLA,num_tickers,detailed_original_label,label
time_published,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
20231106T192159,'Tesla Is Back': Jim Cramer Expects Stock Rall...,https://www.benzinga.com/trading-ideas/long-id...,['Adam Eckert'],Jim Cramer says Tesla Inc TSLA is back after t...,https://cdn.benzinga.com/files/images/story/20...,Benzinga,General,www.benzinga.com,"[{'topic': 'Financial Markets', 'relevance_sco...",0.046212,Neutral,"[{'ticker': 'TSLA', 'relevance_score': '0.9122...",0.912237,0.123216,1,Neutral,Neutral
20231107T065956,India Fast-Tracks Approvals To Welcome Elon Mu...,https://www.benzinga.com/news/23/11/35636075/i...,['Shomik Sen Bhattacharjee'],This story was first published on the Benzinga...,https://cdn.benzinga.com/files/images/story/20...,Benzinga,News,www.benzinga.com,"[{'topic': 'Financial Markets', 'relevance_sco...",0.199924,Somewhat-Bullish,"[{'ticker': 'TSLA', 'relevance_score': '0.6919...",0.691995,0.333594,1,Somewhat_Bullish,Bullish
20231107T085136,Tesla Model Y To Get Pricier In China: Report ...,https://www.benzinga.com/news/23/11/35636935/t...,['Anan Ashraf'],EV giant Tesla Inc TSLA is reportedly gearing ...,https://cdn.benzinga.com/files/images/story/20...,Benzinga,News,www.benzinga.com,"[{'topic': 'Manufacturing', 'relevance_score':...",0.040358,Neutral,"[{'ticker': 'TSLA', 'relevance_score': '0.5611...",0.561184,-0.044547,1,Neutral,Neutral
20231107T122000,Port Workers Join Tesla Strike In Sweden,https://www.barrons.com/news/port-workers-join...,['AFP - Agence France Presse'],Workers at four Swedish ports blocked the load...,https://www.barrons.com/asset/external-media/a...,Barrons,,www.barrons.com,"[{'topic': 'Manufacturing', 'relevance_score':...",0.009616,Neutral,"[{'ticker': 'TSLA', 'relevance_score': '0.8708...",0.870862,-0.083327,1,Neutral,Neutral
20231107T224331,China Plans To 'Reshape The World' By Mass Pro...,https://www.benzinga.com/news/23/11/35656672/c...,['Adam Eckert'],China unveiled plans to mass-produce humanoid ...,https://cdn.benzinga.com/files/images/story/20...,Benzinga,General,www.benzinga.com,"[{'topic': 'Manufacturing', 'relevance_score':...",0.329900,Somewhat-Bullish,"[{'ticker': 'TSLA', 'relevance_score': '0.4926...",0.492659,0.457138,1,Bullish,Bullish
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20250114T055924,Biden Admin Brushes Off Tesla's $100M Big-Rig ...,https://www.benzinga.com/25/01/42966137/biden-...,['Anan Ashraf'],Biden administration reportedly brushed off Te...,https://cdn.benzinga.com/files/images/story/20...,Benzinga,News,www.benzinga.com,"[{'topic': 'Manufacturing', 'relevance_score':...",0.014141,Neutral,"[{'ticker': 'TSLA', 'relevance_score': '0.7290...",0.729067,-0.034485,1,Neutral,Neutral
20250114T085555,Audi's Deliveries Drop 12% In 2024: Tesla Take...,https://www.benzinga.com/tech/25/01/42967305/a...,['Anan Ashraf'],German automaker Audi delivered around 1.7 mil...,https://cdn.benzinga.com/files/images/story/20...,Benzinga,News,www.benzinga.com,"[{'topic': 'Manufacturing', 'relevance_score':...",0.038965,Neutral,"[{'ticker': 'TSLA', 'relevance_score': '0.4514...",0.451494,0.180245,1,Somewhat_Bullish,Bullish
20250114T173625,"Tesla Model Y Refresh Gets 50,000 Reservations...",https://www.benzinga.com/news/global/25/01/429...,['Chris Katje'],Tesla recently announced the Model Y refresh i...,https://cdn.benzinga.com/files/images/story/20...,Benzinga,Trading,www.benzinga.com,"[{'topic': 'Financial Markets', 'relevance_sco...",0.217568,Somewhat-Bullish,"[{'ticker': 'TSLA', 'relevance_score': '0.8191...",0.819198,0.346723,1,Somewhat_Bullish,Bullish
20250114T224522,Tesla ( TSLA ) Stock Falls Amid Market Uptic...,https://www.zacks.com/stock/news/2396864/tesla...,['Zacks Equity Research'],Tesla (TSLA) reachead $396.36 at the closing o...,https://staticx-tuner.zacks.com/images/default...,Zacks Commentary,,www.zacks.com,"[{'topic': 'Earnings', 'relevance_score': '0.9...",0.123939,Neutral,"[{'ticker': 'TSLA', 'relevance_score': '0.6880...",0.688096,0.238901,1,Somewhat_Bullish,Bullish


### If you want more data you can run this the next day starting at earliest publish date and going backwards in time.
* Load dataset so far 
* Find earliest published time
* Can give you duplicates when you are done with data. I didn't handle it in this code so drop duplicates maybe by URL,published_time,summary...

In [27]:
df = pd.read_csv('tsla_sentiment.csv',index_col='time_published').sort_index()
earliest_time_published = df.index[0]
earliest_time_published

'20231106T192159'

#### Get more data

In [28]:
df_new = get_dataset(time_to=earliest_time_published[:-2])

ValueError: No objects to concatenate

### Combine the dataframes and save to a csv

In [31]:
# df = pd.concat([df_new,df])
df.drop_duplicates(subset=['summary'],inplace=True,keep='first')
df.to_csv(r'D:\Downloads\Datasets\tsla_sentiment.csv')

### When we run out of dates or want to add new articles we don't have we can now go forward in time from latest published time. WE can run this like every day or every few days and we should have mostly everything
* Find latest time
* We take nearest minute forward not backwards as we did when going backwards in time
* We set the time_from to this...

In [None]:
df = pd.read_csv('tsla_sentiment.csv',index_col='time_published').sort_index()

latest_time_published = df.index[-1]

# Parse the timestamp string into a datetime object
dt = datetime.datetime.strptime(latest_time_published[:-2], '%Y%m%dT%H%M')

# Round the datetime to the nearest second
rounded_dt = dt + datetime.timedelta(minutes=1)

# Format the rounded datetime back to the string format
next_timestamp = rounded_dt.strftime('%Y%m%dT%H%M')
 

### Combine the dataframes and save to a csv

In [None]:
df_new = get_dataset(time_from=next_timestamp,time_to='')

### Combine the dataframes and save to a csv

In [None]:
df=pd.concat([df,df_new])
df.drop_duplicates(subset=['summary'],inplace=True,keep='first')
df.to_csv('tsla_sentiment.csv')

### Explanation

In [36]:
data=_get_data('TSLA',"20030410T0130","",api_key)

In [38]:
data.keys()

dict_keys(['items', 'sentiment_score_definition', 'relevance_score_definition', 'feed'])

In [39]:
data['items']

'692'

In [40]:
data['sentiment_score_definition']

'x <= -0.35: Bearish; -0.35 < x <= -0.15: Somewhat-Bearish; -0.15 < x < 0.15: Neutral; 0.15 <= x < 0.35: Somewhat_Bullish; x >= 0.35: Bullish'

In [41]:
data['relevance_score_definition']

'0 < x <= 1, with a higher score indicating higher relevance.'

In [44]:
df=pd.DataFrame(data['feed'])

In [47]:
df['ticker_sentiment'].iloc[0]

[{'ticker': 'TSLA',
  'relevance_score': '0.055236',
  'ticker_sentiment_score': '-0.023999',
  'ticker_sentiment_label': 'Neutral'},
 {'ticker': 'CRYPTO:BTC',
  'relevance_score': '0.164656',
  'ticker_sentiment_score': '-0.0489',
  'ticker_sentiment_label': 'Neutral'},
 {'ticker': 'CRYPTO:DOGE',
  'relevance_score': '0.110209',
  'ticker_sentiment_score': '-0.026718',
  'ticker_sentiment_label': 'Neutral'},
 {'ticker': 'CRYPTO:SOL',
  'relevance_score': '0.055236',
  'ticker_sentiment_score': '0.215945',
  'ticker_sentiment_label': 'Somewhat-Bullish'}]