**Installing and Importing Dependencies**

In [4]:
# Mount Google Drive at /content/drive so files stored there can be read by later cells.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [1]:
import os

In [3]:
# Ensure dependencies are installed.
#
# The install runs through the kernel's own interpreter (sys.executable -m pip)
# so packages land in the environment this notebook actually imports from, and
# check=True makes a failed install stop here instead of surfacing later as an
# ImportError.
import subprocess
import sys

dependencies = [
    "numpy",
    "pandas",
    "matplotlib",
    "tweetnlp",
    "yfinance",  # imported further down for the backtesting cells
]

subprocess.run(
    [sys.executable, "-m", "pip", "install", "--quiet", *dependencies],
    check=True,
)

In [5]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import tweetnlp

In [6]:
# Work from the project folder on the mounted Drive. The path is absolute
# (anchored at the /content/drive mount point) so re-running this cell still
# works after the working directory has already been changed.
os.chdir("/content/drive/MyDrive/sentiment_bot")

**Sentiment Analysis Model Loading**

In [7]:
# Load tweetnlp's pretrained sentiment classifier; per the log output below it
# resolves to the cardiffnlp/twitter-roberta-base-sentiment-latest checkpoint.
model = tweetnlp.load_model("sentiment")

Downloading config.json:   0%|          | 0.00/929 [00:00<?, ?B/s]

Downloading vocab.json:   0%|          | 0.00/878k [00:00<?, ?B/s]

Downloading merges.txt:   0%|          | 0.00/446k [00:00<?, ?B/s]

Downloading special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/478M [00:00<?, ?B/s]

Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [9]:
!ls

BACKTESTING.ipynb  model.ipynb	stock.csv  tweets.csv


In [8]:
def get_sentiment(s: str) -> int:
  """Score a piece of text with the loaded sentiment model.

  Returns 1 when the model labels the text 'positive', -1 when it labels it
  'negative', and 0 for any other label.
  """
  label_scores = {'positive': 1, 'negative': -1}
  predicted_label = model.sentiment(s)['label']
  return label_scores.get(predicted_label, 0)

In [10]:
# Load the raw dataset from stock.csv; lines that cannot be parsed are skipped
# rather than aborting the read (on_bad_lines='skip').
tweets = pd.read_csv('stock.csv', on_bad_lines='skip')

In [11]:
# Keep only complete rows in the 'stock_images' category, then discard the
# columns that the cells below never reference.
tweets = (
    tweets
    .dropna()
    .loc[lambda frame: frame['category'] == 'stock_images']
    .drop(columns=['tweet_url', 'tweet_type', 'price_of_ticker', 'change_of_ticker', 'category'])
)

In [12]:
def tickers_to_array(s:str)->list[str]:
  """Parse the string form of a ticker list into a list of bare symbols.

  The input is assumed to look like "['$TEVA', '$GME']": a bracketed,
  comma-separated list of quoted symbols, each with a leading '$'. The
  brackets, quotes and '$' prefix are removed, so that example yields
  ['TEVA', 'GME'].

  An empty list ("[]") yields [] rather than a list holding one empty string,
  and entries are tolerated with or without a space after the comma.
  """
  # Drop the surrounding brackets; anything left is the comma-separated body.
  inner = s.strip()[1:-1].strip()
  if not inner:
    return []

  tickers = []
  for raw in inner.split(','):
    # Strip by character class instead of fixed offsets so a missing '$' or a
    # different quote style does not eat part of the symbol.
    symbol = raw.strip().strip('\'"').lstrip('$')
    if symbol:
      tickers.append(symbol)
  return tickers

# Turn each stringified ticker list into a real Python list of symbols.
tweets['tickers_mentioned'] = tweets['tickers_mentioned'].apply(tickers_to_array)
# Keep only the first 10 characters of each timestamp string (presumably the
# YYYY-MM-DD date part) and parse that into a pandas Timestamp.
tweets['timestamp'] = tweets['timestamp'].apply(lambda raw: pd.Timestamp(raw[:10]))

In [13]:
class Trade:
  """A trade idea: the ticker, a direction flag, and a signal strength."""

  def __init__(self, ticker: str, dir: bool, strength: int):
    # get_best_trade passes dir=True for its highest-scoring ticker and
    # dir=False for its lowest-scoring one; strength is the absolute score.
    self.ticker, self.dir, self.strength = ticker, dir, strength

In [14]:
def get_best_trade(date: pd.Timestamp) -> Trade:
  """Pick the ticker with the strongest aggregate tweet sentiment on `date`.

  Every tweet whose timestamp equals `date` is scored with get_sentiment, and
  that score is added to the running total of each ticker the tweet mentions.

  Returns:
    A Trade for the highest-total ticker with dir=True when the magnitude of
    its total strictly exceeds that of the lowest-total ticker; otherwise a
    Trade for the lowest-total ticker with dir=False. `strength` is the
    absolute value of the chosen ticker's total.

  Raises:
    ValueError: if no tweet on `date` mentions a non-blank ticker.
  """
  ticker_data = tweets[tweets['timestamp'] == date]

  tickers = {}

  for tweet_text, mentioned in zip(ticker_data['tweet_text'], ticker_data['tickers_mentioned']):
    sentiment = get_sentiment(tweet_text)

    for ticker in mentioned:
      # A blank symbol is not a tradable ticker; skip it so it can never be
      # selected as the best or worst pick.
      if not ticker:
        continue
      tickers[ticker] = tickers.get(ticker, 0) + sentiment

  # max()/min() on an empty dict would fail with an opaque message; say what
  # actually went wrong instead.
  if not tickers:
    raise ValueError(f"No tickers mentioned in tweets on {date}")

  best_ticker = max(tickers, key=tickers.get)
  worst_ticker = min(tickers, key=tickers.get)

  # Ties (including the all-neutral case) fall through to the dir=False branch.
  if abs(tickers[best_ticker]) > abs(tickers[worst_ticker]):
    return Trade(best_ticker, True, abs(tickers[best_ticker]))
  else:
    return Trade(worst_ticker, False, abs(tickers[worst_ticker]))


In [17]:
# Preview the cleaned tweets table (timestamp, tweet_text, tickers_mentioned).
tweets

Unnamed: 0,timestamp,tweet_text,tickers_mentioned
0,2021-12-27,RT @unusual_whales: $TEVA 2022-02-18 C $9\nUnd...,[TEVA]
1,2021-12-27,RT @unusual_whales: $TEVA 2022-02-18 C $9\nUnd...,[TEVA]
2,2021-12-27,"$TEVA 2022-02-18 C $9\nUnderlying: $8.485, % D...",[TEVA]
3,2021-12-27,Analyst rating upgrades:\n$H 85 &gt; 95\n\nDow...,"[H, CVNA, FL, GME, PAGS, VSCO]"
4,2021-12-27,"$DISCA 2022-02-18 C $15\nUnderlying: $24.46, %...",[DISCA]
...,...,...,...
14357,2023-11-15,RT @coiledspringcap: Everyone has been concern...,[SPX]
14358,2023-11-15,RT @SmartReversals: $IWM - Daily Chart:\n\n✅Ta...,[IWM]
14359,2023-11-15,RT @SmartReversals: $NDX - Daily Chart:\n\n✅Ta...,[NDX]
14360,2023-11-15,RT @SmartReversals: $SPX - Daily Chart:\n\n✅Ta...,[SPX]


In [32]:
# Sanity check: adding a 5-day Timedelta to a Timestamp.
date = pd.Timestamp('2021-12-27') + pd.Timedelta(days=5)
print(date)

2022-01-01 00:00:00


In [21]:
# yfinance provides the historical price data used by the backtesting cells.

import yfinance as yf

**BACKTESTING**

In [25]:
# Create yfinance Ticker objects for two example symbols (exploratory; the
# get_info() calls are left commented out).
apple = yf.Ticker('AAPL')
# apple.get_info()
google = yf.Ticker('GOOG')
# google.get_info()

In [None]:
def backTest(starting_balance: float = 100000, position_size: float = 10000, holding_days: int = 5):
  """Walk through every tweet date and open a long position on each dir=True signal.

  For each distinct date in `tweets`, get_best_trade picks a ticker. When the
  trade has dir=True and enough cash remains, `position_size` worth of that
  ticker is bought at the first available 'Adj Close' in the window
  [date, date + holding_days).

  Args:
    starting_balance: cash available at the start of the run.
    position_size: cash committed to each position.
    holding_days: length, in calendar days, of the price window requested.

  Returns:
    (balance, equities): the cash left over and a list of [ticker, shares]
    entries, one per position opened.

  TODO: dir=False signals are ignored and positions are never closed.
  """
  balance = starting_balance

  # equities we hold, as [ticker, number_of_shares]
  equities = []

  for date in tweets['timestamp'].unique():
    # .unique() on a datetime column can yield numpy.datetime64 values, which
    # have no strftime; normalise to a pandas Timestamp first.
    date = pd.Timestamp(date)

    try:
      trade = get_best_trade(date)
    except ValueError:
      # get_best_trade has nothing to choose from on this date.
      continue

    if not trade.dir or balance < position_size:
      continue

    window_end = date + pd.Timedelta(days=holding_days)
    data = yf.download(trade.ticker, start=date.strftime('%Y-%m-%d'), end=window_end.strftime('%Y-%m-%d'))

    # No rows come back for unknown symbols or windows with no trading days.
    if data.empty:
      continue

    price = data.iloc[0]['Adj Close']
    equities.append([trade.ticker, position_size / price])
    balance -= position_size

  return balance, equities


In [31]:
# Download a sample AAPL price window and print its first row to inspect the
# columns yfinance returns (Open, High, Low, Close, Adj Close, Volume).
data = yf.download("AAPL", start="2022-01-01", end="2022-01-10")
print(data.iloc[0])

[*********************100%%**********************]  1 of 1 completed

Open         1.778300e+02
High         1.828800e+02
Low          1.777100e+02
Close        1.820100e+02
Adj Close    1.794811e+02
Volume       1.044879e+08
Name: 2022-01-03 00:00:00, dtype: float64



