<a href="https://colab.research.google.com/github/HenryLiu714/Sentiment-Analysis-Model/blob/main/model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Installing and Importing Dependencies**

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:
import os

In [5]:
# Ensure dependencies are installed

dependencies = [
    "numpy",
    "pandas",
    "matplotlib",
    "tweetnlp"
]

for dependency in dependencies:
  os.system(f"pip3 install {dependency}")

In [6]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import tweetnlp

In [7]:
os.chdir("drive/MyDrive/sentiment_bot")

**Sentiment Analysis Model Loading**

In [8]:
model = tweetnlp.load_model("sentiment")

Downloading config.json:   0%|          | 0.00/929 [00:00<?, ?B/s]

Downloading vocab.json:   0%|          | 0.00/878k [00:00<?, ?B/s]

Downloading merges.txt:   0%|          | 0.00/446k [00:00<?, ?B/s]

Downloading special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/478M [00:00<?, ?B/s]

Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [9]:
def get_sentiment(s: str) -> int:
  sentiment = model.sentiment(s)['label']

  if sentiment == 'positive':
    return 1
  elif sentiment == 'negative':
    return -1
  else:
    return 0

In [10]:
tweets = pd.read_csv('stock.csv', on_bad_lines='skip')

In [11]:
tweets = tweets.dropna()
tweets = tweets[tweets['category']=='stock_images']
tweets = tweets.drop(columns=['tweet_url', 'tweet_type', 'price_of_ticker', 'change_of_ticker', 'category'])

In [12]:
def tickers_to_array(s:str)->list[str]:
  s = s[1:-1].split(', ')
  tickers = []
  for ticker in s:
    tickers.append(ticker[2:-1])
  return tickers

tweets['tickers_mentioned'] = tweets['tickers_mentioned'].transform(lambda x: tickers_to_array(x))
tweets['timestamp'] = tweets['timestamp'].transform(lambda x: pd.Timestamp(x[:10]))

In [13]:
class Trade:
  def __init__(self, ticker: str, dir: bool, strength: int):
    self.ticker = ticker
    self.dir = dir
    self.strength = strength

In [14]:
def get_best_trade(date: pd.Timestamp) -> Trade:
  ticker_data = tweets[tweets['timestamp'] == date]

  tickers = {}

  for index, row in ticker_data.iterrows():
    sentiment = get_sentiment(row['tweet_text'])

    for ticker in row['tickers_mentioned']:
      if ticker not in tickers:
        tickers[ticker] = sentiment
      else:
        tickers[ticker] += sentiment

  best_ticker = max(tickers, key=tickers.get)
  worst_ticker = min(tickers, key=tickers.get)

  if abs(tickers[best_ticker]) > abs(tickers[worst_ticker]):
    return Trade(best_ticker, True, abs(tickers[best_ticker]))
  else:
    return Trade(worst_ticker, False, abs(tickers[worst_ticker]))


In [70]:
import yfinance as yf
import json

cache = {}
with open('cache.json', 'r') as f:
  cache = json.load(f)

In [54]:
class TradeResult:
  def __init__(self, ticker: str, date: str, start: float, end: float):
    self.ticker = ticker
    self.date = date
    self.start = start
    self.end = end
    self.pnl = end - start
  def show_trade(self):
    print(f"Ticker: {self.ticker}")
    print(f"Start: {self.start}")
    print(f"End: {self.end}")
    print(f"Pnl: {self.pnl}")
    print("----------------------\n")

In [86]:
def backtest(starting_balance: int = 100000, port_split: int = 10, commission: float = 0.002, end_date: str = "2023-11-20") -> int:
  cost_per_trade = starting_balance / port_split
  balance = starting_balance
  curr_balance = balance

  #equities we hold :3
  equities = []
  trades = []

  for date in tweets['timestamp'].unique():
    print(f"Date: {date}, Best Trade: {get_best_trade(date).ticker} ")

    trade = get_best_trade(date)
    if trade.dir:
      print("BUYING")
      try:
        # Caching because i dont wanna wait
        if date.strftime('%Y-%m-%d') in cache and cache[date.strftime('%Y-%m-%d')][0] == trade.ticker:
          price = cache[date.strftime('%Y-%m-%d')][1]
        else:
          data = yf.download(trade.ticker, start=date.strftime('%Y-%m-%d'), end=(date + pd.Timedelta(days = 4)).strftime('%Y-%m-%d'))
          price = data.iloc[0]['Adj Close']
          cache[date.strftime('%Y-%m-%d')] = [trade.ticker, price]

        if balance > 0:
          #price = rice # henry slays # henry is a cat # inf/tj men >> #henry has emotions shooketh # henry si more f than t based on prior memories # henry are we done yet :O
          print(f"Current balance: {balance}")
          print(f"Cost of trade: {cost_per_trade}")
          equities.append([trade.ticker, cost_per_trade/price])
          balance -= cost_per_trade

          # Add to trade_results
          trades.append(TradeResult(trade.ticker, date.strftime('%Y-%m-%d'), cost_per_trade, 0))
        else:
          first_trade = equities.pop(0)
          first_price = yf.download(first_trade[0], start=date.strftime('%Y-%m-%d'), end=(date + pd.Timedelta(days = 4)).strftime('%Y-%m-%d')).iloc[0]['Adj Close']
          cash_on_hand = first_price * first_trade[1] * (1 - commission) # After selling first stock :3
          print(f"Cost of trade: {cash_on_hand}")

          equities.append([trade.ticker, cash_on_hand/price])

          # Add to trade results
          trades[-port_split].end = cash_on_hand
          trades.append(TradeResult(trade.ticker, date.strftime('%Y-%m-%d'), cash_on_hand, 0))
      except:
        print(f"Error: Symbol {trade.ticker} not found")


    # Getting all the money (hayun's favorite part >:3)
  for equity in equities:
    data = yf.download(equity[0], start=end_date, end=(pd.Timestamp(end_date) + pd.Timedelta(days = 4)).strftime('%Y-%m-%d'))
    price = data.iloc[0]['Adj Close']

    balance += equity[1] * price

  with open('trades.txt', 'w') as f:
    for trade in trades:
      f.write(f"{trade.date} : {trade.ticker} : From: {trade.start} To: {trade.end} PnL: {trade.pnl}\n")

  return balance, trades


In [77]:
def document_trades(trades: list[TradeResult]):
  with open('trades.txt', 'w') as f:
    for trade in trades:
      f.write(f"{trade.date} : {trade.ticker} : From: {trade.start} To: {trade.end} PnL: {trade.pnl}\n")

In [None]:
returns, trades_made = backtest()

Date: 2021-12-27 00:00:00, Best Trade: COIN 
BUYING
Current balance: 100000
Cost of trade: 10000.0
Date: 2022-01-02 00:00:00, Best Trade: AMD 
BUYING
Current balance: 90000.0
Cost of trade: 10000.0
Date: 2022-01-03 00:00:00, Best Trade: TSLA 
BUYING
Current balance: 80000.0
Cost of trade: 10000.0
Date: 2022-01-04 00:00:00, Best Trade: F 
BUYING
Current balance: 70000.0
Cost of trade: 10000.0
Date: 2022-01-05 00:00:00, Best Trade: CRM 
Date: 2022-01-06 00:00:00, Best Trade: AMC 
Date: 2022-05-19 00:00:00, Best Trade: ^SPX 
Date: 2022-05-20 00:00:00, Best Trade: ES=F 
Date: 2022-05-21 00:00:00, Best Trade: ^SPX 
BUYING
Current balance: 60000.0
Cost of trade: 10000.0
Date: 2022-05-22 00:00:00, Best Trade: AAPL 
BUYING
Current balance: 50000.0
Cost of trade: 10000.0
Date: 2022-05-23 00:00:00, Best Trade: SBUX 
BUYING
Current balance: 40000.0
Cost of trade: 10000.0
Date: 2022-05-24 00:00:00, Best Trade: TWTR 
Date: 2022-05-25 00:00:00, Best Trade: TSLA 
BUYING
Current balance: 30000.0
Cost 

[*********************100%%**********************]  1 of 1 completed

BUYING
Cost of trade: 2781.0254944338926





Date: 2022-05-31 00:00:00, Best Trade: RVP 


[*********************100%%**********************]  1 of 1 completed

BUYING
Cost of trade: 6766.259111575727





Date: 2022-06-01 00:00:00, Best Trade: ES=F 


[*********************100%%**********************]  1 of 1 completed

BUYING
Cost of trade: 6158.539404311261





Date: 2022-06-02 00:00:00, Best Trade: ES=F 


[*********************100%%**********************]  1 of 1 completed

BUYING
Cost of trade: 5769.723777507045





Date: 2022-06-03 00:00:00, Best Trade: QQQ 


[*********************100%%**********************]  1 of 1 completed

BUYING
Cost of trade: 10318.52270269739





Date: 2022-06-04 00:00:00, Best Trade: JNJ 


[*********************100%%**********************]  1 of 1 completed

BUYING
Cost of trade: 10191.300032693776





Date: 2022-06-05 00:00:00, Best Trade: SPY 


[*********************100%%**********************]  1 of 1 completed

BUYING
Cost of trade: 10735.77456837287





Date: 2022-06-06 00:00:00, Best Trade: AMZN 


[*********************100%%**********************]  1 of 1 completed

BUYING
Cost of trade: 10828.935891269595





Date: 2022-06-07 00:00:00, Best Trade: SPY 


[*********************100%%**********************]  1 of 1 completed

BUYING
Cost of trade: 9952.973743086983





Date: 2022-06-08 00:00:00, Best Trade: GME 


[*********************100%%**********************]  1 of 1 completed

BUYING
Cost of trade: 9959.280838103583





Date: 2022-06-09 00:00:00, Best Trade: SPY 


[*********************100%%**********************]  1 of 1 completed

BUYING
Cost of trade: 2681.7354259005606





Date: 2022-06-10 00:00:00, Best Trade: SPY 


[*********************100%%**********************]  1 of 1 completed

BUYING
Cost of trade: 6461.031730013642





Date: 2022-06-11 00:00:00, Best Trade: USD 
Date: 2022-06-12 00:00:00, Best Trade: AAPL 


[*********************100%%**********************]  1 of 1 completed

BUYING
Cost of trade: 5623.665975066515





Date: 2022-06-13 00:00:00, Best Trade: SPY 
Date: 2022-06-14 00:00:00, Best Trade: SPY 


[*********************100%%**********************]  1 of 1 completed

BUYING
Cost of trade: 5153.438786886594





Date: 2022-06-15 00:00:00, Best Trade: SPY 


[*********************100%%**********************]  1 of 1 completed

BUYING
Cost of trade: 9510.914958316416





Date: 2022-06-16 00:00:00, Best Trade: SPX 
Date: 2022-06-17 00:00:00, Best Trade: CRSP 


[*********************100%%**********************]  1 of 1 completed

BUYING
Cost of trade: 9770.76956981525





Date: 2022-06-18 00:00:00, Best Trade: XLE 
Date: 2022-06-19 00:00:00, Best Trade: ES=F 
Date: 2022-06-20 00:00:00, Best Trade: FDX 
Date: 2022-06-21 00:00:00, Best Trade: CRSP 


[*********************100%%**********************]  1 of 1 completed

BUYING
Cost of trade: 9801.046622372725





Date: 2022-06-22 00:00:00, Best Trade: PFE 


[*********************100%%**********************]  1 of 1 completed

BUYING
Cost of trade: 9435.474786469145





Date: 2022-06-23 00:00:00, Best Trade: RBLX 


[*********************100%%**********************]  1 of 1 completed

BUYING
Cost of trade: 9071.816787219504





Date: 2022-06-24 00:00:00, Best Trade: PFE 
Date: 2022-06-25 00:00:00, Best Trade: ALTS 
Date: 2022-06-26 00:00:00, Best Trade: PLTR 
Date: 2022-06-27 00:00:00, Best Trade: RACE 
Date: 2022-06-28 00:00:00, Best Trade: MA 
Date: 2022-06-29 00:00:00, Best Trade: SPX 
Date: 2022-06-30 00:00:00, Best Trade: NQ=F 


[*********************100%%**********************]  1 of 1 completed

BUYING
Cost of trade: 8762.229324413443





Date: 2022-07-01 00:00:00, Best Trade: KO 


[*********************100%%**********************]  1 of 1 completed

BUYING
Cost of trade: 2552.6795232801373





Date: 2022-07-02 00:00:00, Best Trade: AA 
Date: 2022-07-03 00:00:00, Best Trade: NVDA 
Date: 2022-07-04 00:00:00, Best Trade: PSJ 


[*********************100%%**********************]  1 of 1 completed

BUYING
Cost of trade: 6345.713204585778





Date: 2022-07-05 00:00:00, Best Trade: DXY 


[*********************100%%**********************]  1 of 1 completed
ERROR:yfinance:
1 Failed download:
ERROR:yfinance:['DXY']: YFPricesMissingError('$%ticker%: possibly delisted; No price data found  (1d 2022-07-05 -> 2022-07-09)')


BUYING
$DXY: possibly delisted; No price data found  (1d 2022-07-05 -> 2022-07-09)
Error: Symbol DXY not found
Date: 2022-07-06 00:00:00, Best Trade: SPY 


[*********************100%%**********************]  1 of 1 completed

BUYING
Cost of trade: 6082.247789724629





Date: 2022-07-07 00:00:00, Best Trade: AMZN 


[*********************100%%**********************]  1 of 1 completed

BUYING
Cost of trade: 5374.245053519971





Date: 2022-07-08 00:00:00, Best Trade: ES=F 


[*********************100%%**********************]  1 of 1 completed

BUYING
Cost of trade: 9770.965231714013





Date: 2022-07-09 00:00:00, Best Trade: USD 
Date: 2022-07-10 00:00:00, Best Trade: DUOL 


[*********************100%%**********************]  1 of 1 completed

BUYING
Cost of trade: 10268.042988765485





Date: 2022-07-11 00:00:00, Best Trade: TSLA 
Date: 2022-07-12 00:00:00, Best Trade: LULU 
Date: 2022-07-13 00:00:00, Best Trade: TSLA 


[*********************100%%**********************]  1 of 1 completed

BUYING
Cost of trade: 12206.00844286218





Date: 2022-07-14 00:00:00, Best Trade: SPY 
Date: 2022-07-15 00:00:00, Best Trade: APT 
Date: 2022-07-16 00:00:00, Best Trade: DOCS 


[*********************100%%**********************]  1 of 1 completed

BUYING
Cost of trade: 9738.998984660555





Date: 2022-07-17 00:00:00, Best Trade: INGN 
Date: 2022-07-18 00:00:00, Best Trade: DXY 


[*********************100%%**********************]  1 of 1 completed
ERROR:yfinance:
1 Failed download:
ERROR:yfinance:['DXY']: YFPricesMissingError('$%ticker%: possibly delisted; No price data found  (1d 2022-07-18 -> 2022-07-22)')


BUYING
$DXY: possibly delisted; No price data found  (1d 2022-07-18 -> 2022-07-22)
Error: Symbol DXY not found
Date: 2022-07-19 00:00:00, Best Trade: SPY 
Date: 2022-07-20 00:00:00, Best Trade: TSLA 
Date: 2022-07-21 00:00:00, Best Trade: SPX 
Date: 2022-07-22 00:00:00, Best Trade: TSLA 
Date: 2022-07-23 00:00:00, Best Trade: C 
Date: 2022-07-24 00:00:00, Best Trade: AAPL 


[*********************100%%**********************]  1 of 1 completed

BUYING
Cost of trade: 10461.088880841264





Date: 2022-07-25 00:00:00, Best Trade: AMZN 


[*********************100%%**********************]  1 of 1 completed

BUYING
Cost of trade: 9370.437249151217





Date: 2022-07-26 00:00:00, Best Trade: VIX 


[*********************100%%**********************]  1 of 1 completed
ERROR:yfinance:
1 Failed download:
ERROR:yfinance:['VIX']: YFPricesMissingError('$%ticker%: possibly delisted; No price data found  (1d 2022-07-26 -> 2022-07-30)')


BUYING
$VIX: possibly delisted; No price data found  (1d 2022-07-26 -> 2022-07-30)
Error: Symbol VIX not found
Date: 2022-07-27 00:00:00, Best Trade: SOXL 


[*********************100%%**********************]  1 of 1 completed

BUYING
Cost of trade: 2493.3622649084086





Date: 2022-07-28 00:00:00, Best Trade: DXY 
Date: 2022-07-29 00:00:00, Best Trade: MSFT 


[*********************100%%**********************]  1 of 1 completed

BUYING
Cost of trade: 6673.863610924296





Date: 2022-07-30 00:00:00, Best Trade: SPX 
Date: 2022-07-31 00:00:00, Best Trade: SPX 
Date: 2022-08-01 00:00:00, Best Trade: FCX 


[*********************100%%**********************]  1 of 1 completed

BUYING
Cost of trade: 6505.957481358079





Date: 2022-08-02 00:00:00, Best Trade: ZM 


[*********************100%%**********************]  1 of 1 completed

BUYING
Cost of trade: 6185.564405253016





Date: 2022-08-03 00:00:00, Best Trade: AMZN 


[*********************100%%**********************]  1 of 1 completed

BUYING
Cost of trade: 10388.812071982762





Date: 2022-08-04 00:00:00, Best Trade: PARA 


[*********************100%%**********************]  1 of 1 completed

BUYING
Cost of trade: 10723.477747284722





Date: 2022-08-05 00:00:00, Best Trade: SPY 
Date: 2022-08-06 00:00:00, Best Trade: NDX 


[*********************100%%**********************]  1 of 1 completed

BUYING
Cost of trade: 14924.99164219196





Date: 2022-08-07 00:00:00, Best Trade: SPY 


[*********************100%%**********************]  1 of 1 completed

BUYING
Cost of trade: 9017.814692113956





Date: 2022-08-08 00:00:00, Best Trade: SPX 
Date: 2022-08-09 00:00:00, Best Trade: ES=F 
Date: 2022-08-10 00:00:00, Best Trade: AAPL 


[*********************100%%**********************]  1 of 1 completed

BUYING
Cost of trade: 11568.145777839349





Date: 2022-08-11 00:00:00, Best Trade: DKNG 
Date: 2022-08-12 00:00:00, Best Trade: DNMR 


In [None]:
out_file = open("cache.json", "w")
out_file.write(json.dumps(cache))
out_file.close()