**Installing and Importing Dependencies**

In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import os

In [None]:
# Ensure third-party dependencies are installed.
# `datetime` is part of the standard library and must not be pip-installed
# (the previous entry "datetime," also carried a stray comma that made the
# requirement string invalid). `yfinance` is imported further down in this
# notebook, so it is listed here as well.

dependencies = [
    "numpy",
    "pandas",
    "matplotlib",
    "tweetnlp",
    "yfinance",
]

for dependency in dependencies:
  # os.system returns the command's exit status; surface failures instead of
  # letting a later import fail with a less helpful error.
  if os.system(f"pip3 install {dependency}") != 0:
    print(f"Warning: pip3 install {dependency} failed")

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import datetime

import tweetnlp

In [None]:
# Work from the project folder on the mounted Drive. The absolute path (Drive
# is mounted at /content/drive above) keeps this cell re-runnable: a relative
# path only resolves the first time, while the working directory is /content.
os.chdir("/content/drive/MyDrive/sentiment_bot")

**Sentiment Analysis Model Loading**

In [None]:
# Load tweetnlp's "sentiment" model; get_sentiment() calls its .sentiment() method.
model = tweetnlp.load_model("sentiment")

In [None]:
def get_sentiment(s: str) -> int:
  """Classify text with the module-level `model` and map its label to a score.

  Returns 1 for a 'positive' label, -1 for a 'negative' label and 0 for any
  other label.
  """
  label_scores = {'positive': 1, 'negative': -1}
  label = model.sentiment(s)['label']
  return label_scores.get(label, 0)

In [None]:
# Read the tweet dataset from the working directory; on_bad_lines='skip' drops
# rows that cannot be parsed instead of raising.
tweets = pd.read_csv('stock.csv', on_bad_lines='skip')

In [None]:
# Clean the raw frame in one chain: drop rows with any missing value, keep only
# the 'stock_images' category, then discard the columns that are not used later.
# Note: 'category' is dropped here, so this cell cannot be re-run without
# re-reading the CSV first.
tweets = (
    tweets
    .dropna()
    .loc[lambda frame: frame['category'] == 'stock_images']
    .drop(columns=['tweet_url', 'tweet_type', 'price_of_ticker', 'change_of_ticker', 'category'])
)

In [None]:
def tickers_to_array(s:str)->list[str]:
  """Parse the string form of a ticker list into a list of ticker strings.

  The first and last characters of `s` are dropped (presumably the enclosing
  brackets), the remainder is split on ', ', and every piece loses its first
  two characters and its last character (presumably an opening quote plus a
  '$' prefix, and the closing quote -- TODO confirm against stock.csv).

  Note: an empty list literal "[]" yields [''] (one empty ticker), not [].
  """
  return [piece[2:-1] for piece in s[1:-1].split(', ')]

# Convert the stringified ticker lists into real lists, and truncate each
# timestamp string to its first 10 characters before parsing it as a Timestamp.
tweets['tickers_mentioned'] = tweets['tickers_mentioned'].apply(tickers_to_array)
tweets['timestamp'] = tweets['timestamp'].apply(lambda raw: pd.Timestamp(raw[:10]))

In [None]:
class Trade:
  """A trade suggestion for a single ticker.

  Attributes:
    ticker: symbol the suggestion refers to.
    dir: direction flag; get_best_trade passes True for the highest-scoring
      ticker and False for the lowest-scoring one.
    strength: magnitude of the signal (get_best_trade passes the absolute
      summed sentiment).
  """

  def __init__(self, ticker: str, dir: bool, strength: int):
    self.ticker, self.dir, self.strength = ticker, dir, strength

In [None]:
# Digits-only rendering of the current local time (all non-numeric characters
# of str(datetime.now()) are stripped).
curr_date = "".join(filter(str.isnumeric, str(datetime.datetime.now())))

In [None]:
def get_best_trade(date: pd.Timestamp) -> Trade:
  """Pick the ticker with the strongest net sentiment among the tweets of `date`.

  Every tweet whose 'timestamp' equals `date` is scored with get_sentiment,
  and that score is added to each ticker the tweet mentions. The ticker with
  the highest total and the one with the lowest total are then compared by
  absolute value: the highest wins only when it is strictly larger (returned
  with dir=True); otherwise the lowest is returned with dir=False.

  Raises:
    ValueError: if no ticker was collected for `date` (max() of an empty dict).
  """
  todays_tweets = tweets[tweets['timestamp'] == date]

  scores = {}
  for _, tweet in todays_tweets.iterrows():
    score = get_sentiment(tweet['tweet_text'])
    for symbol in tweet['tickers_mentioned']:
      scores[symbol] = scores.get(symbol, 0) + score

  top = max(scores, key=scores.get)
  bottom = min(scores, key=scores.get)
  top_strength = abs(scores[top])
  bottom_strength = abs(scores[bottom])

  # Ties go to the lowest-scoring ticker.
  if top_strength <= bottom_strength:
    return Trade(bottom, False, bottom_strength)
  return Trade(top, True, top_strength)


In [None]:
import yfinance as yf
import json

# Price cache shared by the backtest, persisted in cache.json between runs.
cache = {}
try:
  with open('cache.json', 'r') as f:
    cache = json.load(f)
except FileNotFoundError:
  # First run: the file is only created by dump_cache(), so start empty.
  pass
except json.JSONDecodeError:
  # An empty or corrupted file only costs re-downloading prices.
  print("Warning: cache.json is not valid JSON; starting with an empty cache")
  cache = {}

In [None]:
class TradeResult:
  """Record of one executed trade.

  Attributes:
    ticker: symbol that was traded.
    date: date string of the trade.
    start: value at entry.
    end: value at exit.
    shares: number of shares held.
  """

  def __init__(self, ticker: str, date: str, start: float, end: float, shares: float):
    self.ticker, self.date = ticker, date
    self.start, self.end = start, end
    self.shares = shares

  def get_pnl(self):
    """Return the profit or loss of the trade: end minus start."""
    pnl = self.end - self.start
    return pnl

  def show_trade(self):
    """Print the ticker, start, end and PnL, followed by a separator and a blank line."""
    summary = (
      f"Ticker: {self.ticker}",
      f"Start: {self.start}",
      f"End: {self.end}",
      f"Pnl: {self.get_pnl()}",
      "----------------------\n",
    )
    print("\n".join(summary))

In [None]:
def get_price(ticker:str, date:str)->float:
  """Return the adjusted close of `ticker` on the first trading day at or after `date`.

  Prices are downloaded for the window [date, date + 4 days) so that a date
  falling on a weekend or holiday still resolves to the next session.

  Args:
    ticker: symbol to look up.
    date: start of the window, formatted as 'YYYY-MM-DD'.

  Returns:
    The first 'Adj Close' value in the window, as a plain float.

  Raises:
    ValueError: if the download returned no rows for the window.
  """
  window_end = (pd.Timestamp(date) + pd.Timedelta(days = 4)).strftime('%Y-%m-%d')
  # auto_adjust=False is passed explicitly because the 'Adj Close' column is
  # only present when prices are not adjusted in place, and the default for
  # this flag differs between yfinance releases.
  data = yf.download(ticker, start=date, end=window_end, auto_adjust=False)
  if data.empty:
    raise ValueError(f"No price data for {ticker} between {date} and {window_end}")

  adj_close = data['Adj Close']
  # Some yfinance releases return (field, ticker) MultiIndex columns even for
  # a single symbol; selecting the field then yields a one-column DataFrame.
  if isinstance(adj_close, pd.DataFrame):
    adj_close = adj_close.iloc[:, 0]
  return float(adj_close.iloc[0])

def get_final_balance(equities, balance, date) -> float:
  """Return `balance` plus the market value of every held position at `date`.

  Args:
    equities: iterable of positions; index 0 of each entry is the ticker and
      index 1 the number of shares held.
    balance: cash on hand before liquidation.
    date: valuation date (a 'YYYY-MM-DD' string or anything pd.Timestamp accepts).

  Returns:
    Cash plus shares * price summed over all positions. Prices come from
    get_price, so each position is valued at the first available session at
    or after `date`.
  """
  # The previous version read an undefined `end_date` instead of the `date`
  # parameter and never returned the accumulated balance.
  day = pd.Timestamp(date).strftime('%Y-%m-%d')
  for equity in equities:
    balance += equity[1] * get_price(equity[0], day)
  return balance

def output_trades(trades, balance):
  """Write a text report of `trades` and the final `balance` to the working directory.

  The file is named trades_<digits>.txt, where <digits> are the numeric
  characters of the current local time. One line is written per trade
  (date, ticker, start, end and PnL), followed by a final-balance line
  without a trailing newline.
  """
  stamp = "".join(ch for ch in str(datetime.datetime.now()) if ch.isnumeric())

  with open(f"trades_{stamp}.txt", 'w') as report:
    report.writelines(
      f"{t.date} : {t.ticker} : From: {t.start} To: {t.end} PnL: {t.get_pnl()}\n"
      for t in trades
    )
    report.write(f"Final Balance: {balance}")

def dump_cache():
  """Persist the module-level price `cache` to cache.json in the working directory.

  Raises:
    TypeError: if the cache holds a value json cannot serialize; in that case
      the existing cache.json is left untouched.
  """
  # Serialize first: open(..., "w") truncates the file, so a serialization
  # error after opening would wipe the previously saved cache.
  serialized = json.dumps(cache)
  with open("cache.json", "w") as out_file:
    out_file.write(serialized)

In [None]:
def _cached_price(ticker: str, day: str) -> float:
  """Return the price of `ticker` on `day` ('YYYY-MM-DD'), using the shared `cache`.

  Entries are stored as cache[day + ticker] = [ticker, price]; on a miss the
  price is fetched with get_price and stored under that same key.
  """
  key = day + ticker
  if key in cache:
    return cache[key][1]
  price = get_price(ticker, day)
  cache[key] = [ticker, price]
  return price


def backtest(starting_balance: int = 100000, port_split: int = 10, commission: float = 0.002, start_date:str="2022-12-27", end_date: str = "2023-11-20") -> tuple:
  """Simulate a long-only strategy driven by the daily best sentiment trade.

  For every tweet date inside [start_date, end_date], in chronological order,
  the ticker chosen by get_best_trade is bought when its direction flag is
  set. While cash remains, each purchase spends starting_balance / port_split.
  Once cash is used up, the oldest open position is sold (commission is
  charged on the proceeds) and the proceeds fund the new purchase.

  Args:
    starting_balance: initial cash.
    port_split: number of equal slices the starting cash is divided into.
    commission: fraction of sale proceeds charged when a position is sold.
    start_date: first date (inclusive) to trade, 'YYYY-MM-DD'.
    end_date: last date (inclusive) to trade; also the date passed to
      get_final_balance for the final valuation.

  Returns:
    (balance, trades): the value returned by get_final_balance and the list of
    TradeResult objects in the order the trades were opened. Positions still
    open at the end keep end == 0 in their TradeResult.

  Side effects:
    Prints progress, writes the trade report via output_trades and persists
    the price cache via dump_cache.

  Note:
    Earlier versions of this function stored sell-side prices under the wrong
    ticker in cache.json; delete a cache file produced by them before relying
    on cached results.
  """
  cost_per_trade = starting_balance / port_split
  balance = starting_balance

  start_time = pd.Timestamp(start_date)
  end_time = pd.Timestamp(end_date)

  equities = []      # open positions as [ticker, shares], oldest first
  open_results = []  # TradeResult of each open position, same order as equities
  trades = []        # every TradeResult ever opened

  # unique() keeps file order and, depending on the pandas version, may yield
  # numpy datetime64 values; normalise to Timestamps and trade chronologically.
  trading_days = sorted(pd.Timestamp(d) for d in tweets['timestamp'].unique())

  for date in trading_days:
    if date < start_time or date > end_time:
      continue

    # Score the day once; get_best_trade runs the model on every tweet.
    trade = get_best_trade(date)
    print(f"Date: {date}, Best Trade: {trade.ticker} ")

    if not trade.dir:
      continue

    day = date.strftime('%Y-%m-%d')
    try:
      price = _cached_price(trade.ticker, day)

      if balance > 0:
        print(f"Current balance: {balance}")
        rotating = False
        stake = cost_per_trade
      else:
        # Price the position being SOLD by its own ticker, and only look at it
        # here: it is removed further down, after nothing can fail any more.
        rotating = True
        oldest_ticker, oldest_shares = equities[0][0], equities[0][1]
        first_price = _cached_price(oldest_ticker, day)
        stake = first_price * oldest_shares * (1 - commission)

      print(f"Cost of trade: {stake}")
      shares = stake / price
    except Exception as exc:
      # Report the real cause; not every failure is an unknown symbol.
      print(f"Error: trade for {trade.ticker} on {day} skipped: {exc!r}")
      continue

    if rotating:
      equities.pop(0)
      open_results.pop(0).end = stake
    else:
      balance -= cost_per_trade

    result = TradeResult(trade.ticker, day, stake, 0, shares)
    equities.append([trade.ticker, shares])
    open_results.append(result)
    trades.append(result)

  # Liquidate whatever is still held to obtain the final balance.
  balance = get_final_balance(equities, balance, end_date)

  # Output and cleanup
  output_trades(trades, balance)
  dump_cache()

  return balance, trades


In [None]:
# Run the backtest twice: once starting from 2022-05-21 and once with the
# default start date. Each call returns a (balance, trades) pair.
returns1, trades_made1 = backtest(start_date="2022-05-21")
returns2, trades_made2 = backtest()