### Proxy (to connect to OpenAI from China)


In [1]:
import socket
import socks
# Route every socket opened by this kernel through the local proxy on port 7890.
# PySocks keeps a single process-wide default proxy, so a second
# set_default_proxy() call replaces the first. The original cell configured
# SOCKS5 and then immediately overrode it with HTTP; only the effective (HTTP)
# setting is kept. Swap in socks.SOCKS5 here if the local proxy speaks SOCKS5 only.
socks.set_default_proxy(socks.HTTP, "127.0.0.1", 7890)
# Monkey-patch the stdlib socket class so libraries that create sockets
# (including the HTTP stack used by the OpenAI client) go through the proxy.
socket.socket = socks.socksocket

### Configs

In [2]:
# Environment configuration: ticker and the inclusive date window to load.
env_args = dict(
    stock_name="AAPL",
    start_date="2014-01-01",
    end_date="2016-01-01",
)

In [3]:
from token_ import OPEN_AI_TOKEN
# Agent configuration: completion model name plus the API key, which is
# imported from the local token_ module rather than written in the notebook.
agent_args = dict(
    model_name="text-curie-001",
    api_key=OPEN_AI_TOKEN,
)

### Env

In [7]:
import gym
import json
import pandas as pd
from easydict import EasyDict as edict
from tqdm.notebook import tqdm

class Tweets_trading_env(gym.Env):
    """Single-stock trading environment built on StockNet tweet and price files.

    One step corresponds to one date for which both tweets and a price row
    exist. The observation is the list of tweet texts for the current day.
    Actions are interpreted by sign (> 0 buy, < 0 sell, 0 hold) and always
    trade a fixed lot of ``trade_size`` shares at the new day's price. The
    ``cal_on`` price column is normalised so that the first trading day is 1.
    Cash and holdings are not constrained and may become negative.

    Keys read from ``args``:
        stock_name: ticker used to locate the tweet folder and the price CSV.
        start_date, end_date: inclusive date range ("YYYY-MM-DD").
        init_cash: starting cash (default 100).
        init_hold: starting number of shares held (default 100).
        cal_on: price column used for valuation (default "Close").
        trade_size: shares bought or sold per non-zero action (default 100).
    """

    def __init__(self,args):
        args = edict(args)
        self.stock_name = args.stock_name
        self.start_date = args.start_date
        self.end_date = args.end_date
        self.init_cash = args.get("init_cash", 100)
        # Fixed: the original read args.init_cash here, so a supplied
        # init_hold was silently replaced by the cash amount.
        self.init_hold = args.get("init_hold", 100)
        self.cal_on = args.get("cal_on", "Close")
        self.trade_size = args.get("trade_size", 100)

        self.read_file(args.stock_name)

    def read_file(self,stock_name):
        """Load tweets and prices for ``stock_name`` into data frames.

        Populates ``tweets_df`` (columns text, date, time, user_id),
        ``price_df`` (rows restricted to dates that have tweets, with the
        ``cal_on`` column normalised) and ``date_list`` (sorted dates that have
        both tweets and a price). Days without a tweet file are reported and
        skipped; days whose file cannot be parsed are reported and skipped too.
        """
        # sentences
        sentence_path = f"../../stocknet-dataset/tweet/preprocessed/{stock_name}/"
        columns = ["text","date","time","user_id"]
        records = []
        for day in tqdm(pd.date_range(self.start_date,self.end_date)):
            day_str = day.date().strftime("%Y-%m-%d")
            file_path = f"{sentence_path}/{day_str}"
            try:
                with open(file_path, "r") as f:
                    # One JSON object per line; blank lines are ignored.
                    tweets = [json.loads(line) for line in f if line.strip()]
                day_records = [
                    {
                        "text": " ".join(tweet["text"]),
                        "date": day_str,
                        "time": pd.to_datetime(tweet["created_at"]).time(),
                        "user_id": tweet["user_id_str"],
                    }
                    for tweet in tweets
                ]
            except FileNotFoundError:
                print(f"{day_str} is missing",end = " ")
                continue
            except (ValueError, KeyError, TypeError) as err:
                # Keep the best-effort behaviour (skip the day) but do not
                # mislabel a malformed file as a missing one.
                print(f"{day_str} is unreadable ({err!r})",end = " ")
                continue
            records.extend(day_records)
        # Build the frame once instead of concatenating inside the loop; passing
        # the column names also keeps this valid when no tweets were found.
        self.tweets_df = pd.DataFrame(records, columns=columns)
        self.date_list = sorted(self.tweets_df["date"].unique().tolist())

        # Prices
        price_path = f"../../stocknet-dataset/price/raw/{stock_name}.csv"
        price_df = pd.read_csv(price_path)
        # Work on a sorted copy so the normalisation below uses the earliest
        # trading day and does not write into a slice of the raw frame.
        price_df = price_df[price_df.Date.isin(self.date_list)].sort_values("Date").copy()
        price_df[self.cal_on] = price_df[self.cal_on] / price_df[self.cal_on].iloc[0]
        self.price_df = price_df
        self.date_list = price_df["Date"].unique().tolist()

    def _set_day(self, day_index):
        """Point the environment at ``date_list[day_index]`` and cache that day's rows."""
        self.day_index = day_index
        self.today = self.date_list[day_index]
        self.today_sentences = self.tweets_df[self.tweets_df.date == self.today]
        self.today_price = self.price_df[self.price_df["Date"] == self.today]

    def reset(self):
        """Restart the episode on the first trading day.

        Returns:
            list[str]: tweet texts for the first day.
        """
        self._set_day(0)
        self.today_cash = self.init_cash
        self.today_hold = self.init_hold
        self.terminal = False
        # Defined up front so step() can always return a reward.
        self.reward = 0
        first_price = self.today_price[self.cal_on].item()
        # Fixed: portfolio value is shares * price + cash (the original
        # multiplied the price by init_cash instead of init_hold).
        self.asset_memory = [self.init_hold * first_price + self.today_cash]
        self.hold_memory = [self.init_hold]
        self.reward_momory = [0]  # attribute name kept as-is for existing callers
        self.action_memory = [0]

        return self.today_sentences.text.tolist()
    
    def update(self):
        """Advance to the next trading day, or set ``terminal`` when none is left."""
        if self.day_index >= len(self.date_list)-1:
            self.terminal = True
        else:
            self._set_day(self.day_index + 1)

    def step(self,action):
        """Move to the next day and execute ``action`` at that day's price.

        Args:
            action: number whose sign selects buy (> 0), sell (< 0) or hold (0).

        Returns:
            tuple: (tweet texts for the current day, reward, terminal, {}).
            The reward is the change in portfolio value versus the previous
            day. On the terminal step no trade happens and the reward is 0.
        """
        last_price = self.today_price[self.cal_on].item()
        last_asset = last_price * self.today_hold + self.today_cash
        self.update()
        if self.terminal:
            print("all_done")
            # No transition took place, so do not re-report the previous reward.
            self.reward = 0
        else:
            next_price = self.today_price[self.cal_on].item()
            trade_value = self.trade_size * next_price
            if action >0:
                self.today_hold += self.trade_size
                self.today_cash -= trade_value
            elif action <0:
                self.today_hold -= self.trade_size
                # Fixed: selling adds the proceeds to cash (the original subtracted them).
                self.today_cash += trade_value

            next_asset = self.today_hold * next_price +  self.today_cash
            self.reward = next_asset - last_asset
            self.asset_memory.append(next_asset)
            self.hold_memory.append(self.today_hold)
            self.reward_momory.append(self.reward)
            self.action_memory.append(action)
        
        return self.today_sentences.text.tolist(), self.reward ,self.terminal ,{}
    

### GPT Agent

In [8]:
import openai
import numpy as np
import time

class GPT_agent:
    """Trading agent that maps the average tweet sentiment to buy/sell/hold.

    Each tweet is classified by an OpenAI completion model as positive (+1),
    neutral (0) or negative (-1). The mean score over the day's tweets is
    compared with ``threshold`` to choose the action.

    Keys read from ``args``:
        model_name: completion model used for classification.
        api_key: OpenAI API key (assigned to ``openai.api_key``).
        threshold: absolute mean score required to trade (default 0.3).
    """

    # Number of attempts per tweet before a rate-limit error is re-raised.
    MAX_ATTEMPTS = 5

    def __init__(self,args):
        args = edict(args)
        self.model_name = args.model_name
        self.threshold = args.get("threshold", 0.3)
        openai.api_key = args.api_key
   
    def __call__(self,obs:list):
        """Return 1 (buy), -1 (sell) or 0 (hold) for a list of tweet texts."""
        self.score_list = [self.get_sentiment(o) for o in tqdm(obs)]
        # An empty observation has no sentiment; avoid np.mean([]) -> NaN + warning.
        self.score_list_mean = np.mean(self.score_list) if self.score_list else 0.0
        if self.score_list_mean > self.threshold:
            return 1
        elif self.score_list_mean < -self.threshold:
            return -1
        else:
            return 0
    
    def get_sentiment(self,sentence):
        """Classify one sentence; returns 1, 0 or -1.

        Waits one second before every request and retries with exponential
        back-off when the API reports a rate limit.

        Raises:
            openai.error.RateLimitError: if the limit is still hit after
                ``MAX_ATTEMPTS`` attempts.
        """
        prompt = f"Decide whether a sentence's sentiment is positive, neutral, or negative.\n\nSentence: \"{sentence}\"\nSentiment: "
        for attempt in range(self.MAX_ATTEMPTS):
            time.sleep(1)
            try:
                response = openai.Completion.create(
                    # Fixed: honour the configured model instead of a hard-coded name.
                    model = self.model_name,
                    prompt = prompt,
                    temperature=0,
                    max_tokens=60,
                    top_p=1,
                    frequency_penalty=0.5,
                    presence_penalty=0
                    )
                break
            except openai.error.RateLimitError:
                if attempt == self.MAX_ATTEMPTS - 1:
                    raise
                # Back off 1s, 2s, 4s, ... before the next attempt.
                time.sleep(2 ** attempt)
        return self._parse_sentiment(response["choices"][0]["text"])

    @staticmethod
    def _parse_sentiment(text):
        """Map the model's free-text answer to -1, 0 or 1 (case-insensitive).

        "negative" is checked first, matching the original precedence.
        """
        lowered = text.lower()
        if "negative" in lowered:
            return -1
        if "positive" in lowered:
            return 1
        return 0

### Trade

In [9]:
# Run one full episode: the agent reads each day's tweets and the
# environment executes the resulting action on the following trading day.
env = Tweets_trading_env(env_args)
agent = GPT_agent(agent_args)
obs = env.reset()
terminal = False
while True:
    print(env.today)
    action = agent(obs)
    obs, reward, terminal, info = env.step(action)
    if terminal:
        break

  0%|          | 0/731 [00:00<?, ?it/s]

2014-01-17 is missing 2014-01-18 is missing 2014-06-17 is missing 2014-08-05 is missing 2014-08-06 is missing 2014-09-24 is missing 2014-09-25 is missing 2014-09-26 is missing 2014-09-27 is missing 2014-09-28 is missing 2014-11-10 is missing 2014-11-11 is missing 2014-11-12 is missing 2014-11-13 is missing 2014-11-15 is missing 2014-11-16 is missing 2014-11-17 is missing 2014-11-18 is missing 2014-11-19 is missing 2014-11-20 is missing 2014-11-21 is missing 2014-11-22 is missing 2014-11-23 is missing 2014-12-20 is missing 2015-03-08 is missing 2015-06-05 is missing 2015-06-23 is missing 2015-07-08 is missing 2015-08-15 is missing 2015-09-17 is missing 2015-09-18 is missing 2015-11-27 is missing 2015-12-16 is missing 2015-12-18 is missing 2016-01-01 is missing 2014-01-02


  0%|          | 0/33 [00:00<?, ?it/s]

RateLimitError: Rate limit reached for default-global-with-image-limits in organization org-nSneMDCWcDbih9MwXlWohbdJ on requests per min. Limit: 60.000000 / min. Current: 70.000000 / min. Contact support@openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.

### Useless (scratch cells)

In [3]:
import os
import openai

# Read the key from the environment instead of committing it to the notebook.
# The key that used to be written here has been exposed and should be revoked.
openai.api_key = os.getenv("OPENAI_API_KEY")

# One-off request to check the sentiment prompt against a sample sentence.
response = openai.Completion.create(
  model="text-curie-001",
  prompt="Decide whether a sentence's sentiment is positive, neutral, or negative.\n\nSentence: \"I loved the new Batman movie!\"\nSentiment: ",
  temperature=0,
  max_tokens=60,
  top_p=1,
  frequency_penalty=0.5,
  presence_penalty=0
)

In [53]:
import json
from easydict import EasyDict as edict
# Read only the first line (one raw JSON record) of a single day's tweet file.
file_path = "../../stocknet-dataset/tweet/preprocessed/AAPL/2014-01-01"
with open(file_path, "r") as f:
  sentences = f.readline()

In [55]:
# Parse the raw JSON line. This rebinds `sentences` from str to the parsed
# object, so re-running the cell without re-reading the file will fail.
sentences = json.loads(sentences)

In [76]:
# Show the user_id_str field of the parsed tweet record.
sentences["user_id_str"]

'1938270918'

In [66]:
# Keep only the time-of-day part of the tweet's created_at timestamp.
a = pd.to_datetime(sentences["created_at"]).time()

In [74]:
# Display the extracted time object.
a

datetime.time(3, 59, 3)

In [73]:
# Format the time as HH:MM:SS.
# NOTE(review): the recorded output below uses dashes, so it presumably comes
# from an earlier version of this cell — re-run to refresh it.
a.strftime("%H:%M:%S")

'03-59-03'

In [5]:
# Environment configuration: ticker and the inclusive date window to load.
env_args = dict(
    stock_name="AAPL",
    start_date="2014-01-01",
    end_date="2016-01-01",
)

In [14]:
import pandas as pd
# Print every calendar day in the range; both endpoints are included.
for i in pd.date_range(start="2014-01-01", end="2014-01-07"):
    print(i.date())

2014-01-01
2014-01-02
2014-01-03
2014-01-04
2014-01-05
2014-01-06
2014-01-07


In [3]:
import gym
import json
import yfinance as yf
import pandas as pd
from easydict import EasyDict as edict
from tqdm.notebook import tqdm

class Tweets_trading_env(gym.Env):
    """Single-stock trading environment built on StockNet tweet and price files.

    One step corresponds to one date for which both tweets and a price row
    exist. The observation is the list of tweet texts for the current day.
    Actions are interpreted by sign (> 0 buy, < 0 sell, 0 hold) and always
    trade a fixed lot of ``trade_size`` shares at the new day's price. The
    ``cal_on`` price column is normalised so that the first trading day is 1.
    Cash and holdings are not constrained and may become negative.

    Keys read from ``args``:
        stock_name: ticker used to locate the tweet folder and the price CSV.
        start_date, end_date: inclusive date range ("YYYY-MM-DD").
        init_cash: starting cash (default 100).
        init_hold: starting number of shares held (default 100).
        cal_on: price column used for valuation (default "Close").
        trade_size: shares bought or sold per non-zero action (default 100).
    """

    def __init__(self,args):
        args = edict(args)
        self.stock_name = args.stock_name
        self.start_date = args.start_date
        self.end_date = args.end_date
        self.init_cash = args.get("init_cash", 100)
        # Fixed: the original read args.init_cash here, so a supplied
        # init_hold was silently replaced by the cash amount.
        self.init_hold = args.get("init_hold", 100)
        self.cal_on = args.get("cal_on", "Close")
        self.trade_size = args.get("trade_size", 100)

        self.read_file(args.stock_name)

    def read_file(self,stock_name):
        """Load tweets and prices for ``stock_name`` into data frames.

        Populates ``tweets_df`` (columns text, date, time, user_id),
        ``price_df`` (rows restricted to dates that have tweets, with the
        ``cal_on`` column normalised) and ``date_list`` (sorted dates that have
        both tweets and a price). Days without a tweet file are reported and
        skipped; days whose file cannot be parsed are reported and skipped too.
        """
        # sentences
        sentence_path = f"../../stocknet-dataset/tweet/preprocessed/{stock_name}/"
        columns = ["text","date","time","user_id"]
        records = []
        for day in tqdm(pd.date_range(self.start_date,self.end_date)):
            day_str = day.date().strftime("%Y-%m-%d")
            file_path = f"{sentence_path}/{day_str}"
            try:
                with open(file_path, "r") as f:
                    # One JSON object per line; blank lines are ignored.
                    tweets = [json.loads(line) for line in f if line.strip()]
                day_records = [
                    {
                        "text": " ".join(tweet["text"]),
                        "date": day_str,
                        "time": pd.to_datetime(tweet["created_at"]).time(),
                        "user_id": tweet["user_id_str"],
                    }
                    for tweet in tweets
                ]
            except FileNotFoundError:
                print(f"{day_str} is missing")
                continue
            except (ValueError, KeyError, TypeError) as err:
                # Keep the best-effort behaviour (skip the day) but do not
                # mislabel a malformed file as a missing one.
                print(f"{day_str} is unreadable ({err!r})")
                continue
            records.extend(day_records)
        # Build the frame once instead of concatenating inside the loop; passing
        # the column names also keeps this valid when no tweets were found.
        self.tweets_df = pd.DataFrame(records, columns=columns)
        self.date_list = sorted(self.tweets_df["date"].unique().tolist())

        # Prices
        price_path = f"../../stocknet-dataset/price/raw/{stock_name}.csv"
        price_df = pd.read_csv(price_path)
        # Work on a sorted copy so the normalisation below uses the earliest
        # trading day and does not write into a slice of the raw frame.
        price_df = price_df[price_df.Date.isin(self.date_list)].sort_values("Date").copy()
        price_df[self.cal_on] = price_df[self.cal_on] / price_df[self.cal_on].iloc[0]
        self.price_df = price_df
        self.date_list = price_df["Date"].unique().tolist()

    def _set_day(self, day_index):
        """Point the environment at ``date_list[day_index]`` and cache that day's rows."""
        self.day_index = day_index
        self.today = self.date_list[day_index]
        self.today_sentences = self.tweets_df[self.tweets_df.date == self.today]
        self.today_price = self.price_df[self.price_df["Date"] == self.today]

    def reset(self):
        """Restart the episode on the first trading day.

        Returns:
            list[str]: tweet texts for the first day.
        """
        self._set_day(0)
        self.today_cash = self.init_cash
        self.today_hold = self.init_hold
        self.terminal = False
        # Defined up front so step() can always return a reward.
        self.reward = 0
        first_price = self.today_price[self.cal_on].item()
        # Fixed: portfolio value is shares * price + cash (the original
        # multiplied the price by init_cash instead of init_hold).
        self.asset_memory = [self.init_hold * first_price + self.today_cash]
        self.hold_memory = [self.init_hold]
        self.reward_momory = [0]  # attribute name kept as-is for existing callers
        self.action_memory = [0]

        return self.today_sentences.text.tolist()
    
    def update(self):
        """Advance to the next trading day, or set ``terminal`` when none is left."""
        if self.day_index >= len(self.date_list)-1:
            self.terminal = True
        else:
            self._set_day(self.day_index + 1)

    def step(self,action):
        """Move to the next day and execute ``action`` at that day's price.

        Args:
            action: number whose sign selects buy (> 0), sell (< 0) or hold (0).

        Returns:
            tuple: (tweet texts for the current day, reward, terminal, {}).
            The reward is the change in portfolio value versus the previous
            day. On the terminal step no trade happens and the reward is 0.
        """
        last_price = self.today_price[self.cal_on].item()
        last_asset = last_price * self.today_hold + self.today_cash
        self.update()
        if self.terminal:
            print("all_done")
            # No transition took place, so do not re-report the previous reward.
            self.reward = 0
        else:
            next_price = self.today_price[self.cal_on].item()
            trade_value = self.trade_size * next_price
            if action >0:
                self.today_hold += self.trade_size
                self.today_cash -= trade_value
            elif action <0:
                self.today_hold -= self.trade_size
                # Fixed: selling adds the proceeds to cash (the original subtracted them).
                self.today_cash += trade_value

            next_asset = self.today_hold * next_price +  self.today_cash
            self.reward = next_asset - last_asset
            self.asset_memory.append(next_asset)
            self.hold_memory.append(self.today_hold)
            self.reward_momory.append(self.reward)
            self.action_memory.append(action)
        
        return self.today_sentences.text.tolist(), self.reward ,self.terminal ,{}
    

In [6]:
# Build the environment; the constructor calls read_file(), which loads the
# tweet files and the price CSV from disk.
env = Tweets_trading_env(env_args)

  0%|          | 0/731 [00:00<?, ?it/s]

2014-01-17 is missing
2014-01-18 is missing
2014-06-17 is missing
2014-08-05 is missing
2014-08-06 is missing
2014-09-24 is missing
2014-09-25 is missing
2014-09-26 is missing
2014-09-27 is missing
2014-09-28 is missing
2014-11-10 is missing
2014-11-11 is missing
2014-11-12 is missing
2014-11-13 is missing
2014-11-15 is missing
2014-11-16 is missing
2014-11-17 is missing
2014-11-18 is missing
2014-11-19 is missing
2014-11-20 is missing
2014-11-21 is missing
2014-11-22 is missing
2014-11-23 is missing
2014-12-20 is missing
2015-03-08 is missing
2015-06-05 is missing
2015-06-23 is missing
2015-07-08 is missing
2015-08-15 is missing
2015-09-17 is missing
2015-09-18 is missing
2015-11-27 is missing
2015-12-16 is missing
2015-12-18 is missing
2016-01-01 is missing


In [7]:
# `df` is the same object as env.tweets_df (no copy), so adding columns to
# `df` later also changes the frame held by the environment.
df = env.tweets_df

In [8]:
# Save the tweets frame to df.csv in the current working directory.
df.to_csv("df.csv")

In [112]:
from tqdm.notebook import tqdm
# Manual progress bar sized to one tick per row of df; get_gpt_res advances it.
bar = tqdm(total=df.shape[0])

  0%|          | 0/18365 [00:00<?, ?it/s]

In [110]:
# Advance the progress bar by one tick (manual check that the bar works).
bar.update(1)

True

In [114]:
import time
def get_gpt_res(x):
    """Return the model's raw sentiment completion for one tweet row.

    Args:
        x: row-like object with a ``text`` attribute holding the tweet.

    Returns:
        str: the completion text, or "error" if the request failed. After a
        failure the function sleeps 1.5 s before returning, to ease off the API.

    Advances the module-level progress bar ``bar`` by one tick per call.
    """
    sentence = x.text
    # `bar` is only read here, so no `global` declaration is needed.
    bar.update(1)
    try:
        response = openai.Completion.create(
                model = "text-curie-001",
                prompt = f"Decide whether a sentence's sentiment is positive, neutral, or negative.\n\nSentence: \"{sentence}\"\nSentiment: ",
                temperature=0,
                max_tokens=60,
                top_p=1,
                frequency_penalty=0.5,
                presence_penalty=0
                )
        return response["choices"][0]["text"]
    except Exception:
        # Best-effort: mark the row and keep going. Catching Exception rather
        # than using a bare except lets Ctrl-C / kernel interrupt stop a long
        # df.apply run instead of being recorded as "error".
        time.sleep(1.5)
        return "error"

In [115]:
# Classify every tweet row; the raw completion text is stored per row.
df["respense"] = df.apply(get_gpt_res, axis=1)

In [56]:
# Smoke-test the environment with a constant "buy" action until it terminates.
env = Tweets_trading_env(env_args)
res = env.reset()
terminal = False
while True:
    print(env.today)
    state, reward, terminal, info = env.step(1)
    if terminal:
        break
    

  0%|          | 0/32 [00:00<?, ?it/s]

2014-01-17 is missing
2014-01-18 is missing
2014-01-02
2014-01-03
2014-01-06
2014-01-07
2014-01-08
2014-01-09
2014-01-10
2014-01-13
2014-01-14
2014-01-15
2014-01-16
2014-01-21
2014-01-22
2014-01-23
2014-01-24
2014-01-27
2014-01-28
2014-01-29
2014-01-30
2014-01-31
all_done


['$ aapl - major league baseball is installing apple ibeacons at 20 ballparks -> URL stock stocks stockaction',
 'looking for winners like $ nabi $ furx $ cb $ aapl view now URL',
 'option millionaires : mwm : $ aapl i think the 200 day ma is URL',
 "$ aapl apple get a clue why they don't want to use cash for share buy back . disrupt the finance sector . URL",
 "$ aapl how to get $ 50 off the iphone 6 and galaxy s5 , even though they don't exist - bgr URL",
 "apple snaps up several ' . guru ' domain names on launch day URL $ aapl",
 'rt AT_USER how long before $ goog has a bigger market cap than $ aapl again ? URL',
 'patience is the key for apple traders URL $ aapl',
 '$ aapl - apple , samsung spar over potential u . s . ban on smartphone sales -> URL stock stocks stockaction',
 '$ aapl 5 min support & resistance levels URL',
 'rt AT_USER for defined risk 100 % transparency check us out . 7day free trials . URL $ spy $ goog $ aapl $ vxx $ pcln $ amzn …',
 '$ aapl just under that $ 500

In [61]:
import os

# Agent configuration. The API key is read from the OPENAI_API_KEY environment
# variable instead of being written in the notebook; the key that used to be
# hard-coded here has been exposed and should be revoked.
agent_args = {
    "model_name": "text-curie-001",
    "api_key": os.getenv("OPENAI_API_KEY")
}

In [None]:
import os

# Read the key from the environment instead of committing it to the notebook.
# The key that used to be written here has been exposed and should be revoked.
openai.api_key = os.getenv("OPENAI_API_KEY")

# One-off request to check the sentiment prompt against a sample sentence.
response = openai.Completion.create(
  model="text-curie-001",
  prompt="Decide whether a sentence's sentiment is positive, neutral, or negative.\n\nSentence: \"I loved the new Batman movie!\"\nSentiment: ",
  temperature=0,
  max_tokens=60,
  top_p=1,
  frequency_penalty=0.5,
  presence_penalty=0
)

In [85]:
import openai
import numpy as np
import time

class GPT_agent:
    """Trading agent that maps the average tweet sentiment to buy/sell/hold.

    Each tweet is classified by an OpenAI completion model as positive (+1),
    neutral (0) or negative (-1). The mean score over the day's tweets is
    compared with ``threshold`` to choose the action.

    Keys read from ``args``:
        model_name: completion model used for classification.
        api_key: OpenAI API key (assigned to ``openai.api_key``).
        threshold: absolute mean score required to trade (default 0.3).
    """

    # Number of attempts per tweet before a rate-limit error is re-raised.
    MAX_ATTEMPTS = 5

    def __init__(self,args):
        args = edict(args)
        self.model_name = args.model_name
        self.threshold = args.get("threshold", 0.3)
        openai.api_key = args.api_key
   
    def __call__(self,obs:list):
        """Return 1 (buy), -1 (sell) or 0 (hold) for a list of tweet texts."""
        self.score_list = [self.get_sentiment(o) for o in tqdm(obs)]
        # An empty observation has no sentiment; avoid np.mean([]) -> NaN + warning.
        self.score_list_mean = np.mean(self.score_list) if self.score_list else 0.0
        if self.score_list_mean > self.threshold:
            return 1
        elif self.score_list_mean < -self.threshold:
            return -1
        else:
            return 0
    
    def get_sentiment(self,sentence):
        """Classify one sentence; returns 1, 0 or -1.

        Waits one second before every request and retries with exponential
        back-off when the API reports a rate limit.

        Raises:
            openai.error.RateLimitError: if the limit is still hit after
                ``MAX_ATTEMPTS`` attempts.
        """
        prompt = f"Decide whether a sentence's sentiment is positive, neutral, or negative.\n\nSentence: \"{sentence}\"\nSentiment: "
        for attempt in range(self.MAX_ATTEMPTS):
            time.sleep(1)
            try:
                response = openai.Completion.create(
                    # Fixed: honour the configured model instead of a hard-coded name.
                    model = self.model_name,
                    prompt = prompt,
                    temperature=0,
                    max_tokens=60,
                    top_p=1,
                    frequency_penalty=0.5,
                    presence_penalty=0
                    )
                break
            except openai.error.RateLimitError:
                if attempt == self.MAX_ATTEMPTS - 1:
                    raise
                # Back off 1s, 2s, 4s, ... before the next attempt.
                time.sleep(2 ** attempt)
        return self._parse_sentiment(response["choices"][0]["text"])

    @staticmethod
    def _parse_sentiment(text):
        """Map the model's free-text answer to -1, 0 or 1 (case-insensitive).

        "negative" is checked first, matching the original precedence.
        """
        lowered = text.lower()
        if "negative" in lowered:
            return -1
        if "positive" in lowered:
            return 1
        return 0

In [86]:
# Run one full episode: the agent reads each day's tweets and the
# environment executes the resulting action on the following trading day.
env = Tweets_trading_env(env_args)
agent = GPT_agent(agent_args)
obs = env.reset()
terminal = False
while True:
    print(env.today)
    action = agent(obs)
    obs, reward, terminal, info = env.step(action)
    if terminal:
        break

  0%|          | 0/32 [00:00<?, ?it/s]

2014-01-17 is missing
2014-01-18 is missing
2014-01-02


  0%|          | 0/33 [00:00<?, ?it/s]

2014-01-03


  0%|          | 0/21 [00:00<?, ?it/s]

RateLimitError: Rate limit reached for default-global-with-image-limits in organization org-nSneMDCWcDbih9MwXlWohbdJ on requests per min. Limit: 60.000000 / min. Current: 70.000000 / min. Contact support@openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.

In [74]:
# Display `res`.
# NOTE(review): the recorded output is 1, whereas the visible cell that assigns
# `res` stores the list returned by env.reset(); presumably `res` was rebound
# in a cell that is no longer in the notebook — confirm or remove this cell.
res

1

In [60]:
# Display the last observation (list of tweet texts) returned by env.step().
state

['$ aapl - major league baseball is installing apple ibeacons at 20 ballparks -> URL stock stocks stockaction',
 'looking for winners like $ nabi $ furx $ cb $ aapl view now URL',
 'option millionaires : mwm : $ aapl i think the 200 day ma is URL',
 "$ aapl apple get a clue why they don't want to use cash for share buy back . disrupt the finance sector . URL",
 "$ aapl how to get $ 50 off the iphone 6 and galaxy s5 , even though they don't exist - bgr URL",
 "apple snaps up several ' . guru ' domain names on launch day URL $ aapl",
 'rt AT_USER how long before $ goog has a bigger market cap than $ aapl again ? URL',
 'patience is the key for apple traders URL $ aapl',
 '$ aapl - apple , samsung spar over potential u . s . ban on smartphone sales -> URL stock stocks stockaction',
 '$ aapl 5 min support & resistance levels URL',
 'rt AT_USER for defined risk 100 % transparency check us out . 7day free trials . URL $ spy $ goog $ aapl $ vxx $ pcln $ amzn …',
 '$ aapl just under that $ 500