In [7]:
import pandas as pd
import numpy as np
# Never truncate columns when a DataFrame is rendered in the notebook.
pd.options.display.max_columns = None

In [8]:
# Read the four input tables; every path is relative to the working directory.
df_basic, df_fundamental, df_news_sentiment, df_reddit_sentiment = map(
    pd.read_csv,
    ('basic.csv', 'fundamentals.csv', 'news_sentiment_MSFT.csv', 'Reddit_sentiment_MSFT.csv'),
)

### Import sentiment data

In [13]:
# --- Parse the join keys ------------------------------------------------------
# merge_asof compares timestamps, so every key column must be a real datetime.
df_basic['Date'] = pd.to_datetime(df_basic['Date'])
df_fundamental['Report Date'] = pd.to_datetime(df_fundamental['Report Date'])
df_news_sentiment['Date'] = pd.to_datetime(df_news_sentiment['Date'])
df_reddit_sentiment['Date'] = pd.to_datetime(df_reddit_sentiment['Date'])

# merge_asof raises unless both sides are sorted on their key. A stable sort is a
# no-op for input that is already ordered, and protects against files that are not.
basic_sorted = df_basic.sort_values('Date', kind='mergesort')
fundamental_sorted = df_fundamental.sort_values('Report Date', kind='mergesort')
news_sorted = df_news_sentiment.sort_values('Date', kind='mergesort')
reddit_sorted = df_reddit_sentiment.sort_values('Date', kind='mergesort')

# --- Attach fundamentals and sentiment to every price bar ---------------------
# NOTE(review): direction='nearest' can pair a bar with a report dated AFTER the
# bar; confirm that this look-ahead is intended.
merged_data = pd.merge_asof(
    basic_sorted,
    fundamental_sorted,
    left_on='Date',
    right_on='Report Date',
    direction='nearest',
)
# Both sentiment tables expose a 'Sentiment' column, so each one is renamed right
# after its merge to keep the two sources apart.
merged_data = pd.merge_asof(merged_data, news_sorted, on='Date')
merged_data = merged_data.rename(columns={'Sentiment': 'News Sentiment'})
merged_data = pd.merge_asof(merged_data, reddit_sorted, on='Date')
merged_data = merged_data.rename(columns={'Sentiment': 'Reddit Sentiment'})

merged_data = merged_data.drop(columns=['Report Date', 'Unnamed: 0'])

# --- Re-align sentiment ---------------------------------------------------------
# Each row receives the sentiment found SENTIMENT_SHIFT_ROWS rows further down.
# NOTE(review): the original comment called 48 rows "1 day", but the displayed
# output shows bars stopping at 15:30 and resuming at 09:30 (roughly 13 half-hour
# rows per session), so 48 rows spans several sessions. A negative shift also moves
# later values onto earlier timestamps. Behaviour is kept as-is; confirm the intent.
SENTIMENT_SHIFT_ROWS = 48
for sentiment_col in ('News Sentiment', 'Reddit Sentiment'):
    merged_data[sentiment_col] = merged_data[sentiment_col].shift(-SENTIMENT_SHIFT_ROWS)

# The trailing rows have no sentiment left after the shift. Take an explicit copy so
# the column assignment below writes to an independent frame, not a view.
merged_data = merged_data.iloc[:-SENTIMENT_SHIFT_ROWS].copy()

# --- Fill gaps in news sentiment ------------------------------------------------
window_size = 7  # 3 before, current, 3 after
# With min_periods=1 the built-in rolling mean already averages only the non-NaN
# values in each centred window, so no per-window Python callback is needed.
sentiment_avg = (
    merged_data['News Sentiment']
    .rolling(window=window_size, min_periods=1, center=True)
    .mean()
)
# NOTE(review): only 'News Sentiment' is gap-filled; 'Reddit Sentiment' is left as-is.
merged_data['News Sentiment'] = merged_data['News Sentiment'].fillna(sentiment_avg)

merged_data.to_csv('input_data.csv', index=False)

merged_data[400:500]

Unnamed: 0,Date,Symbol,Open,High,Low,Close,Volume,Previous Price,Price Difference,RSI,%K,%D,SMA,Revenue,ROE,Net Profit Margin,Current Ratio,Debt Ratio,Equity Per Share,Free Cash Flow,P/B Ratio,EV/EBITDA,P/E Ratio,News Sentiment,Reddit Sentiment
400,2014-02-14 14:30:00,MSFT,37.65,37.78,37.65,37.78,1433676,37.65,0.13,54.70,90.87,87.10,36.64,8.334700e+10,0.25653,0.26908,3.21523,0.14527,10.50264,-1.250100e+10,3.69132,7.74357,13.76442,76.0,60.0
401,2014-02-14 15:00:00,MSFT,37.77,37.78,37.66,37.70,1566811,37.78,-0.08,54.63,87.39,87.83,36.64,8.334700e+10,0.25653,0.26908,3.21523,0.14527,10.50264,-1.250100e+10,3.69132,7.74357,13.76442,76.0,60.0
402,2014-02-14 15:30:00,MSFT,37.71,37.71,37.58,37.61,3405177,37.70,-0.09,55.09,83.48,87.25,36.65,8.334700e+10,0.25653,0.26908,3.21523,0.14527,10.50264,-1.250100e+10,3.69132,7.74357,13.76442,76.0,60.0
403,2014-02-18 09:30:00,MSFT,37.65,37.78,37.51,37.55,5376444,37.61,-0.06,54.35,80.87,83.91,36.65,8.334700e+10,0.25653,0.26908,3.21523,0.14527,10.50264,-1.250100e+10,3.70020,7.76675,13.79752,76.0,60.0
404,2014-02-18 10:00:00,MSFT,37.55,37.62,37.51,37.55,2551745,37.55,0.00,54.85,80.87,81.74,36.66,8.334700e+10,0.25653,0.26908,3.21523,0.14527,10.50264,-1.250100e+10,3.70020,7.76675,13.79752,76.0,60.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
495,2014-02-27 10:00:00,MSFT,37.28,37.42,37.23,37.40,2321942,37.28,0.12,55.52,60.08,61.11,37.16,8.334700e+10,0.25653,0.26908,3.21523,0.14527,10.50264,-1.250100e+10,3.77912,7.97273,14.09179,80.0,60.0
496,2014-02-27 10:30:00,MSFT,37.41,37.54,37.38,37.50,2611453,37.40,0.10,55.75,64.29,60.23,37.17,8.334700e+10,0.25653,0.26908,3.21523,0.14527,10.50264,-1.250100e+10,3.77912,7.97273,14.09179,80.0,60.0
497,2014-02-27 11:00:00,MSFT,37.50,37.60,37.47,37.50,2494782,37.50,0.00,55.75,64.29,62.89,37.17,8.334700e+10,0.25653,0.26908,3.21523,0.14527,10.50264,-1.250100e+10,3.77912,7.97273,14.09179,80.0,60.0
498,2014-02-27 11:30:00,MSFT,37.51,37.53,37.46,37.53,1605231,37.50,0.03,56.21,65.55,64.71,37.17,8.334700e+10,0.25653,0.26908,3.21523,0.14527,10.50264,-1.250100e+10,3.77912,7.97273,14.09179,70.0,60.0


In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

class PPO:
    """Clipped-surrogate PPO agent for a discrete action space.

    Holds an ``Actor`` (action probabilities) and a ``Critic`` (state values), each
    with its own Adam optimizer, and trains both from one-step transitions.

    Args:
        state_dim: Size of the state vector fed to both networks.
        action_dim: Number of discrete actions the actor scores.
        hidden_dim: Width of the hidden layers of both networks.
        lr: Learning rate used by both optimizers.
        gamma: Discount applied to the bootstrapped next-state value.
        clip_epsilon: The probability ratio is clamped to ``[1 - eps, 1 + eps]``.
        epochs: Full passes over the supplied transitions per ``update`` call.
        batch_size: Number of transitions per minibatch.
    """

    def __init__(self, state_dim, action_dim, hidden_dim, lr, gamma, clip_epsilon, epochs, batch_size):
        self.actor = Actor(state_dim, action_dim, hidden_dim)
        self.critic = Critic(state_dim, hidden_dim)
        self.actor_optimizer = optim.Adam(self.actor.parameters(), lr=lr)
        self.critic_optimizer = optim.Adam(self.critic.parameters(), lr=lr)
        self.gamma = gamma
        self.clip_epsilon = clip_epsilon
        self.epochs = epochs
        self.batch_size = batch_size

    def update(self, states, actions, rewards, next_states, dones):
        """Run ``self.epochs`` passes of critic and actor updates over the transitions.

        Args:
            states: Array-like of N state vectors.
            actions: Array-like of N integer action indices taken in ``states``.
            rewards: Array-like of N scalar rewards.
            next_states: Array-like of N successor state vectors.
            dones: Array-like of N flags (1/True when the transition ended the episode).

        Returns:
            None. The actor and critic parameters are updated in place. An empty
            set of transitions is a no-op.
        """
        # Go through numpy first: building a tensor straight from a list of
        # ndarrays is slow and emits a warning.
        states = torch.tensor(np.asarray(states), dtype=torch.float)
        actions = torch.tensor(np.asarray(actions), dtype=torch.long)
        rewards = torch.tensor(np.asarray(rewards), dtype=torch.float)
        next_states = torch.tensor(np.asarray(next_states), dtype=torch.float)
        dones = torch.tensor(np.asarray(dones), dtype=torch.float)

        num_samples = states.shape[0]
        if num_samples == 0:
            return

        # Probabilities of the taken actions under the policy that collected the
        # data. They must be frozen BEFORE any optimisation step; recomputing them
        # next to new_probs makes the ratio identically 1 and kills the gradient.
        with torch.no_grad():
            old_probs = self.actor(states).gather(1, actions.unsqueeze(1)).squeeze(1)

        for _ in range(self.epochs):
            indices = np.arange(num_samples)
            np.random.shuffle(indices)

            for start in range(0, num_samples, self.batch_size):
                batch_indices = torch.as_tensor(indices[start:start + self.batch_size], dtype=torch.long)

                batch_states = states[batch_indices]
                batch_actions = actions[batch_indices]
                batch_rewards = rewards[batch_indices]
                batch_next_states = next_states[batch_indices]
                batch_dones = dones[batch_indices]
                batch_old_probs = old_probs[batch_indices]

                # Critic outputs are (B, 1); squeeze to (B,) so they combine
                # element-wise with rewards/dones instead of broadcasting to (B, B).
                values = self.critic(batch_states).squeeze(-1)
                with torch.no_grad():
                    next_values = self.critic(batch_next_states).squeeze(-1)
                    # One-step TD target; terminal transitions do not bootstrap.
                    targets = batch_rewards + self.gamma * next_values * (1 - batch_dones)
                    # Advantage uses the critic as it was before this step.
                    advantages = targets - values

                # Update critic
                critic_loss = nn.MSELoss()(values, targets)
                self.critic_optimizer.zero_grad()
                critic_loss.backward()
                self.critic_optimizer.step()

                # Update actor
                new_probs = self.actor(batch_states).gather(1, batch_actions.unsqueeze(1)).squeeze(1)
                ratio = new_probs / (batch_old_probs + 1e-8)
                surr1 = ratio * advantages
                surr2 = torch.clamp(ratio, 1 - self.clip_epsilon, 1 + self.clip_epsilon) * advantages
                actor_loss = -torch.min(surr1, surr2).mean()
                self.actor_optimizer.zero_grad()
                actor_loss.backward()
                self.actor_optimizer.step()

    def act(self, state):
        """Sample one action index for a single state from the current policy.

        Args:
            state: Array-like state vector (no batch dimension).

        Returns:
            The sampled action as a Python ``int``.
        """
        state = torch.tensor(np.asarray(state), dtype=torch.float).unsqueeze(0)
        # Sampling needs no gradient, so skip building the autograd graph.
        with torch.no_grad():
            probs = self.actor(state)
        dist = torch.distributions.Categorical(probs)
        return dist.sample().item()

class Actor(nn.Module):
    """Policy network: two ReLU hidden layers, then a softmax over the actions.

    Args:
        state_dim: Number of input features.
        action_dim: Number of output probabilities.
        hidden_dim: Width of both hidden layers.
    """

    def __init__(self, state_dim, action_dim, hidden_dim):
        super().__init__()
        self.fc1 = nn.Linear(state_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        self.fc3 = nn.Linear(hidden_dim, action_dim)

    def forward(self, x):
        """Return probabilities over the last dimension for the input ``x``."""
        hidden = x
        for layer in (self.fc1, self.fc2):
            hidden = nn.functional.relu(layer(hidden))
        logits = self.fc3(hidden)
        return nn.functional.softmax(logits, dim=-1)

class Critic(nn.Module):
    """Value network: two ReLU hidden layers followed by one linear output unit.

    Args:
        state_dim: Number of input features.
        hidden_dim: Width of both hidden layers.
    """

    def __init__(self, state_dim, hidden_dim):
        super().__init__()
        self.fc1 = nn.Linear(state_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        self.fc3 = nn.Linear(hidden_dim, 1)

    def forward(self, x):
        """Return the un-activated scalar output (trailing dimension of size 1)."""
        activation = nn.functional.relu
        hidden = activation(self.fc2(activation(self.fc1(x))))
        return self.fc3(hidden)

In [5]:
import gym

# Create an instance of the environment
env = gym.make('CartPole-v1')

# Define the hyperparameters (passed positionally to PPO below)
state_dim = env.observation_space.shape[0]  # length of the observation vector
action_dim = env.action_space.n  # number of discrete actions
hidden_dim = 64  # width of the actor/critic hidden layers
lr = 0.001  # learning rate for both Adam optimizers
gamma = 0.99  # discount applied to the critic's bootstrapped next-state value
clip_epsilon = 0.2  # probability ratio is clamped to [1 - eps, 1 + eps]
epochs = 10  # passes over the supplied transitions per PPO.update call
batch_size = 64  # minibatch size inside PPO.update
num_episodes = 1000  # number of iterations of the training loop

# Create an instance of the PPO agent
agent = PPO(state_dim, action_dim, hidden_dim, lr, gamma, clip_epsilon, epochs, batch_size)

# Training loop
for episode in range(num_episodes):
    state = env.reset()
    done = False
    episode_reward = 0

    while not done:
        action = agent.act(state)
        next_stat

ModuleNotFoundError: No module named 'gym'

In [6]:
!pip install gym

Collecting gym
  Downloading gym-0.26.2.tar.gz (721 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m721.7/721.7 kB[0m [31m1.5 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25h  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Installing backend dependencies ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
Collecting gym-notices>=0.0.4 (from gym)
  Downloading gym_notices-0.0.8-py3-none-any.whl.metadata (1.0 kB)
Downloading gym_notices-0.0.8-py3-none-any.whl (3.0 kB)
Building wheels for collected packages: gym
  Building wheel for gym (pyproject.toml) ... [?25ldone
[?25h  Created wheel for gym: filename=gym-0.26.2-py3-none-any.whl size=827623 sha256=3fa1104a2c02cf6966004d28ca0525081bd2b3056916f7909e383432ef50107b
  Stored in directory: /Users/aayush/Library/Caches/pip/wheels/1c/77/9e/9af5470201a0b0543937933ee99ba884cd237d2faefe8f4d37
Successfully built gym
Installing colle