In [None]:
import pandas as pd

# Column labels applied to the loaded frame, in file order.
column_names = [
    'Date', 'Close', 'High', 'Low', 'Open', 'Volume', 'Quarter',
    'Sentiment_Score', 'Profit_Growth', 'Sales_Growth', 'OPM', 'PE_Ratio',
]

# header=1 takes the labels from row index 1 of the CSV; they are then
# overwritten with the names above.
df = pd.read_csv("/content/infy_merged.csv", header=1)
df.columns = column_names

# Keep only rows that have a value in every column.
df = df.dropna()

# Preview the first rows of the cleaned frame.
df.head()


Unnamed: 0,Date,Close,High,Low,Open,Volume,Quarter,Sentiment_Score,Profit_Growth,Sales_Growth,OPM,PE_Ratio
1,04-01-2021,15.62609768,15.78915289,15.45398366,15.75291763,12597600,2021Q4,0.836963,-1.2,2.8,24.5,31.7
2,05-01-2021,15.98844147,16.0065591,15.6895075,15.6895075,8109900,2021Q4,0.836963,-1.2,2.8,24.5,31.7
3,06-01-2021,16.06091309,16.11526598,15.70762789,15.76197906,9136300,2021Q4,0.836963,-1.2,2.8,24.5,31.7
4,07-01-2021,15.89785671,15.89785671,15.63515797,15.7257444,10272000,2021Q4,0.836963,-1.2,2.8,24.5,31.7
5,08-01-2021,16.83995247,16.8580701,16.32361209,16.36890444,17802400,2021Q4,0.836963,-1.2,2.8,24.5,31.7


In [None]:
# Display the frame's column labels to confirm the rename took effect.
df.columns

Index(['Date', 'Close', 'High', 'Low', 'Open', 'Volume', 'Quarter',
       'Sentiment_Score', 'Profit_Growth', 'Sales_Growth', 'OPM', 'PE_Ratio'],
      dtype='object')

In [None]:
import gym
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
import random
import torch
import torch.nn as nn
import torch.optim as optim
from collections import deque

In [None]:
# Standardise the numeric model inputs (zero mean, unit variance per column).
#
# 'Close' is deliberately NOT scaled: StockTradingEnv reads df['Close'] as the
# trade execution price. A z-scored price is negative for roughly half the
# rows, which makes the `cash >= price` check always pass and turns a "buy"
# into a cash credit. Keeping the real price keeps the portfolio arithmetic
# meaningful. 'Sentiment_Score' is likewise left as loaded.
scaled_columns = ['High', 'Low', 'Open', 'Volume', 'PE_Ratio', 'Profit_Growth', 'Sales_Growth', 'OPM']

scaler = StandardScaler()
df[scaled_columns] = scaler.fit_transform(df[scaled_columns])

In [None]:
class StockTradingEnv(gym.Env):
    """Single-stock trading environment driven by the rows of a DataFrame.

    Every step consumes one row of ``df``. The agent holds, buys one share or
    sells one share at that row's ``Close`` value; the portfolio is then
    marked to the *next* row's ``Close`` so that the reward reflects the
    outcome of the position the agent chose.

    Actions: 0 = Hold, 1 = Buy one share, 2 = Sell one share.
    Observation: float32 vector holding ``OBS_COLUMNS`` of the current row.
    Reward: change in total portfolio value over the step plus
    ``SENTIMENT_WEIGHT`` times the ``Sentiment_Score`` of the row acted on.

    ``trade_history`` holds one dict per ``step()`` call and keeps growing
    across episodes; the ``Episode`` field tells the episodes apart.

    NOTE(review): ``Close`` doubles as the execution price, so the frame
    passed in should carry real (unscaled) prices in that column.
    """

    INITIAL_CASH = 10000      # starting balance of every episode
    SENTIMENT_WEIGHT = 0.1    # weight of the sentiment bonus in the reward
    OBS_COLUMNS = ['Close', 'Sentiment_Score', 'Profit_Growth', 'Sales_Growth', 'OPM', 'PE_Ratio', 'Volume']

    def __init__(self, df):
        """Store a re-indexed copy of ``df`` and set up spaces and bookkeeping.

        Args:
            df: DataFrame providing 'Date', 'Close', 'Sentiment_Score' and the
                remaining ``OBS_COLUMNS``, one row per time step.
        """
        super(StockTradingEnv, self).__init__()
        self.df = df.reset_index()
        # The last row is only ever used for valuation / the final observation.
        self.max_steps = len(self.df) - 1

        # Created eagerly so the log exists (empty) before the first step.
        self.trade_history = []
        self.episode = 0  # incremented by every reset()

        self.current_step = 0
        self.cash = self.INITIAL_CASH
        self.stock_held = 0
        self.total_value = self.cash

        self.action_space = gym.spaces.Discrete(3)  # 0: Hold, 1: Buy, 2: Sell
        self.observation_space = gym.spaces.Box(
            low=-np.inf, high=np.inf, shape=(len(self.OBS_COLUMNS),), dtype=np.float32
        )

    def reset(self):
        """Start a new episode and return the observation for the first row."""
        self.episode += 1
        self.current_step = 0
        self.cash = self.INITIAL_CASH
        self.stock_held = 0
        self.total_value = self.cash
        return self._next_observation()

    def _next_observation(self):
        """Return ``OBS_COLUMNS`` of the current row as a float32 array."""
        obs = self.df.iloc[self.current_step][self.OBS_COLUMNS].values
        return np.array(obs, dtype=np.float32)

    def step(self, action):
        """Apply ``action`` on the current row and advance one row.

        Args:
            action: 0 (hold), 1 (buy one share) or 2 (sell one share). A buy
                without enough cash or a sell without shares leaves the
                portfolio unchanged, but is still logged under the requested
                action name.

        Returns:
            Tuple ``(observation, reward, done, info)``; ``info`` is an empty
            dict and ``done`` becomes True once the last row is reached.
            Calling ``step`` again after ``done`` is not supported.
        """
        prev_total = self.total_value

        # Everything describing the trade comes from the row being acted on.
        row = self.df.iloc[self.current_step]
        price = row['Close']
        sentiment = row['Sentiment_Score']
        trade_date = row['Date']

        if action == 1 and self.cash >= price:  # Buy
            self.stock_held += 1
            self.cash -= price
        elif action == 2 and self.stock_held > 0:  # Sell
            self.stock_held -= 1
            self.cash += price

        self.current_step += 1
        done = self.current_step >= self.max_steps

        # Value the holdings at the NEW row's price. Valuing at the execution
        # price makes a trade value-neutral (cash and shares offset exactly),
        # which left the reward independent of the action just taken.
        next_price = self.df.iloc[self.current_step]['Close']
        self.total_value = self.cash + (self.stock_held * next_price)
        profit = self.total_value - prev_total
        reward = profit + (self.SENTIMENT_WEIGHT * sentiment)  # Sentiment-weighted reward

        # "Step" is the number of steps taken so far in this episode; "Date",
        # "Sentiment" and "Stock Price" all describe the row that was traded.
        self.trade_history.append({
            "Step": self.current_step,
            "Date": trade_date,
            "Action": "Buy" if action == 1 else "Sell" if action == 2 else "Hold",
            "Stock Held": self.stock_held,
            "Cash": self.cash,
            "Total Value": self.total_value,
            "Sentiment": sentiment,
            "Stock Price": price,
            "Episode": self.episode,
        })

        return self._next_observation(), reward, done, {}

    def render(self):
        """Print the current step index, cash, share count and total value."""
        print(f"Step: {self.current_step}, Cash: {self.cash}, Stocks: {self.stock_held}, Total Value: {self.total_value}")


In [None]:
class DQN(nn.Module):
    """Fully connected network: two 64-unit ReLU hidden layers and a linear head.

    Args:
        input_dim: Size of the last dimension of the input.
        output_dim: Number of values produced per input vector.
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        hidden_units = 64
        # Layers are created in fc1 -> fc2 -> fc3 order.
        self.fc1 = nn.Linear(input_dim, hidden_units)
        self.fc2 = nn.Linear(hidden_units, hidden_units)
        self.fc3 = nn.Linear(hidden_units, output_dim)

    def forward(self, x):
        """Run ``x`` through both hidden layers (ReLU) and the linear output layer."""
        for hidden_layer in (self.fc1, self.fc2):
            x = torch.relu(hidden_layer(x))
        return self.fc3(x)

In [None]:
# Seed the random sources the training run draws from, so that re-running the
# notebook from a fresh kernel starts from the same random state.
SEED = 42
random.seed(SEED)        # epsilon draws and replay-buffer sampling
torch.manual_seed(SEED)  # DQN weight initialisation

env = StockTradingEnv(df)
env.action_space.seed(SEED)  # random actions from env.action_space.sample()

# Network input size = observation length, output size = number of actions.
dqn = DQN(env.observation_space.shape[0], env.action_space.n)
optimizer = optim.Adam(dqn.parameters(), lr=0.001)
loss_fn = nn.MSELoss()

# Training hyperparameters
num_episodes = 100      # number of episodes to train for
gamma = 0.95            # discount factor applied to the next state's value
epsilon = 1.0           # initial probability of taking a random action
epsilon_decay = 0.995   # multiplicative decay applied to epsilon after each episode
epsilon_min = 0.1       # lower bound for epsilon
memory = deque(maxlen=1000)  # replay buffer; oldest transitions drop out first

In [None]:
# Train the DQN with epsilon-greedy exploration and experience replay.
# Each episode runs the environment from reset() until it reports done.
for episode in range(num_episodes):
    state = env.reset()
    state = torch.tensor(state, dtype=torch.float32)
    total_reward = 0
    done = False

    while not done:
        # Epsilon-greedy: a random action with probability epsilon, otherwise
        # the action with the highest predicted value for the current state.
        if random.random() < epsilon:
            action = env.action_space.sample()
        else:
            with torch.no_grad():
                action = torch.argmax(dqn(state)).item()

        next_state, reward, done, _ = env.step(action)
        next_state = torch.tensor(next_state, dtype=torch.float32)
        # Store the transition in the replay buffer.
        memory.append((state, action, reward, next_state, done))
        state = next_state
        total_reward += reward

        # Update the network only once the buffer holds more than 32 transitions.
        if len(memory) > 32:
            batch = random.sample(memory, 32)

            # Regroup the sampled transitions into one batch per field.
            states, actions, rewards, next_states, dones = zip(*batch)
            states = torch.stack(states)
            actions = torch.tensor(actions, dtype=torch.int64)
            rewards = torch.tensor(rewards, dtype=torch.float32)
            next_states = torch.stack(next_states)
            dones = torch.tensor(dones, dtype=torch.float32)

            # Predicted value of the action that was actually taken in each state.
            q_values = dqn(states).gather(1, actions.unsqueeze(1)).squeeze(1)
            # Best next-state value from the same network (no separate target
            # network); detach() keeps gradients out of the target.
            next_q_values = dqn(next_states).max(1)[0].detach()
            # (1 - dones) drops the bootstrap term for transitions flagged done.
            target_q_values = rewards + (gamma * next_q_values * (1 - dones))

            loss = loss_fn(q_values, target_q_values)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

    # Decay exploration once per episode, never going below epsilon_min.
    epsilon = max(epsilon_min, epsilon * epsilon_decay)
    print(f"Episode {episode + 1}, Total Reward: {total_reward}")


Episode 1, Total Reward: 79.60666718385932
Episode 2, Total Reward: 87.313758027763
Episode 3, Total Reward: 63.546304976224974
Episode 4, Total Reward: 72.09546547090716
Episode 5, Total Reward: 48.973354786088414
Episode 6, Total Reward: 50.55668882742402
Episode 7, Total Reward: 75.2634982115919
Episode 8, Total Reward: 44.194581080223806
Episode 9, Total Reward: 67.51709630950576
Episode 10, Total Reward: 61.509843381999566
Episode 11, Total Reward: 92.88248861417509
Episode 12, Total Reward: 59.83577756720608
Episode 13, Total Reward: 45.9135207086852
Episode 14, Total Reward: 73.15553405181828
Episode 15, Total Reward: 58.436588001796956
Episode 16, Total Reward: 80.31832692481784
Episode 17, Total Reward: 68.81429953090218
Episode 18, Total Reward: 45.28503201410218
Episode 19, Total Reward: 84.60051407853807
Episode 20, Total Reward: 49.547505390084204
Episode 21, Total Reward: 69.8537295514846
Episode 22, Total Reward: 19.155090243840775
Episode 23, Total Reward: 42.0763883126

In [None]:
import pandas as pd

# Tabulate the per-step records collected by the environment.
history_records = env.trade_history
trade_log = pd.DataFrame(history_records)

# Write the table to disk without the row index, then confirm the destination.
trade_log.to_csv("trade_history.csv", index=False)

print("Trade history saved to trade_history.csv")


Trade history saved to trade_history.csv
