In [None]:
import pandas as pd

# Read the CSV using its second line as the header row (header=1), replace the
# resulting labels with the canonical column names, and discard every row
# that still contains a missing value.
df = (
    pd.read_csv("/content/infy_merged_2.csv", header=1)
    .set_axis(['Date', 'Close', 'High', 'Low', 'Open', 'Volume'], axis=1)
    .dropna()
)

# Preview the cleaned frame
df.head()


FileNotFoundError: [Errno 2] No such file or directory: '/content/infy_merged_2.csv'

In [None]:
# Display the frame's column labels to confirm the rename took effect.
df.columns

Index(['Date', 'Close', 'High', 'Low', 'Open', 'Volume'], dtype='object')

In [None]:
import gym
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
import random
import torch
import torch.nn as nn
import torch.optim as optim
from collections import deque

In [None]:
# Standardize the price/volume columns in place (zero mean, unit variance per
# column). The fitted scaler is kept so the transform can be inverted later.
# NOTE(review): this overwrites 'Close' with z-scores, and the trading
# environment reads 'Close' as the execution price -- confirm that trading at
# standardized (possibly negative) prices is intended.
numeric_cols = ['Close', 'High', 'Low', 'Open', 'Volume']
scaler = StandardScaler()
df[numeric_cols] = scaler.fit_transform(df[numeric_cols])

In [None]:
import gym
import numpy as np

class StockTradingEnvNoSentiment(gym.Env):
    """Single-asset trading environment driven by a price DataFrame.

    Each step the agent holds, buys one share, or sells one share at the
    current row's 'Close' value. The observation is the current row's
    ('Close', 'Volume') pair as float32.

    Reward is the change in portfolio value (cash + shares * close) between
    the current row and the next row, so it reflects the position held
    *after* the chosen action.

    Prices are read from 'Close' as-is. If that column was standardized
    upstream, trades execute at those standardized values (which can be
    negative); pass unscaled prices for meaningful cash accounting.

    Args:
        df: DataFrame with at least 'Date', 'Close' and 'Volume' columns.
        initial_cash: Cash balance at the start of every episode.

    Attributes:
        trade_history: List of per-step dict records. It is created once in
            ``__init__`` and is NOT cleared by ``reset()``, so it accumulates
            across episodes.
    """

    def __init__(self, df, initial_cash=10000):
        super().__init__()
        self.df = df.reset_index()
        self.initial_cash = initial_cash
        self.current_step = 0
        self.cash = initial_cash  # Initial balance
        self.stock_held = 0
        self.total_value = self.cash
        self.max_steps = len(self.df) - 1
        # Created eagerly so `env.trade_history` exists even before any step.
        self.trade_history = []

        self.action_space = gym.spaces.Discrete(3)  # 0: Hold, 1: Buy, 2: Sell
        self.observation_space = gym.spaces.Box(low=-np.inf, high=np.inf, shape=(2,), dtype=np.float32)

    def reset(self):
        """Restart the episode at row 0 and return the first observation."""
        self.current_step = 0
        self.cash = self.initial_cash
        self.stock_held = 0
        self.total_value = self.cash
        return self._next_observation()

    def _next_observation(self):
        """Return the current row's ('Close', 'Volume') as a float32 array."""
        obs = self.df.iloc[self.current_step][['Close', 'Volume']].values
        return np.array(obs, dtype=np.float32)

    def step(self, action):
        """Execute one action and advance to the next row.

        Args:
            action: 0 = hold, 1 = buy one share, 2 = sell one share. A buy
                without enough cash or a sell with no shares is a no-op.

        Returns:
            Tuple ``(observation, reward, done, info)`` where ``info`` is an
            empty dict and ``done`` is True once the last row is reached.

        Raises:
            IndexError: If called after the episode has already finished.
        """
        if self.current_step >= self.max_steps:
            raise IndexError(
                "step() called after the episode ended; call reset() first"
            )

        prev_total = self.total_value
        row = self.df.iloc[self.current_step]
        price = row['Close']
        trade_date = row['Date']

        if action == 1 and self.cash >= price:  # Buy
            self.stock_held += 1
            self.cash -= price
        elif action == 2 and self.stock_held > 0:  # Sell
            self.stock_held -= 1
            self.cash += price

        self.current_step += 1
        done = self.current_step >= self.max_steps

        # Mark the portfolio to the NEXT row's close. Valuing it at the
        # execution price would make the reward independent of the action,
        # because a trade at `price` leaves cash + shares * price unchanged.
        next_price = self.df.iloc[self.current_step]['Close']
        self.total_value = self.cash + (self.stock_held * next_price)
        reward = self.total_value - prev_total  # No sentiment influence

        # Track trade history. "Date" and "Stock Price" both describe the row
        # the action executed on; "Total Value" is the post-step valuation.
        self.trade_history.append({
            "Step": self.current_step,
            "Date": trade_date,
            "Action": "Buy" if action == 1 else "Sell" if action == 2 else "Hold",
            "Stock Held": self.stock_held,
            "Cash": self.cash,
            "Total Value": self.total_value,
            "Stock Price": price
        })

        return self._next_observation(), reward, done, {}

    def render(self):
        """Print the current step, cash, share count and portfolio value."""
        print(f"Step: {self.current_step}, Cash: {self.cash}, Stocks: {self.stock_held}, Total Value: {self.total_value}")


In [None]:
class DQN(nn.Module):
    """Fully connected Q-network: two 64-unit ReLU hidden layers, linear head.

    Args:
        input_dim: Size of the observation vector.
        output_dim: Number of actions (one Q-value per action).
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        hidden_units = 64
        self.fc1 = nn.Linear(input_dim, hidden_units)
        self.fc2 = nn.Linear(hidden_units, hidden_units)
        self.fc3 = nn.Linear(hidden_units, output_dim)

    def forward(self, x):
        """Map observation(s) ``x`` to one Q-value per action."""
        activations = x
        for hidden_layer in (self.fc1, self.fc2):
            activations = torch.relu(hidden_layer(activations))
        # The output layer stays linear: Q-values are unbounded.
        return self.fc3(activations)

In [None]:
# Seed every random source used during training (Python's `random` for
# epsilon-greedy draws and replay sampling, NumPy, torch for weight init, and
# the gym action space for exploratory actions) so runs are repeatable.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

env = StockTradingEnvNoSentiment(df)
env.action_space.seed(SEED)
dqn = DQN(env.observation_space.shape[0], env.action_space.n)
optimizer = optim.Adam(dqn.parameters(), lr=0.001)
loss_fn = nn.MSELoss()

# Training Loop
num_episodes = 100
gamma = 0.95          # discount factor applied to the bootstrapped next-state value
epsilon = 1.0         # initial exploration probability
epsilon_decay = 0.995  # multiplicative decay applied once per episode
epsilon_min = 0.1     # floor for the exploration probability
memory = deque(maxlen=1000)  # replay buffer; oldest transitions are evicted first

In [None]:
# Epsilon-greedy DQN training with uniform experience replay. One gradient
# step is taken per environment step once the buffer holds more than 32
# transitions; epsilon decays once per episode.
for episode in range(num_episodes):
    state = torch.tensor(env.reset(), dtype=torch.float32)
    total_reward, done = 0, False

    while not done:
        # Explore with probability epsilon, otherwise pick the greedy action.
        explore = random.random() < epsilon
        if explore:
            action = env.action_space.sample()
        else:
            with torch.no_grad():
                action = dqn(state).argmax().item()

        observation, reward, done, _ = env.step(action)
        next_state = torch.tensor(observation, dtype=torch.float32)
        memory.append((state, action, reward, next_state, done))
        total_reward += reward
        state = next_state

        # Skip learning until the replay buffer has more than one batch.
        if len(memory) <= 32:
            continue

        batch = random.sample(memory, 32)
        states, actions, rewards, next_states, dones = zip(*batch)
        states = torch.stack(states)
        next_states = torch.stack(next_states)
        actions = torch.tensor(actions, dtype=torch.int64)
        rewards = torch.tensor(rewards, dtype=torch.float32)
        dones = torch.tensor(dones, dtype=torch.float32)

        # Bootstrapped target r + gamma * max_a' Q(s', a'); the bootstrap
        # term is zeroed for terminal transitions and carries no gradient.
        with torch.no_grad():
            next_q_values = dqn(next_states).max(dim=1).values
        target_q_values = rewards + (gamma * next_q_values * (1 - dones))

        # Q(s, a) for the actions that were actually taken.
        q_values = dqn(states).gather(1, actions.unsqueeze(1)).squeeze(1)

        loss = loss_fn(q_values, target_q_values)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    epsilon = max(epsilon_min, epsilon * epsilon_decay)
    print(f"Episode {episode + 1}, Total Reward: {total_reward}")


Episode 1, Total Reward: 20.038962444188655
Episode 2, Total Reward: 3.967315986399626
Episode 3, Total Reward: 0.4021821436163009
Episode 4, Total Reward: 25.015680785119912
Episode 5, Total Reward: 12.926405707295999
Episode 6, Total Reward: 11.288920732422412
Episode 7, Total Reward: 14.845163845493516
Episode 8, Total Reward: 18.56522567859065
Episode 9, Total Reward: -15.349400145725667
Episode 10, Total Reward: -18.167089078231584
Episode 11, Total Reward: 18.929624808319204
Episode 12, Total Reward: 23.89604348750072
Episode 13, Total Reward: 2.6697660980535147
Episode 14, Total Reward: 10.410655273974044
Episode 15, Total Reward: 5.125544294005522
Episode 16, Total Reward: 13.950859675340325
Episode 17, Total Reward: 17.658680268919852
Episode 18, Total Reward: 2.867424174100961
Episode 19, Total Reward: -3.8365747530533554
Episode 20, Total Reward: -19.60394644349617
Episode 21, Total Reward: 4.809046560876595
Episode 22, Total Reward: -6.713278512961551
Episode 23, Total Rewa

In [None]:
import pandas as pd

# Convert trade history to DataFrame
trade_log = pd.DataFrame(env.trade_history)

# Save to CSV. The path lives in one variable so the confirmation message
# below always names the file that was actually written.
trade_log_path = "trade_history2.csv"
trade_log.to_csv(trade_log_path, index=False)

print(f"Trade history saved to {trade_log_path}")


Trade history saved to trade_history.csv
