In [None]:
import pandas as pd

# --- Load raw inputs --------------------------------------------------------
articles = pd.read_csv("/content/final.csv")
q_sent = pd.read_csv("/content/HDFC.csv")

# The article timestamp column is discovered by name: the first column whose
# name contains "date" (case-insensitive). Fail fast with a readable message;
# without this guard a missing column surfaces later as `KeyError: None`.
article_date_col = next((col for col in articles.columns if 'date' in col.lower()), None)
if article_date_col is None:
    raise KeyError(
        "No column containing 'date' found in /content/final.csv; "
        f"available columns: {list(articles.columns)}"
    )
quarter_date_col = 'Date'

# Parse both date columns; values that cannot be parsed become NaT.
articles[article_date_col] = pd.to_datetime(articles[article_date_col], errors='coerce')
q_sent[quarter_date_col] = pd.to_datetime(q_sent[quarter_date_col], errors='coerce')

# Drop rows with invalid dates in articles.
# Rows of q_sent with an unparseable Date are NOT dropped here: the left merge
# below keeps every row of HDFC.csv.
articles = articles.dropna(subset=[article_date_col])

# Aggregate daily news sentiment (mean of sentiment_score per calendar day)
daily_news = (
    articles
    .groupby(articles[article_date_col].dt.date)['sentiment_score']
    .mean()
    .reset_index()
    .rename(columns={article_date_col: 'Date', 'sentiment_score': 'daily_news_sentiment'})
)
# .dt.date yields Python date objects; convert back to datetime64 so the merge
# key has the same dtype as q_sent['Date'].
daily_news['Date'] = pd.to_datetime(daily_news['Date'])

# Preserve all columns from HDFC.csv
# Merge on Date
final_df = pd.merge(q_sent, daily_news, on='Date', how='left')

# Fill days without news with 0
final_df['daily_news_sentiment'] = final_df['daily_news_sentiment'].fillna(0)

# Save to CSV
output_path = 'final_sentiment.csv'
final_df.to_csv(output_path, index=False)

# Preview the merged table (rich notebook display instead of print)
final_df.head()


        Date           Adj Close               Close                High  \
0        NaT         HDFCBANK.NS         HDFCBANK.NS         HDFCBANK.NS   
1 2022-04-01    1451.98486328125              1506.0              1510.0   
2 2022-04-04   1597.376220703125   1656.800048828125  1722.0999755859375   
3 2022-04-05  1550.5675048828125             1608.25   1666.699951171875   
4 2022-04-06   1495.226318359375  1550.8499755859375   1589.800048828125   

                  Low                Open       Volume quarter  \
0         HDFCBANK.NS         HDFCBANK.NS  HDFCBANK.NS     NaN   
1   1470.300048828125  1476.4000244140625      7284278  2023Q1   
2   1562.550048828125              1580.0     48725485  2023Q1   
3              1602.0   1666.699951171875     15461291  2023Q1   
4  1547.3499755859375   1587.699951171875     13513930  2023Q1   

   report_sentiment  daily_news_sentiment  
0               NaN                   0.0  
1           0.72623                   0.0  
2           0.

In [None]:
# Inspect the column names of the merged price + sentiment table.
final_df.columns

Index(['Date', 'Adj Close', 'Close', 'High', 'Low', 'Open', 'Volume',
       'quarter', 'report_sentiment', 'daily_news_sentiment'],
      dtype='object')

In [None]:
# Export the merged table. index=False matches the notebook's other exports and
# avoids a spurious "Unnamed: 0" column when the file is read back.
final_df.to_csv("hdfc_merged.csv", index=False)

In [None]:
# Inspect the per-day news sentiment column (0 marks days filled by fillna(0)).
final_df["daily_news_sentiment"]

Unnamed: 0,daily_news_sentiment
0,0.000000
1,0.000000
2,0.000000
3,0.000000
4,0.000000
...,...
673,0.807341
674,0.874686
675,0.869924
676,0.868356


In [None]:
# Coerce both sentiment series to a numeric dtype; entries that cannot be
# parsed as numbers become NaN.
for sentiment_col in ("report_sentiment", "daily_news_sentiment"):
    final_df[sentiment_col] = pd.to_numeric(final_df[sentiment_col], errors="coerce")


In [None]:
# Rolling z-score of report_sentiment and daily_news_sentiment so both are on
# comparable scales, followed by clipping of outliers to stabilize learning.
Z_WINDOW = 20   # trailing window length, in rows
Z_CLIP = 3.0    # z-scores are clipped to +/- this many standard deviations
Z_EPS = 1e-12   # a rolling std at or below this is treated as zero

for col in ["report_sentiment", "daily_news_sentiment"]:
    values = final_df[col]
    roller = values.rolling(window=Z_WINDOW, min_periods=1)
    rolling_mean = roller.mean()
    rolling_std = roller.std(ddof=0)

    # A window in which every value is identical has zero std, so the raw
    # formula evaluates 0/0 = NaN and the later dropna() would discard the row.
    # In such a window the value equals the window mean, so its z-score is 0.
    flat_window = rolling_std <= Z_EPS
    z = (values - rolling_mean) / rolling_std.mask(flat_window)
    z = z.mask(flat_window & values.notna(), 0.0)

    final_df[f"{col}_z"] = z.clip(-Z_CLIP, Z_CLIP)

In [None]:
# Coerce price, volume and sentiment columns to numeric dtype.
cols_to_numeric = ["Open", "High", "Low", "Close", "Adj Close", "Volume", "report_sentiment", "daily_news_sentiment"]
for col in cols_to_numeric:
    series = final_df[col]
    if series.dtype == object:
        # Strip thousands separators BEFORE coercing: with errors="coerce" a
        # value such as "7,284,278" would otherwise be turned into NaN and
        # could no longer be recovered by a later comma-stripping step.
        series = series.astype(str).str.replace(",", "", regex=False)
    # Anything still unparseable (e.g. a ticker string) becomes NaN.
    final_df[col] = pd.to_numeric(series, errors="coerce")


In [None]:
# Strip comma separators from Volume, then cast the cleaned text to float.
volume_text = final_df["Volume"].astype(str)
volume_text = volume_text.str.replace(",", "")
final_df["Volume"] = volume_text.astype(float)


In [None]:
# One-row simple return of Close; shared by the volatility and flow features
# so pct_change() is computed once.
daily_return = final_df["Close"].pct_change()

# Momentum over 5, 10, 20 days (n-row percentage change of Close)
for n in [5, 10, 20]:
    final_df[f"momentum_{n}"] = final_df["Close"].pct_change(n)

# Volatility over 10 and 20 days (rolling std of the one-row return)
for n in [10, 20]:
    final_df[f"volatility_{n}"] = daily_return.rolling(window=n).std()

# Volume flow (price_change x volume) and its 10-row moving average.
# pct_change() already returns a float Series, so no extra numeric coercion or
# string round-trip is needed here.
final_df["price_change"] = daily_return
final_df["volume_flow"] = final_df["price_change"] * final_df["Volume"]
final_df["volume_flow_ma10"] = final_df["volume_flow"].rolling(window=10).mean()


In [None]:
# Add lagged copies (1..K rows back) of each sentiment z-score column.
K = 5
lag_specs = [
    (source_col, lag)
    for source_col in ("report_sentiment_z", "daily_news_sentiment_z")
    for lag in range(1, K + 1)
]
for source_col, lag in lag_specs:
    final_df[f"{source_col}_lag{lag}"] = final_df[source_col].shift(periods=lag)


In [None]:
# Keep only rows with no missing value in any column (rolling windows and
# lags leave NaNs behind), then renumber the index from 0.
complete_rows = final_df.dropna(axis=0, how="any")
final_df_clean = complete_rows.reset_index(drop=True)


In [None]:
# Disabled export, kept for reference. A later cell writes the table to
# rl_ready_features.csv instead, after dropping some columns.
#final_df_clean.to_csv("rl_ready_sentiment_features.csv", index=False)
# Display the cleaned feature table.
final_df_clean

Unnamed: 0,Date,Adj Close,Close,High,Low,Open,Volume,quarter,report_sentiment,daily_news_sentiment,...,report_sentiment_z_lag1,report_sentiment_z_lag2,report_sentiment_z_lag3,report_sentiment_z_lag4,report_sentiment_z_lag5,daily_news_sentiment_z_lag1,daily_news_sentiment_z_lag2,daily_news_sentiment_z_lag3,daily_news_sentiment_z_lag4,daily_news_sentiment_z_lag5
0,2022-07-08,1362.652222,1397.099976,1410.000000,1394.750000,1410.000000,6237015.0,2023Q2,0.733383,0.000000,...,1.732051,2.000000,2.380476,3.000000,3.000000,0.773016,-0.987840,-0.987840,1.086763,-0.988101
1,2022-07-11,1371.479126,1406.150024,1409.900024,1393.250000,1394.000000,3731416.0,2023Q2,0.733383,0.000000,...,1.527525,1.732051,2.000000,2.380476,3.000000,-0.895255,0.773016,-0.987840,-0.987840,1.086763
2,2022-07-12,1357.483032,1391.800049,1405.250000,1389.400024,1405.250000,4938487.0,2023Q2,0.733383,0.000000,...,1.362770,1.527525,1.732051,2.000000,2.380476,-0.895255,-0.895255,0.773016,-0.987840,-0.987840
3,2022-07-13,1324.369995,1357.849976,1394.199951,1354.250000,1390.000000,7972805.0,2023Q2,0.733383,0.860903,...,1.224745,1.362770,1.527525,1.732051,2.000000,-0.808400,-0.895255,-0.895255,0.773016,-0.987840
4,2022-07-14,1317.737793,1351.050049,1372.650024,1349.250000,1362.000000,5186205.0,2023Q2,0.733383,0.841832,...,1.105542,1.224745,1.362770,1.527525,1.732051,1.175455,-0.808400,-0.895255,-0.895255,0.773016
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
135,2024-10-22,1714.550049,1714.550049,1740.000000,1710.699951,1730.050049,19437640.0,2025Q3,0.770711,0.885451,...,0.654654,0.733799,0.816497,0.904534,1.000000,0.326165,-1.883487,1.429460,1.176188,0.317386
136,2024-10-23,1735.800049,1735.800049,1746.500000,1705.050049,1705.050049,19018045.0,2025Q3,0.770711,0.791001,...,0.577350,0.654654,0.733799,0.816497,0.904534,0.675112,0.326165,-1.883487,1.429460,1.176188
137,2024-10-24,1749.650024,1749.650024,1768.650024,1738.099976,1738.099976,15416129.0,2025Q3,0.770711,0.777079,...,0.500000,0.577350,0.654654,0.733799,0.816497,-1.398208,0.675112,0.326165,-1.883487,1.429460
138,2024-10-25,1743.400024,1743.400024,1757.849976,1728.699951,1755.000000,13065239.0,2025Q3,0.770711,0.925540,...,0.420084,0.500000,0.577350,0.654654,0.733799,-1.541354,-1.398208,0.675112,0.326165,-1.883487


In [None]:
# Inspect the column names of the cleaned feature table.
final_df_clean.columns

Index(['Date', 'Adj Close', 'Close', 'High', 'Low', 'Open', 'Volume',
       'quarter', 'report_sentiment', 'daily_news_sentiment',
       'report_sentiment_z', 'daily_news_sentiment_z', 'momentum_5',
       'momentum_10', 'momentum_20', 'volatility_10', 'volatility_20',
       'price_change', 'volume_flow', 'volume_flow_ma10',
       'report_sentiment_z_lag1', 'report_sentiment_z_lag2',
       'report_sentiment_z_lag3', 'report_sentiment_z_lag4',
       'report_sentiment_z_lag5', 'daily_news_sentiment_z_lag1',
       'daily_news_sentiment_z_lag2', 'daily_news_sentiment_z_lag3',
       'daily_news_sentiment_z_lag4', 'daily_news_sentiment_z_lag5'],
      dtype='object')

In [None]:
# Drop the Adj Close / Date / Volume / quarter columns before export.
# errors="ignore" keeps this cell idempotent: it reassigns its own input, so a
# second run would otherwise raise KeyError on the already-removed columns.
final_df_clean = final_df_clean.drop(
    columns=['Adj Close', 'Date', 'Volume', 'quarter'],
    errors="ignore",
)
final_df_clean

Unnamed: 0,Close,High,Low,Open,report_sentiment,daily_news_sentiment,report_sentiment_z,daily_news_sentiment_z,momentum_5,momentum_10,...,report_sentiment_z_lag1,report_sentiment_z_lag2,report_sentiment_z_lag3,report_sentiment_z_lag4,report_sentiment_z_lag5,daily_news_sentiment_z_lag1,daily_news_sentiment_z_lag2,daily_news_sentiment_z_lag3,daily_news_sentiment_z_lag4,daily_news_sentiment_z_lag5
0,1397.099976,1410.000000,1394.750000,1410.000000,0.733383,0.000000,1.527525,-0.895255,0.032022,0.031984,...,1.732051,2.000000,2.380476,3.000000,3.000000,0.773016,-0.987840,-0.987840,1.086763,-0.988101
1,1406.150024,1409.900024,1393.250000,1394.000000,0.733383,0.000000,1.362770,-0.895255,0.037252,0.036793,...,1.527525,1.732051,2.000000,2.380476,3.000000,-0.895255,0.773016,-0.987840,-0.987840,1.086763
2,1391.800049,1405.250000,1389.400024,1405.250000,0.733383,0.000000,1.224745,-0.808400,0.028981,0.034104,...,1.362770,1.527525,1.732051,2.000000,2.380476,-0.895255,-0.895255,0.773016,-0.987840,-0.987840
3,1357.849976,1394.199951,1354.250000,1390.000000,0.733383,0.860903,1.105542,1.175455,-0.009772,0.010343,...,1.224745,1.362770,1.527525,1.732051,2.000000,-0.808400,-0.895255,-0.895255,0.773016,-0.987840
4,1351.050049,1372.650024,1349.250000,1362.000000,0.733383,0.841832,1.000000,1.168321,-0.032060,0.002263,...,1.105542,1.224745,1.362770,1.527525,1.732051,1.175455,-0.808400,-0.895255,-0.895255,0.773016
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
135,1714.550049,1740.000000,1710.699951,1730.050049,0.770711,0.885451,0.577350,0.675112,0.018081,0.038460,...,0.654654,0.733799,0.816497,0.904534,1.000000,0.326165,-1.883487,1.429460,1.176188,0.317386
136,1735.800049,1746.500000,1705.050049,1705.050049,0.770711,0.791001,0.500000,-1.398208,0.021179,0.062854,...,0.577350,0.654654,0.733799,0.816497,0.904534,0.675112,0.326165,-1.883487,1.429460,1.176188
137,1749.650024,1768.650024,1738.099976,1738.099976,0.770711,0.777079,0.420084,-1.541354,0.045722,0.052484,...,0.500000,0.577350,0.654654,0.733799,0.816497,-1.398208,0.675112,0.326165,-1.883487,1.429460
138,1743.400024,1757.849976,1728.699951,1755.000000,0.770711,0.925540,0.333333,1.479137,0.036597,0.055966,...,0.420084,0.500000,0.577350,0.654654,0.733799,-1.541354,-1.398208,0.675112,0.326165,-1.883487


In [None]:
# Persist the cleaned feature table (without the index) for the RL cells.
final_df_clean.to_csv("rl_ready_features.csv", index=False)

In [None]:
!pip install gymnasium stable-baselines3[extra]




In [None]:
class StockTradingEnv(gym.Env):
    """Long-only, single-share trading environment over a feature table.

    Each row of ``df`` is one time step. The agent observes its current
    position flag followed by the flattened feature values of the previous
    ``window_size`` rows, and picks one of three discrete actions
    (0 = Hold, 1 = Buy, 2 = Sell). Trades execute at the ``Close`` of the
    current row. The reward is the realized price difference when a position
    is sold and 0 on every other step.

    Limitations of this implementation:
      * A position still open when the episode ends is not liquidated; its
        PnL only appears in ``info["unrealized_pnl"]``.
      * ``balance`` is ``initial_balance`` plus realized profit; buying does
        not debit the purchase price.

    NOTE(review): the class uses ``gym``, ``spaces``, ``np`` and ``pd`` from
    the notebook namespace; confirm they are imported before this cell runs.
    """

    def __init__(self, df, window_size=5, initial_balance=10000):
        """Build the environment.

        Args:
            df: DataFrame with a ``Close`` column plus feature columns.
                The caller's frame is not modified.
            window_size: Number of past rows included in each observation.
            initial_balance: Starting value of ``balance``.

        Raises:
            ValueError: If fewer than ``window_size + 1`` rows have a valid
                ``Close``, i.e. not even one step can be taken.
        """
        super().__init__()

        self.window_size = window_size
        self.initial_balance = initial_balance

        frame = df.reset_index(drop=True)
        frame["Close"] = pd.to_numeric(frame["Close"], errors="coerce")
        # Re-index after dropping rows so that step numbers, .loc labels and
        # array positions all refer to the same row.
        self.df = frame.dropna(subset=["Close"]).reset_index(drop=True)

        if len(self.df) <= window_size:
            raise ValueError(
                f"Need more than {window_size} rows with a valid Close, got {len(self.df)}"
            )

        # Select features from the coerced frame so a Close column that arrived
        # as strings is still included once it has been made numeric.
        self.feature_columns = [
            col for col in self.df.columns
            if col not in ('Date', 'quarter') and pd.api.types.is_numeric_dtype(self.df[col])
        ]
        self.num_features = len(self.feature_columns)
        # Positional feature matrix; observations slice this directly.
        self._feature_matrix = self.df[self.feature_columns].to_numpy(dtype=np.float32)

        self.action_space = spaces.Discrete(3)  # 0 = Hold, 1 = Buy, 2 = Sell
        obs_len = 1 + window_size * self.num_features
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(obs_len,), dtype=np.float32)

        self.reset()

    def _get_observation(self):
        """Return ``[position, features of the previous window_size rows...]`` as float32."""
        start = self.current_step - self.window_size
        window = self._feature_matrix[start:self.current_step].ravel()
        obs = np.concatenate(([self.position], window), axis=0)
        return obs.astype(np.float32)

    def reset(self, seed=None, options=None):
        """Start a new episode at row ``window_size``; returns ``(obs, info)``."""
        super().reset(seed=seed)
        self.current_step = self.window_size
        self.balance = self.initial_balance
        self.position = 0
        self.entry_price = 0
        self.total_profit = 0
        self.trades = 0
        self.trade_history = []
        self.portfolio_value = self.initial_balance
        obs = self._get_observation()
        return obs, {}

    def step(self, action):
        """Apply ``action`` at the current row and advance one row.

        Returns:
            ``(obs, reward, terminated, truncated, info)``. ``truncated`` is
            always False; ``terminated`` becomes True once the step counter
            reaches the last row.
        """
        terminated = False
        truncated = False
        reward = 0.0
        # model.predict returns a NumPy scalar/array; normalize to a plain int.
        action = int(np.asarray(action).item())
        price = float(self.df.loc[self.current_step, "Close"])

        if action == 1:  # Buy
            if self.position == 0:
                self.position = 1
                self.entry_price = price
                self.trade_history.append(('Buy', self.current_step, price))

        elif action == 2:  # Sell
            if self.position == 1:
                profit = price - self.entry_price
                reward = float(profit)
                self.total_profit += profit
                self.balance += profit  # add realized profit
                self.position = 0
                self.entry_price = 0
                self.trades += 1
                self.trade_history.append(('Sell', self.current_step, price))

        self.current_step += 1
        if self.current_step >= len(self.df) - 1:
            terminated = True

        # Update portfolio value with unrealized PnL, marked at the price of
        # the row the action was just executed on.
        unrealized = (price - self.entry_price) if self.position == 1 else 0
        self.portfolio_value = self.balance + unrealized

        next_obs = self._get_observation()
        info = {
            "balance": self.balance,
            "portfolio_value": self.portfolio_value,
            "unrealized_pnl": unrealized,
            "total_profit": self.total_profit,
            "trades": self.trades
        }
        return next_obs, reward, terminated, truncated, info

    def render(self):
        """Print a one-line summary of the current episode state."""
        print(f"Step: {self.current_step}, Balance: {self.balance}, Portfolio Value: {self.portfolio_value}, Profit: {self.total_profit}, Trades: {self.trades}")


In [None]:
!pip install stable_baselines3

Collecting stable_baselines3
  Downloading stable_baselines3-2.6.0-py3-none-any.whl.metadata (4.8 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch<3.0,>=2.3->stable_baselines3)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch<3.0,>=2.3->stable_baselines3)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch<3.0,>=2.3->stable_baselines3)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch<3.0,>=2.3->stable_baselines3)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch<3.0,>=2.3->stable_baselines3)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (

In [None]:
from stable_baselines3 import DQN
from stable_baselines3.common.env_checker import check_env
import matplotlib.pyplot as plt

# Load the engineered feature table exported earlier in this notebook.
# The previous path (/content/rl_ready_sentiment_features.csv) has no active
# writer in the cells shown here (its to_csv call is commented out), and the
# evaluation cell loads rl_ready_features.csv, so train on that same file.
df = pd.read_csv("rl_ready_features.csv")
#df['Date'] = pd.to_datetime(df['Date'])
df = df.dropna()

SEED = 42  # fixed seed so exploration and network init are repeatable

# Prepare environment
env = StockTradingEnv(df, window_size=5)
check_env(env)

# Train the model
model = DQN(
    "MlpPolicy",
    env,
    verbose=1,
    learning_rate=1e-4,
    buffer_size=10000,
    batch_size=32,
    train_freq=1,
    target_update_interval=100,
    seed=SEED,
)
model.learn(total_timesteps=10_000)

# Evaluate with the greedy policy.
# NOTE: this replays the same rows the agent was trained on, so the numbers
# below are in-sample results.
obs, _ = env.reset(seed=SEED)
done = False
total_reward = 0
while not done:
    action, _ = model.predict(obs, deterministic=True)
    obs, reward, terminated, truncated, _ = env.step(action)
    done = terminated or truncated
    total_reward += reward

print(f"Total Profit: {env.total_profit}")
print(f"Number of Trades: {env.trades}")

# Save the trained model
model.save("dqn_hdfc_model")



Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 134      |
|    ep_rew_mean      | 222      |
|    exploration_rate | 0.491    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 155      |
|    time_elapsed     | 3        |
|    total_timesteps  | 536      |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 9.43e+04 |
|    n_updates        | 435      |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 134      |
|    ep_rew_mean      | 239      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 8        |
|    fps              | 162      |
|    time_elapsed     | 6        |
|    total_timesteps  | 1072     |
| train/              |        

In [None]:
import pandas as pd
from stable_baselines3 import DQN
import matplotlib.pyplot as plt
import numpy as np

df = pd.read_csv("rl_ready_features.csv")
#df["Date"] = pd.to_datetime(df["Date"])

env = StockTradingEnv(df)
model = DQN.load("dqn_hdfc_model")

# Roll the greedy policy through one episode. env.step() returns five values
# (obs, reward, terminated, truncated, info), not four.
obs, _ = env.reset()
done = False
while not done:
    action, _states = model.predict(obs, deterministic=True)
    obs, reward, terminated, truncated, _ = env.step(action)
    done = terminated or truncated

# env.trade_history is a flat list of (side, step, price) tuples with side in
# {'Buy', 'Sell'}. Keep the raw events for plotting and pair each Buy with the
# following Sell to get one row per completed trade.
events = pd.DataFrame(env.trade_history, columns=["type", "step", "price"])

round_trips = []
open_buy = None
for side, step, trade_price in env.trade_history:
    if side == 'Buy':
        open_buy = (step, trade_price)
    elif side == 'Sell' and open_buy is not None:
        entry_step, entry_price = open_buy
        round_trips.append({
            "entry_step": entry_step,
            "exit_step": step,
            "entry_price": entry_price,
            "exit_price": trade_price,
            "profit": trade_price - entry_price,
        })
        open_buy = None

trades = pd.DataFrame(
    round_trips,
    columns=["entry_step", "exit_step", "entry_price", "exit_price", "profit"],
)

# === Metrics ===
num_trades = len(trades)
returns = trades['profit']
if num_trades:
    total_profit = returns.sum()
    win_rate = (returns > 0).mean()
    avg_profit = returns.mean()
else:
    total_profit = win_rate = avg_profit = 0.0

# Annualize the per-trade Sharpe with sqrt(trades per year), treating each row
# as one of 252 trading days per year (the constant the original formula used).
episode_rows = max(len(env.df) - env.window_size, 1)
trades_per_year = num_trades * 252 / episode_rows
sharpe = (
    returns.mean() / returns.std() * np.sqrt(trades_per_year)
    if num_trades > 1 and returns.std() > 0
    else 0
)

print(f"Total Profit: ₹{total_profit:.2f}")
print(f"Win Rate: {win_rate:.2%}")
print(f"Avg Trade Profit: ₹{avg_profit:.2f}")
print(f"Sharpe Ratio: {sharpe:.2f}")
print(f"Number of Trades: {num_trades}")

# === Equity Curve ===
# rl_ready_features.csv has no Date column (it is dropped before export), so
# the x-axis is the environment's step (row index).
trades['cum_profit'] = trades['profit'].cumsum()
plt.figure(figsize=(10, 5))
plt.plot(trades['exit_step'], trades['cum_profit'])
plt.title("Equity Curve")
plt.xlabel("Step")
plt.ylabel("Cumulative Profit")
plt.grid(True)
plt.show()

# === Trade Overlay Plot ===
price = env.df["Close"]
buys = events[events["type"] == "Buy"]
sells = events[events["type"] == "Sell"]

plt.figure(figsize=(12, 6))
plt.plot(price.index, price.values, label="Price")
# One scatter call per side gives a single legend entry each.
plt.scatter(buys["step"], buys["price"], color="green", marker="^", label="BUY")
plt.scatter(sells["step"], sells["price"], color="red", marker="v", label="SELL")

plt.title("Trades on Price Chart")
plt.xlabel("Step")
plt.ylabel("Price")
plt.grid(True)
plt.legend()
plt.show()


ValueError: too many values to unpack (expected 4)

In [None]:
import pandas as pd

# Baseline ("no change") table: the same merge as the first cell of the
# notebook, rebuilt from the raw files so it does not depend on any frame
# that later cells have modified.
articles = pd.read_csv("/content/final.csv")
q_sent = pd.read_csv("/content/HDFC.csv")

# The article timestamp column is the first column whose name contains "date"
# (case-insensitive). Fail fast with a readable message; without this guard a
# missing column surfaces later as `KeyError: None`.
article_date_col = next((col for col in articles.columns if 'date' in col.lower()), None)
if article_date_col is None:
    raise KeyError(
        "No column containing 'date' found in /content/final.csv; "
        f"available columns: {list(articles.columns)}"
    )
quarter_date_col = 'Date'

# Parse both date columns; values that cannot be parsed become NaT.
articles[article_date_col] = pd.to_datetime(articles[article_date_col], errors='coerce')
q_sent[quarter_date_col] = pd.to_datetime(q_sent[quarter_date_col], errors='coerce')

# Drop rows with invalid dates in articles
articles = articles.dropna(subset=[article_date_col])

# Aggregate daily news sentiment (mean)
daily_news = (
    articles
    .groupby(articles[article_date_col].dt.date)['sentiment_score']
    .mean()
    .reset_index()
    .rename(columns={article_date_col: 'Date', 'sentiment_score': 'daily_news_sentiment'})
)
daily_news['Date'] = pd.to_datetime(daily_news['Date'])

# Preserve all columns from HDFC.csv
# Merge on Date
df_nochange = pd.merge(q_sent, daily_news, on='Date', how='left')

# Fill days without news.
# Fill this frame's own column: reading it from final_df made the cell depend
# on another cell's state and on the two frames sharing the same row index.
df_nochange['daily_news_sentiment'] = df_nochange['daily_news_sentiment'].fillna(0)

# Save to CSV
output_path = 'final_sentiment_nochange.csv'
df_nochange.to_csv(output_path, index=False)

# Optionally: print or return a preview
df_nochange


Unnamed: 0,Date,Adj Close,Close,High,Low,Open,Volume,quarter,report_sentiment,daily_news_sentiment
0,NaT,HDFCBANK.NS,HDFCBANK.NS,HDFCBANK.NS,HDFCBANK.NS,HDFCBANK.NS,HDFCBANK.NS,,,0.000000
1,2022-04-01,1451.98486328125,1506.0,1510.0,1470.300048828125,1476.4000244140625,7284278,2023Q1,0.726230,0.000000
2,2022-04-04,1597.376220703125,1656.800048828125,1722.0999755859375,1562.550048828125,1580.0,48725485,2023Q1,0.726230,0.000000
3,2022-04-05,1550.5675048828125,1608.25,1666.699951171875,1602.0,1666.699951171875,15461291,2023Q1,0.726230,0.000000
4,2022-04-06,1495.226318359375,1550.8499755859375,1589.800048828125,1547.3499755859375,1587.699951171875,13513930,2023Q1,0.726230,0.000000
...,...,...,...,...,...,...,...,...,...,...
673,2024-12-23,1801.0,1801.0,1806.0,1781.300048828125,1782.0,5522296,2025Q3,0.770711,0.807341
674,2024-12-24,1798.0999755859375,1798.0999755859375,1808.699951171875,1789.550048828125,1802.050048828125,7242917,2025Q3,0.770711,0.874686
675,2024-12-26,1790.75,1790.75,1812.0,1780.75,1798.550048828125,5240839,2025Q3,0.770711,0.869924
676,2024-12-27,1798.25,1798.25,1805.949951171875,1789.0999755859375,1794.800048828125,3629665,2025Q3,0.770711,0.868356


In [None]:
# Preview only: the result is not assigned, so df_nochange itself is unchanged.
df_nochange.dropna()

Unnamed: 0,Date,Adj Close,Close,High,Low,Open,Volume,quarter,report_sentiment,daily_news_sentiment
1,2022-04-01,1451.98486328125,1506.0,1510.0,1470.300048828125,1476.4000244140625,7284278,2023Q1,0.726230,0.000000
2,2022-04-04,1597.376220703125,1656.800048828125,1722.0999755859375,1562.550048828125,1580.0,48725485,2023Q1,0.726230,0.000000
3,2022-04-05,1550.5675048828125,1608.25,1666.699951171875,1602.0,1666.699951171875,15461291,2023Q1,0.726230,0.000000
4,2022-04-06,1495.226318359375,1550.8499755859375,1589.800048828125,1547.3499755859375,1587.699951171875,13513930,2023Q1,0.726230,0.000000
5,2022-04-07,1462.349365234375,1516.75,1541.3499755859375,1513.699951171875,1541.300048828125,20390866,2023Q1,0.726230,0.000000
...,...,...,...,...,...,...,...,...,...,...
673,2024-12-23,1801.0,1801.0,1806.0,1781.300048828125,1782.0,5522296,2025Q3,0.770711,0.807341
674,2024-12-24,1798.0999755859375,1798.0999755859375,1808.699951171875,1789.550048828125,1802.050048828125,7242917,2025Q3,0.770711,0.874686
675,2024-12-26,1790.75,1790.75,1812.0,1780.75,1798.550048828125,5240839,2025Q3,0.770711,0.869924
676,2024-12-27,1798.25,1798.25,1805.949951171875,1789.0999755859375,1794.800048828125,3629665,2025Q3,0.770711,0.868356


In [None]:
# Copy of the baseline table without the Adj Close / Date / Volume / quarter columns.
df_clean_nochange = df_nochange.drop(columns=['Adj Close', 'Date', 'Volume', 'quarter'])

In [None]:
from stable_baselines3 import DQN
from stable_baselines3.common.env_checker import check_env
import matplotlib.pyplot as plt

# Load the baseline ("no change") table written by the merge cell above.
df = pd.read_csv("final_sentiment_nochange.csv")
#df['Date'] = pd.to_datetime(df['Date'])

# The saved table still contains the ticker row from HDFC.csv (a row of
# "HDFCBANK.NS" strings), so read_csv parses the price columns as object dtype.
# StockTradingEnv excludes object columns from its features, which would leave
# the agent with the sentiment columns only. Coerce them to numbers first; the
# ticker row becomes NaN and is removed by dropna().
PRICE_COLS = ["Open", "High", "Low", "Close", "Adj Close", "Volume"]
for col in PRICE_COLS:
    if col in df.columns:
        df[col] = pd.to_numeric(df[col], errors="coerce")
df = df.dropna()

# Use the same column set as the engineered-feature table (which drops these
# four columns before export) so the two agents are compared like for like.
df = df.drop(columns=['Adj Close', 'Date', 'Volume', 'quarter'], errors="ignore")

SEED = 42  # fixed seed so exploration and network init are repeatable

# Prepare environment
env = StockTradingEnv(df, window_size=5)
check_env(env)

# Train the model
model = DQN(
    "MlpPolicy",
    env,
    verbose=1,
    learning_rate=1e-4,
    buffer_size=10000,
    batch_size=32,
    train_freq=1,
    target_update_interval=100,
    seed=SEED,
)
model.learn(total_timesteps=10_000)

# Evaluate with the greedy policy.
# NOTE: this replays the same rows the agent was trained on, so the numbers
# below are in-sample results.
obs, _ = env.reset(seed=SEED)
done = False
total_reward = 0
while not done:
    action, _ = model.predict(obs, deterministic=True)
    obs, reward, terminated, truncated, _ = env.step(action)
    done = terminated or truncated
    total_reward += reward

print(f"Total Profit: {env.total_profit}")
print(f"Number of Trades: {env.trades}")

# Save the trained model
model.save("dqn_hdfc_model_2")



Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 671      |
|    ep_rew_mean      | 230      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 4        |
|    fps              | 188      |
|    time_elapsed     | 14       |
|    total_timesteps  | 2684     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 6.95     |
|    n_updates        | 2583     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 671      |
|    ep_rew_mean      | 123      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 8        |
|    fps              | 187      |
|    time_elapsed     | 28       |
|    total_timesteps  | 5368     |
| train/              |        