In [5]:
import numpy as np
import gym
from gym import spaces
from collections import defaultdict
from docx import Document  # For generating the report

# Simulate Stock Market Environment
class StockMarketEnv(gym.Env):
    """Single-asset trading environment driven by a fixed price series.

    Actions (``Discrete(3)``): 0 buys one share, 1 sells one share, 2 holds.
    Every executed trade pays a flat ``transaction_cost``. The observation is
    a length-1 array holding the price at ``current_step``. The reward is the
    cumulative profit ``net_worth - initial_balance`` after the action, not a
    per-step delta.

    Args:
        data: Indexable price series supporting ``len()`` and integer indexing.
        initial_balance: Cash available at the start of every episode.
    """

    def __init__(self, data, initial_balance=10000):
        super(StockMarketEnv, self).__init__()
        self.data = data
        self.initial_balance = initial_balance
        self.current_step = 0
        self.balance = initial_balance
        self.num_shares = 0
        self.net_worth = initial_balance
        self.transaction_cost = 10
        self.done = False
        self.action_space = spaces.Discrete(3)  # Buy, Sell, Hold
        self.observation_space = spaces.Box(low=0, high=np.inf, shape=(1,))

    def reset(self):
        """Restore the starting portfolio and return the first observation."""
        self.current_step = 0
        self.balance = self.initial_balance
        self.num_shares = 0
        self.net_worth = self.initial_balance
        self.done = False
        return self._get_obs()

    def _get_obs(self):
        """Return the price at ``current_step`` wrapped in a length-1 array."""
        return np.array([self.data[self.current_step]])

    def step(self, action):
        """Apply ``action`` at the current price and advance one step.

        Returns:
            ``(observation, reward, done, info)`` where ``info`` is an empty
            dict and ``done`` becomes True once the last price is reached.
        """
        current_price = self.data[self.current_step]

        if action == 0:  # Buy
            # The fee is part of the purchase: checking the price alone let
            # the balance drop below zero by up to ``transaction_cost``.
            total_cost = current_price + self.transaction_cost
            if self.balance >= total_cost:
                self.num_shares += 1
                self.balance -= total_cost
        elif action == 1:  # Sell
            if self.num_shares > 0:
                self.num_shares -= 1
                self.balance += current_price - self.transaction_cost
        # Any other action (2 = Hold) leaves the portfolio untouched.

        self.net_worth = self.balance + self.num_shares * current_price
        reward = self.net_worth - self.initial_balance

        # Clamp so the observation index never runs past the series; without
        # this a one-element series raised IndexError on the first step.
        last_index = len(self.data) - 1
        self.current_step = min(self.current_step + 1, last_index)
        if self.current_step >= last_index:
            self.done = True

        return self._get_obs(), reward, self.done, {}

    def render(self, mode='human'):
        """Print the current step, the price at that step and the net worth."""
        print(f"Step: {self.current_step}, Price: {self.data[self.current_step]}, Net Worth: {self.net_worth}")


# Temporal Difference Agent
class TDTradingAgent:
    """Tabular Q-learning agent with epsilon-greedy exploration.

    States are the environment observations converted to tuples so they can
    key ``q_table``; unseen states start with all-zero action values.

    Args:
        env: Environment exposing ``reset()``, ``step(action)``, ``render()``
            and ``action_space`` (with ``n`` and ``sample()``).
        alpha: Learning rate.
        gamma: Discount factor.
        epsilon: Probability of taking a random action during training.
    """

    def __init__(self, env, alpha=0.1, gamma=0.99, epsilon=0.1):
        self.env = env
        self.alpha = alpha
        self.gamma = gamma
        self.epsilon = epsilon
        self.q_table = defaultdict(lambda: np.zeros(env.action_space.n))

    def choose_action(self, state):
        """Return a random action with probability ``epsilon``, else the greedy one."""
        state = tuple(state)  # Arrays are unhashable; tuples can key the table
        if np.random.rand() < self.epsilon:
            return self.env.action_space.sample()  # Explore
        return np.argmax(self.q_table[state])

    def train(self, episodes):
        """Run ``episodes`` full episodes, updating ``q_table`` after every step."""
        for _episode in range(episodes):
            state = tuple(self.env.reset())
            done = False
            while not done:
                action = self.choose_action(state)
                next_state, reward, done, _info = self.env.step(action)
                next_state = tuple(next_state)

                # No return follows a terminal transition, so the target must
                # not bootstrap from whatever value the table holds for it.
                future_value = 0.0 if done else np.max(self.q_table[next_state])
                td_target = reward + self.gamma * future_value

                self.q_table[state][action] = (
                    (1 - self.alpha) * self.q_table[state][action] + self.alpha * td_target
                )

                state = next_state

    def test(self):
        """Run one greedy episode, rendering the environment after every step."""
        state = tuple(self.env.reset())
        done = False
        while not done:
            action = np.argmax(self.q_table[state])
            next_state, _reward, done, _info = self.env.step(action)
            self.env.render()
            state = tuple(next_state)


# Synthetic price path: running total of 100 draws with mean 100 and std 2,
# seeded so the series is the same on every run
np.random.seed(42)
stock_prices = (np.random.randn(100) * 2 + 100).cumsum()

# Build the environment, fit the agent, then replay the greedy policy
env = StockMarketEnv(stock_prices)
agent = TDTradingAgent(env)
agent.train(100)
agent.test()

# Write the Word report from the environment state left by the test run
doc = Document()
doc.add_heading("Reinforcement Learning Trading Agent Report", level=1)
for report_line in (
    "This document contains results and analysis of the trading agent.",
    f"Initial Balance: {env.initial_balance}",
    f"Final Net Worth: {env.net_worth}",
    f"Number of Shares Held: {env.num_shares}",
    f"Final Balance: {env.balance}",
):
    doc.add_paragraph(report_line)
doc.save("TradingAgentReport.docx")


Step: 1, Price: 200.7168997036801, Net Worth: 10000.0
Step: 2, Price: 302.0122767798815, Net Worth: 9990.0
Step: 3, Price: 405.05833649269755, Net Worth: 10081.2953770762
Step: 4, Price: 504.59002974325085, Net Worth: 10277.387496501833
Step: 5, Price: 604.1217558293525, Net Worth: 10565.982576253493
Step: 6, Price: 707.2801814603673, Net Worth: 10954.1094805979
Step: 7, Price: 808.8150509186731, Net Worth: 11459.901608752974
Step: 8, Price: 907.8761021468032, Net Worth: 12059.110825502808
Step: 9, Price: 1008.9612222339751, Net Worth: 12742.538184099718
Step: 10, Price: 1108.0343868483503, Net Worth: 13541.219144797093
Step: 11, Price: 1207.1029273412098, Net Worth: 14422.87762632647
Step: 12, Price: 1307.586851884342, Net Worth: 15403.563031255064
Step: 13, Price: 1403.7602913950263, Net Worth: 16498.886201229518
Step: 14, Price: 1500.3104557300003, Net Worth: 17652.967475357727
Step: 15, Price: 1599.1858806715184, Net Worth: 18811.569447377417
Step: 16, Price: 1697.1602184308495, Ne

In [6]:
import numpy as np
import pandas as pd
import gym
from gym import spaces
from collections import defaultdict
from docx import Document  # For generating the report
from dash import Dash, dcc, html, dash_table
import plotly.graph_objs as go

# Simulate Stock Market Environment
class StockMarketEnv(gym.Env):
    """Single-asset trading environment that records a per-step history.

    Actions (``Discrete(3)``): 0 buys one share, 1 sells one share, 2 holds.
    Every executed trade pays a flat ``transaction_cost``. The observation is
    a length-1 array holding the price at ``current_step``. The reward is the
    cumulative profit ``net_worth - initial_balance`` after the action, not a
    per-step delta. Each call to ``step`` appends one record to ``history``.

    Args:
        data: Indexable price series supporting ``len()`` and integer indexing.
        initial_balance: Cash available at the start of every episode.
    """

    def __init__(self, data, initial_balance=10000):
        super(StockMarketEnv, self).__init__()
        self.data = data
        self.initial_balance = initial_balance
        self.current_step = 0
        self.balance = initial_balance
        self.num_shares = 0
        self.net_worth = initial_balance
        self.transaction_cost = 10
        self.done = False
        self.action_space = spaces.Discrete(3)  # Buy, Sell, Hold
        self.observation_space = spaces.Box(low=0, high=np.inf, shape=(1,))
        self.history = []  # Track actions and net worth for visualization

    def reset(self):
        """Restore the starting portfolio, clear ``history`` and return the first observation."""
        self.current_step = 0
        self.balance = self.initial_balance
        self.num_shares = 0
        self.net_worth = self.initial_balance
        self.done = False
        self.history = []  # Only the most recent episode is kept
        return self._get_obs()

    def _get_obs(self):
        """Return the price at ``current_step`` wrapped in a length-1 array."""
        return np.array([self.data[self.current_step]])

    def step(self, action):
        """Apply ``action`` at the current price, log it, and advance one step.

        Returns:
            ``(observation, reward, done, info)`` where ``info`` is an empty
            dict and ``done`` becomes True once the last price is reached.
        """
        current_price = self.data[self.current_step]

        if action == 0:  # Buy
            # The fee is part of the purchase: checking the price alone let
            # the balance drop below zero by up to ``transaction_cost``.
            total_cost = current_price + self.transaction_cost
            if self.balance >= total_cost:
                self.num_shares += 1
                self.balance -= total_cost
        elif action == 1:  # Sell
            if self.num_shares > 0:
                self.num_shares -= 1
                self.balance += current_price - self.transaction_cost
        # Any other action (2 = Hold) leaves the portfolio untouched.

        self.net_worth = self.balance + self.num_shares * current_price
        reward = self.net_worth - self.initial_balance

        # Record the step at which the action was taken (before advancing).
        self.history.append({
            "Step": self.current_step,
            "Action": action,
            "Price": current_price,
            "Net Worth": self.net_worth,
            "Balance": self.balance,
            "Shares": self.num_shares
        })

        # Clamp so the observation index never runs past the series; without
        # this a one-element series raised IndexError on the first step.
        last_index = len(self.data) - 1
        self.current_step = min(self.current_step + 1, last_index)
        if self.current_step >= last_index:
            self.done = True

        return self._get_obs(), reward, self.done, {}

    def render(self, mode='human'):
        """Print the current step, the price at that step and the net worth."""
        print(f"Step: {self.current_step}, Price: {self.data[self.current_step]}, Net Worth: {self.net_worth}")


# Temporal Difference Agent
class TDTradingAgent:
    """Tabular Q-learning agent with epsilon-greedy exploration.

    States are the environment observations converted to tuples so they can
    key ``q_table``; unseen states start with all-zero action values.

    Args:
        env: Environment exposing ``reset()``, ``step(action)`` and
            ``action_space`` (with ``n`` and ``sample()``).
        alpha: Learning rate.
        gamma: Discount factor.
        epsilon: Probability of taking a random action during training.
    """

    def __init__(self, env, alpha=0.1, gamma=0.99, epsilon=0.1):
        self.env = env
        self.alpha = alpha
        self.gamma = gamma
        self.epsilon = epsilon
        self.q_table = defaultdict(lambda: np.zeros(env.action_space.n))

    def choose_action(self, state):
        """Return a random action with probability ``epsilon``, else the greedy one."""
        state = tuple(state)  # Arrays are unhashable; tuples can key the table
        if np.random.rand() < self.epsilon:
            return self.env.action_space.sample()  # Explore
        return np.argmax(self.q_table[state])

    def train(self, episodes):
        """Run ``episodes`` full episodes, updating ``q_table`` after every step."""
        for _episode in range(episodes):
            state = tuple(self.env.reset())
            done = False
            while not done:
                action = self.choose_action(state)
                next_state, reward, done, _info = self.env.step(action)
                next_state = tuple(next_state)

                # No return follows a terminal transition, so the target must
                # not bootstrap from whatever value the table holds for it.
                future_value = 0.0 if done else np.max(self.q_table[next_state])
                td_target = reward + self.gamma * future_value

                self.q_table[state][action] = (
                    (1 - self.alpha) * self.q_table[state][action] + self.alpha * td_target
                )

                state = next_state

    def test(self):
        """Run one greedy episode without rendering."""
        state = tuple(self.env.reset())
        done = False
        while not done:
            action = np.argmax(self.q_table[state])
            next_state, _reward, done, _info = self.env.step(action)
            state = tuple(next_state)


# Synthetic price path: running total of 100 draws with mean 100 and std 2,
# seeded so the series is the same on every run
np.random.seed(42)
stock_prices = (np.random.randn(100) * 2 + 100).cumsum()

# Build the environment, fit the agent, then replay the greedy policy once
env = StockMarketEnv(data=stock_prices)
agent = TDTradingAgent(env=env)
agent.train(episodes=100)
agent.test()

# One row per step of the greedy replay, used by the dashboard below
history_df = pd.DataFrame(data=env.history)

# Initialize Dash app
app = Dash(__name__)

# Static layout: every figure is built once from history_df and the final
# environment state; there are no callbacks.
app.layout = html.Div([
    html.H1("Reinforcement Learning Trading Agent Dashboard", style={'textAlign': 'center'}),

    # Line Graph: Net Worth Over Steps
    dcc.Graph(
        id='net-worth-graph',
        figure={
            'data': [
                go.Scatter(
                    x=history_df["Step"],
                    y=history_df["Net Worth"],
                    mode='lines+markers',
                    name='Net Worth'
                )
            ],
            'layout': go.Layout(
                title="Net Worth Over Time",
                xaxis={'title': 'Steps'},
                yaxis={'title': 'Net Worth'}
            )
        }
    ),

    # Scatter Plot: Actions Taken (price per step, coloured by action code)
    dcc.Graph(
        id='actions-graph',
        figure={
            'data': [
                go.Scatter(
                    x=history_df["Step"],
                    y=history_df["Price"],
                    mode='markers',
                    marker=dict(
                        size=10,
                        color=history_df["Action"],
                        colorscale='Viridis',
                        showscale=True,
                        # The colour bar belongs to this marker, so its title
                        # is set here; a layout-level coloraxis colorbar only
                        # applies to traces that reference a coloraxis.
                        colorbar=dict(title="Action (0=Buy, 1=Sell, 2=Hold)")
                    ),
                    name='Actions'
                )
            ],
            'layout': go.Layout(
                title="Actions Taken Over Steps",
                xaxis={'title': 'Steps'},
                yaxis={'title': 'Price'}
            )
        }
    ),

    # Summary Table: single row read from the environment after agent.test()
    dash_table.DataTable(
        id='summary-table',
        columns=[{"name": col, "id": col} for col in ["Initial Balance", "Final Balance", "Final Net Worth", "Total Shares"]],
        data=[{
            "Initial Balance": env.initial_balance,
            "Final Balance": env.balance,
            "Final Net Worth": env.net_worth,
            "Total Shares": env.num_shares
        }],
        style_table={'margin': 'auto', 'width': '50%'},
        style_header={'backgroundColor': 'lightgrey', 'fontWeight': 'bold'},
        style_cell={'textAlign': 'center'}
    )
])

if __name__ == '__main__':
    # NOTE(review): recent Dash releases expose app.run as the preferred entry
    # point; confirm against the installed version before switching.
    app.run_server(debug=True)


In [7]:
import numpy as np
import pandas as pd
import gym
from gym import spaces
import random
from collections import defaultdict
from dash import Dash, dcc, html, dash_table
import plotly.graph_objs as go

# Load dataset
data = pd.read_csv("healthcare_dataset.csv")  # Replace with your file path
data = data.dropna()  # Handle missing values

# Min-max scale the selected columns to [0, 1]; all other columns keep their
# original values.
columns_to_normalize = [
    "age", "trestbps", "chol", "thalach", "oldpeak", "ca"
]
feature_min = data[columns_to_normalize].min()
feature_range = data[columns_to_normalize].max() - feature_min
# A constant column has zero range; dividing by it would fill the column with
# NaN after the dropna above has already run. Dividing by 1 maps it to 0.
feature_range = feature_range.replace(0, 1)
data[columns_to_normalize] = (data[columns_to_normalize] - feature_min) / feature_range

# Define Healthcare Environment
class HealthcareEnv(gym.Env):
    """Environment that walks through the rows of a patient table in order.

    Each observation is one row of ``data`` as an array. Actions are
    ``Discrete(3)``: 0=Decrease dosage, 1=Maintain, 2=Increase dosage. The
    reward is a placeholder drawn uniformly from [-1, 1] and does not depend
    on the action or the state.

    Args:
        data: pandas DataFrame; one row per step.
    """

    def __init__(self, data):
        super(HealthcareEnv, self).__init__()
        self.data = data
        self.current_index = 0
        self.action_space = spaces.Discrete(3)  # Actions: 0=Decrease dosage, 1=Maintain, 2=Increase dosage
        # Only some columns are scaled to [0, 1] before the frame is passed
        # in, so the space is declared unbounded rather than claiming a
        # [0, 1] range that raw columns exceed.
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(data.shape[1],), dtype=np.float32)
        self.current_state = None
        self.done = False
        self.reward_history = []
        self.treatment_history = []

    def reset(self):
        """Rewind to the first row, clear both histories and return that row."""
        self.current_index = 0
        self.done = False
        self.reward_history = []
        self.treatment_history = []
        self.current_state = self.data.iloc[self.current_index].values
        return self.current_state

    def step(self, action):
        """Log ``action`` with a simulated reward and move to the next row.

        Returns:
            ``(observation, reward, done, info)`` where ``info`` is an empty
            dict and ``done`` becomes True once the last row is reached.
        """
        # Simulate reward: positive for improvement, negative for worsening
        reward = random.uniform(-1, 1)  # Simulated reward, adjust with real logic if needed
        self.reward_history.append(reward)

        # Update treatment history
        self.treatment_history.append({"Step": self.current_index, "Action": action, "Reward": reward})

        # Clamp so the row index never runs past the frame; without this a
        # single-row frame raised IndexError on the first step.
        last_index = len(self.data) - 1
        self.current_index = min(self.current_index + 1, last_index)
        if self.current_index >= last_index:
            self.done = True
        self.current_state = self.data.iloc[self.current_index].values
        return self.current_state, reward, self.done, {}

    def render(self, mode='human'):
        """Print the current index, state and latest reward (None before any step)."""
        latest_reward = self.reward_history[-1] if self.reward_history else None
        print(f"Step: {self.current_index}, State: {self.current_state}, Reward: {latest_reward}")

# Temporal Difference Learning Agent
class TDHealthcareAgent:
    """Tabular Q-learning agent with epsilon-greedy exploration.

    States are the environment observations converted to tuples so they can
    key ``q_table``; unseen states start with all-zero action values.

    Args:
        env: Environment exposing ``reset()``, ``step(action)``, ``render()``
            and ``action_space`` (with ``n`` and ``sample()``).
        alpha: Learning rate.
        gamma: Discount factor.
        epsilon: Probability of taking a random action during training.
    """

    def __init__(self, env, alpha=0.1, gamma=0.99, epsilon=0.1):
        self.env = env
        self.alpha = alpha
        self.gamma = gamma
        self.epsilon = epsilon
        self.q_table = defaultdict(lambda: np.zeros(env.action_space.n))

    def choose_action(self, state):
        """Return a random action with probability ``epsilon``, else the greedy one."""
        state = tuple(state)  # Convert state to tuple to make it hashable
        if np.random.rand() < self.epsilon:
            return self.env.action_space.sample()  # Explore
        return np.argmax(self.q_table[state])

    def train(self, episodes):
        """Run ``episodes`` full episodes, updating ``q_table`` after every step."""
        for _episode in range(episodes):
            state = tuple(self.env.reset())
            done = False
            while not done:
                action = self.choose_action(state)
                next_state, reward, done, _info = self.env.step(action)
                next_state = tuple(next_state)

                # No return follows a terminal transition, so the target must
                # not bootstrap from the table. This matters when the final
                # row equals an earlier row and already has learned values.
                future_value = 0.0 if done else np.max(self.q_table[next_state])
                td_target = reward + self.gamma * future_value

                self.q_table[state][action] = (
                    (1 - self.alpha) * self.q_table[state][action] + self.alpha * td_target
                )

                state = next_state

    def test(self):
        """Run one greedy episode, rendering the environment after every step."""
        state = tuple(self.env.reset())
        done = False
        while not done:
            action = np.argmax(self.q_table[state])  # Use the tuple as the state key
            next_state, _reward, done, _info = self.env.step(action)
            self.env.render()
            state = tuple(next_state)

# Seed the generators this cell draws from so a fresh-kernel rerun repeats:
# `random` produces the simulated rewards and `np.random` drives the
# epsilon-greedy coin flip.
# NOTE(review): action_space.sample() may use its own generator that these
# seeds do not control — confirm against the installed gym version.
random.seed(42)
np.random.seed(42)

# Create environment and agent
env = HealthcareEnv(data)
agent = TDHealthcareAgent(env)
agent.train(100)
agent.test()

# Prepare data for visualization (one row per step of the greedy test run)
history_df = pd.DataFrame(env.treatment_history)

# Dash GUI for Interactive Visualization
# The figures and table columns are assembled first, then placed in a static
# layout (no callbacks).
step_axis = history_df["Step"]
action_axis = history_df["Action"]

# Line graph: simulated reward at each step
reward_figure = {
    'data': [
        go.Scatter(x=step_axis, y=history_df["Reward"], mode='lines+markers', name='Reward')
    ],
    'layout': go.Layout(
        title="Rewards Over Time",
        xaxis={'title': 'Step'},
        yaxis={'title': 'Reward'}
    )
}

# Scatter plot: action code at each step, coloured by the same code
action_figure = {
    'data': [
        go.Scatter(
            x=step_axis,
            y=action_axis,
            mode='markers',
            marker=dict(size=10, color=action_axis, colorscale='Viridis', showscale=True),
            name='Actions'
        )
    ],
    'layout': go.Layout(
        title="Actions Over Time",
        xaxis={'title': 'Step'},
        yaxis={'title': 'Action (0=Decrease, 1=Maintain, 2=Increase)'}
    )
}

# Table: every recorded step of the test run
table_columns = [{"name": col, "id": col} for col in ("Step", "Action", "Reward")]

app = Dash(__name__)

app.layout = html.Div([
    html.H1("AI-Driven Healthcare System Dashboard", style={'textAlign': 'center'}),
    dcc.Graph(id='reward-graph', figure=reward_figure),
    dcc.Graph(id='action-graph', figure=action_figure),
    dash_table.DataTable(
        id='summary-table',
        columns=table_columns,
        data=history_df.to_dict('records'),
        style_table={'margin': 'auto', 'width': '60%'},
        style_header={'backgroundColor': 'lightgrey', 'fontWeight': 'bold'},
        style_cell={'textAlign': 'center'}
    )
])

if __name__ == '__main__':
    app.run_server(debug=True)


Step: 1, State: [0.5        1.         0.         0.43396226 0.17579909 1.
 0.         0.64122137 1.         0.5        0.         0.
 3.         0.        ], Reward: 0.48764954768565505
Step: 2, State: [0.85416667 1.         0.         0.48113208 0.10958904 0.
 1.         0.41221374 1.         0.41935484 0.         0.
 3.         0.        ], Reward: 0.9168933573636289
Step: 3, State: [0.66666667 1.         0.         0.50943396 0.17579909 0.
 1.         0.6870229  0.         0.         2.         0.25
 3.         0.        ], Reward: 0.26344909676031514
Step: 4, State: [0.6875     0.         0.         0.41509434 0.38356164 1.
 1.         0.26717557 0.         0.30645161 1.         0.75
 2.         0.        ], Reward: 0.8817584390509094
Step: 5, State: [0.60416667 0.         0.         0.05660377 0.27853881 0.
 0.         0.38931298 0.         0.16129032 1.         0.
 2.         1.        ], Reward: 0.23547676013300656
Step: 6, State: [0.60416667 1.         0.         0.18867925 0.