In [None]:
import gymnasium as gym
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import random
from collections import deque

# ======================== Actor and Critic ===========================
class Actor(nn.Module):
    def __init__(self, state_dim, action_dim, max_action):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(state_dim, 256), nn.ReLU(),
            nn.Linear(256, 256), nn.ReLU(),
            nn.Linear(256, action_dim), nn.Tanh()
        )
        self.max_action = max_action

    def forward(self, state):
        return self.max_action * self.net(state)

class Critic(nn.Module):
    def __init__(self, state_dim, action_dim):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(state_dim + action_dim, 256), nn.ReLU(),
            nn.Linear(256, 256), nn.ReLU(),
            nn.Linear(256, 1)
        )

    def forward(self, state, action):
        return self.net(torch.cat([state, action], dim=1))

# ========================== Replay Buffer ============================
class ReplayBuffer:
    def __init__(self, max_size=100000):
        self.buffer = deque(maxlen=max_size)

    def push(self, transition):
        self.buffer.append(transition)

    def sample(self, batch_size):
        transitions = random.sample(self.buffer, batch_size)
        return map(np.array, zip(*transitions))

# ========================== DDPG Agent ===============================
class DDPGAgent:
    def __init__(self, state_dim, action_dim, max_action):
        self.actor = Actor(state_dim, action_dim, max_action)
        self.actor_target = Actor(state_dim, action_dim, max_action)
        self.critic = Critic(state_dim, action_dim)
        self.critic_target = Critic(state_dim, action_dim)

        self.actor_target.load_state_dict(self.actor.state_dict())
        self.critic_target.load_state_dict(self.critic.state_dict())

        self.actor_optimizer = optim.Adam(self.actor.parameters(), lr=1e-4)
        self.critic_optimizer = optim.Adam(self.critic.parameters(), lr=1e-3)
        self.max_action = max_action
        self.buffer = ReplayBuffer()

    def select_action(self, state, noise=0.1):
        state = torch.FloatTensor(state).unsqueeze(0)
        action = self.actor(state).detach().numpy()[0]
        return np.clip(action + noise * np.random.randn(*action.shape), -self.max_action, self.max_action)

    def train(self, batch_size=64, gamma=0.99, tau=0.005):
        if len(self.buffer.buffer) < batch_size:
            return

        states, actions, rewards, next_states, dones = self.buffer.sample(batch_size)
        
        states = torch.FloatTensor(states)
        actions = torch.FloatTensor(actions)
        rewards = torch.FloatTensor(rewards).unsqueeze(1)
        next_states = torch.FloatTensor(next_states)
        dones = torch.FloatTensor(dones).unsqueeze(1)

        with torch.no_grad():
            next_actions = self.actor_target(next_states)
            target_q = self.critic_target(next_states, next_actions)
            target_q = rewards + (1 - dones) * gamma * target_q

        # Critic update
        current_q = self.critic(states, actions)
        critic_loss = nn.MSELoss()(current_q, target_q)
        self.critic_optimizer.zero_grad()
        critic_loss.backward()
        self.critic_optimizer.step()

        # Actor update
        actor_loss = -self.critic(states, self.actor(states)).mean()
        self.actor_optimizer.zero_grad()
        actor_loss.backward()
        self.actor_optimizer.step()

        # Soft update
        for target_param, param in zip(self.actor_target.parameters(), self.actor.parameters()):
            target_param.data.copy_(tau * param.data + (1 - tau) * target_param.data)

        for target_param, param in zip(self.critic_target.parameters(), self.critic.parameters()):
            target_param.data.copy_(tau * param.data + (1 - tau) * target_param.data)

# ============================== Main =================================
env = gym.make("Pendulum-v1")  # Change to any continuous env
agent = DDPGAgent(env.observation_space.shape[0], env.action_space.shape[0], env.action_space.high[0])

episodes = 0 #200
for ep in range(episodes):
    state = env.reset()
    ep_reward = 0
    done = False
    while not done:
        action = agent.select_action(state)
        next_state, reward, done, _, _ = env.step(action)
        agent.buffer.push((state, action, reward, next_state, float(done)))
        agent.train()
        state = next_state
        ep_reward += reward
    print(f"Episode {ep}, Reward: {ep_reward:.2f}")

In [76]:
import gymnasium as gym
env = gym.make("Pendulum-v1")  # Change to any continuous env
agent = DDPGAgent(env.observation_space.shape[0], env.action_space.shape[0], env.action_space.high[0])
state, _ = env.reset()
action = agent.select_action(state)
print(action)

[-0.32281303]


In [5]:
class parent:
    def __init__(self):
        print("Hello from p constructor")
        print("Another Hello from p constructor")
        
    def greet(self):
        print("Hello from parent greet")
        
class child(parent):
    def __init__(self):
        super().__init__()
        print("Nonchaltant shit")
    # super().__init__()
    def greet(self):
        super().greet()
        # print("Hii")
        
obj = child()
obj.greet()

Hello from p constructor
Another Hello from p constructor
Nonchaltant shit
Hello from parent greet


In [12]:
def decor(func):
    def wrapper(arg):
        print("Hello before func")
        ret = func(arg)
        print("Hello after func")
        return ret
    return wrapper

@decor
def fun(arg):
    print(arg*arg)
fun(2)

Hello before func
4
Hello after func


In [7]:
import time

def timer(func):
    def wrapper(*args, **kwargs):
        start_time = time.time()
        result = func(*args, **kwargs)
        end_time = time.time()
        print(f"Function '{func.__name__}' took {end_time - start_time:.4f} seconds to run.")
        return None
    return wrapper

@timer
def my_expensive_function():
    time.sleep(2)
    return "Done!"

output = my_expensive_function()
print(output)

Function 'my_expensive_function' took 2.0011 seconds to run.
None
