In [108]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import random
from collections import deque

## Define the Environment

In [109]:
class TaxiChargingEnv:
    def __init__(self, max_charge_rate=22, battery_capacity=100, mu=1, sigma=1, alpha_t=0.01):
        # Charging actions: 0=none, 1=low, 2=medium, 3=high
        self.action_space = [0, max_charge_rate / 3, 2 * max_charge_rate / 3, max_charge_rate]

        # Define state space size
        self.state_space_size = (1,)  # Battery level (percentage)

        self.max_charge_rate = max_charge_rate
        self.battery_capacity = battery_capacity
        self.mu = mu
        self.sigma = sigma
        self.alpha_t = alpha_t
        self.reset()

    def reset(self):
        self.current_battery_level = 0
        self.current_time = 0
        return np.array([self.current_battery_level])

    def step(self, action):

        charging_rate = self.action_space[action]

        # Simulate the charging process for 15 minutes
        self.current_battery_level += charging_rate * 0.25
        self.current_battery_level = min(self.current_battery_level, self.battery_capacity)
        self.current_time += 0.25

        # Calculate charging cost
        charging_cost = self.alpha_t * np.exp(charging_rate)  # as per the given formula

        # Check if it's time for the taxi to leave
        done = self.current_time >= 2
        if done:
            energy_demand = np.random.normal(self.mu, self.sigma)
            if self.current_battery_level < energy_demand:
                reward = -1000  # Penalty for running out of energy
            else:
                # Reward is proportional to the energy saved
                reward = -charging_cost
        else:
            reward = -charging_cost

        return np.array([self.current_battery_level]), reward, done, {}


## Define Neural Network Architecture

In [110]:
import torch.nn as nn

class QNetwork(nn.Module):
    def __init__(self, state_size, action_size, hidden_size=256):
        super(QNetwork, self).__init__()
        self.fc1 = nn.Linear(state_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, hidden_size)
        self.fc4 = nn.Linear(hidden_size, hidden_size)
        self.fc5 = nn.Linear(hidden_size, action_size)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = torch.relu(self.fc3(x))
        x = torch.relu(self.fc4(x))
        return self.fc5(x)

## Define DQN Agent

In [111]:
class DQNAgent:
    def __init__(self, state_size, action_size, lr=0.01, gamma=0.95, batch_size=64):
        self.action_size = action_size
        self.memory = deque(maxlen=10000)
        self.gamma = gamma
        self.batch_size = batch_size

        self.q_network = QNetwork(state_size, action_size).float()
        self.target_network = QNetwork(state_size, action_size).float()
        self.target_network.load_state_dict(self.q_network.state_dict())
        self.optimizer = optim.Adam(self.q_network.parameters(), lr=lr)

    def store(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state, epsilon=0.1):
        if np.random.rand() < epsilon:
            return np.random.randint(self.action_size)
        state = torch.FloatTensor(state).unsqueeze(0)
        if self.q_network.training:
            self.q_network.eval()
        with torch.no_grad():
          actions = self.q_network(state)
        if self.q_network.training:
          self.q_network.train()
        return torch.argmax(actions).item()

    def train(self):
        if len(self.memory) < self.batch_size:
            return
        batch = random.sample(self.memory, self.batch_size)
        states, actions, rewards, next_states, dones = zip(*batch)
        states = torch.FloatTensor(states)
        actions = torch.LongTensor(actions)
        rewards = torch.FloatTensor(rewards)
        next_states = torch.FloatTensor(next_states)
        dones = torch.BoolTensor(dones)

        current_q_values = self.q_network(states).gather(1, actions.unsqueeze(1))
        with torch.no_grad():
            max_next_q_values = self.target_network(next_states).max(1)[0]

        dones = dones.float()

        target_q_values = rewards + (1 - dones) * self.gamma * max_next_q_values

        loss = torch.nn.functional.mse_loss(current_q_values.squeeze(), target_q_values)
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

    def update_target_network(self):
        self.target_network.load_state_dict(self.q_network.state_dict())

## Define the training function

In [112]:
def train_dqn(env, agent, episodes=1000, epsilon_start=1.0, epsilon_min=0.1, epsilon_decay=0.9):
    scores = []
    epsilon = epsilon_start
    for episode in range(1, episodes + 1):
        state = env.reset()
        done = False
        score = 0
        while not done:
            action = agent.act(state, epsilon)
            next_state, reward, done, _ = env.step(action)
            agent.store(state, action, reward, next_state, done)
            agent.train()
            state = next_state
            score += reward
        agent.update_target_network()
        scores.append(score)
        epsilon = max(epsilon_min, epsilon * epsilon_decay)
        print(f"Episode: {episode}, Score: {score}")
    return scores


## Train the DQN

In [113]:
env = TaxiChargingEnv()
state_size = np.prod(env.state_space_size)
action_size = len(env.action_space)
agent = DQNAgent(state_size, action_size)
scores = train_dqn(env, agent)

Episode: 1, Score: -71745149.9129214
Episode: 2, Score: -107594232.47999148
Episode: 3, Score: -71745149.91292141
Episode: 4, Score: -35849143.82606453
Episode: 5, Score: -35872582.64383501
Episode: 6, Score: -93709.46683605366
Episode: 7, Score: -35896021.46160549
Episode: 8, Score: -71721711.09515093
Episode: 9, Score: -35872597.93858363
Episode: 10, Score: -45.964245849924744
Episode: 11, Score: -107617686.59251057
Episode: 12, Score: -35896036.75635411
Episode: 13, Score: -46893.01028956843
Episode: 14, Score: -93709.46683605365
Episode: 15, Score: -93709.46683605365
Episode: 16, Score: -70301.23856281105
Episode: 17, Score: -23454.192519092532
Episode: 18, Score: -23500.076764942463
Episode: 19, Score: -93740.05633328694
Episode: 20, Score: -46877.71554095179
Episode: 21, Score: -46908.30503818507
Episode: 22, Score: -76.55374308320792
Episode: 23, Score: -30.66949723328317
Episode: 24, Score: -76.55374308320792
Episode: 25, Score: -35849174.415561765
Episode: 26, Score: -76.55374

In [114]:
def compartmentalize_scores(scores):

    scores = np.array(scores)
    # Create bins representing percentiles
    bins = [np.percentile(scores, i) for i in range(0, 101, 10)]

    # Use digitize to get bin numbers for each score
    bin_indices = np.digitize(scores, bins, right=True)

    # Calculate the average score for each bin
    averages = [np.median(scores[bin_indices == i]) for i in range(1, 11)]

    return averages

averages = compartmentalize_scores(scores)

In [115]:
for avg in averages:
    print(avg)

-35872628.528080866
-35849174.415561765
-70316.53331142769
-46862.42079233515
-23469.487267709177
-76.55374308320792
-61.258994466566335
-61.25899446656633
-45.96424584992475
-30.66949723328317
