In [None]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation
import random
import time


class SpectrumEnvironment:
    """Toy spectrum-sensing environment with randomly occupied bands.

    Every band is either idle (0) or occupied (1). Each call to ``step``
    scores the sensed band against the current spectrum, logs the whole
    spectrum in ``channel_history`` and then draws a new random spectrum.
    """

    def __init__(self, num_bands: int = 10, idle_reward=20, busy_penalty=-15):
        """Create the environment and draw an initial spectrum.

        Args:
            num_bands: Total number of frequency bands.
            idle_reward: Reward granted when the sensed band is idle.
            busy_penalty: Reward (normally negative) granted when the
                sensed band is occupied.
        """
        self.num_bands = num_bands  # Total number of frequency bands
        self.idle_reward = idle_reward  # Reward for sensing an idle band
        self.busy_penalty = busy_penalty  # Reward for sensing an occupied band
        self.state = self._generate_spectrum_state()  # Current spectrum
        self.total_reward = 0  # Sum of rewards since construction / last reset
        self.rewards = []  # Reward of each step since construction / last reset
        # One list per band holding the band's state at every step taken.
        self.channel_history = [[] for _ in range(num_bands)]

    def _generate_spectrum_state(self) -> np.ndarray:
        """Return a random spectrum: one entry per band, 0 (idle) or 1 (occupied)."""
        return np.random.choice([0, 1], size=self.num_bands)

    def step(self, action):
        """Sense one band, score it, and advance to a new random spectrum.

        Args:
            action: Index of the band to sense, ``0 <= action < num_bands``.

        Returns:
            A ``(state, reward)`` pair: the freshly drawn spectrum for the
            next step and the reward earned by this action.

        Raises:
            IndexError: If ``action`` is outside ``[0, num_bands)``.
        """
        # Reject negative indices explicitly: NumPy would otherwise wrap them
        # around and silently score a different band than the one requested.
        if not 0 <= action < self.num_bands:
            raise IndexError(
                f"action {action} is out of range for {self.num_bands} bands"
            )

        sensed_idle = self.state[action] == 0
        reward = self.idle_reward if sensed_idle else self.busy_penalty

        self.total_reward += reward
        self.rewards.append(reward)

        # Log the spectrum that was actually sensed, before it is replaced.
        for band_log, band_state in zip(self.channel_history, self.state):
            band_log.append(band_state)

        # Randomize the spectrum state for the next step
        self.state = self._generate_spectrum_state()
        return self.state, reward

    def reset(self, *, clear_history: bool = False):
        """Reset rewards and draw a new spectrum.

        Args:
            clear_history: When true, also discard ``channel_history``.
                Defaults to False, which keeps the per-band history
                accumulating across resets.

        Returns:
            The newly drawn spectrum state.
        """
        self.state = self._generate_spectrum_state()
        self.total_reward = 0
        self.rewards = []
        if clear_history:
            self.channel_history = [[] for _ in range(self.num_bands)]
        return self.state
