In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from tqdm import tqdm
from datetime import datetime
from torch.utils.data import Dataset, DataLoader
from glob import glob

In [2]:
DATA_PATH = "../data"

DEVICE = "cpu"
NUM_TAGS = 256
EMBEDDING_DIM = 768
np.random.seed(42)

TRACK_EMBEDDING_NUM = 5

In [3]:
embeddings = glob(f"{DATA_PATH}/track_embeddings/*")[:TRACK_EMBEDDING_NUM]
track_idx2embeds = {}
for fn in tqdm(embeddings):
    track_idx = int(fn.split('/')[-1].split('.')[0])
    embeds = np.load(fn)
    track_idx2embeds[track_idx] = embeds

track_ids = list(track_idx2embeds.keys())

100%|██████████| 5/5 [00:00<00:00, 584.13it/s]


In [4]:
track_ids

[30121, 18560, 5785, 68703, 66516]

In [5]:
df_train = pd.read_csv(f'{DATA_PATH}/train.csv')
df_test = pd.read_csv(f'{DATA_PATH}/test.csv')

train_mask = df_train["track"].isin(track_ids)
df_train = df_train[train_mask]

test_mask = df_test["track"].isin(track_ids)
df_test = df_test[test_mask]

In [6]:
import gym
from gym import spaces
from gym.utils import seeding

In [7]:
class CustomEnvironment(gym.Env):
    def __init__(self, track_id, true_tags):
        self.track_id = track_id
        self.true_tags = true_tags
        self.embeddings = self._get_embeds()
        self.current_step = 0
        self.max_steps = self.embeddings.shape[0] - 1
        self.action_space = spaces.Discrete(NUM_TAGS)
        self.observation_space = spaces.Box(low=0, high=1, shape=(NUM_TAGS + EMBEDDING_DIM,))        
        self._state = np.zeros(NUM_TAGS)
    
    def is_episode_done(self):
        if self.current_step >= self.max_steps:
            return True
        return False
    
    def step(self, action):
        reward = self.calculate_reward(action)
        self._state[action] = 1
        
        done = self.is_episode_done()
        if not done:
            self.current_step += 1
            print(self._state.shape)
            self.observation_space[NUM_TAGS:] = self.embeddings[self.current_step]
        return self._state, reward, done, {}
    
    def reset(self, **kwargs):
        self._state = np.zeros(self.action_space.n)
        self.current_step = 0 
        return self._state
    
    def render(self, mode="human"): ...
    
    def calculate_reward(self, action):
        reward = 0
        if self._state[action] == 0:
            reward += 1
            if action in self.true_tags:
                reward += 1
            else:
                reward -= 1.25
        else:
            if action in self.true_tags :
                reward -= 0.5
            else:
                reward -= 1
        return reward
    
    def _get_embeds(self):
        return track_idx2embeds[self.track_id]


In [8]:
df_train

Unnamed: 0,track,tags
8291,30121,23208
24036,68703,315688149
39001,18560,33181
42500,66516,105181
46850,5785,269117


In [9]:
TRACK_ID, TRUE_TAGS = df_train.iloc[2, :]
TRUE_TAGS = list(map(int, TRUE_TAGS.split(",")))

# Create the custom environment
env = CustomEnvironment(TRACK_ID, TRUE_TAGS)

# Number of episodes to run for testing
num_episodes = 5

for episode in range(num_episodes):
    observation = env.reset()
    # print(observation)
    done = False
    total_reward = 0

    while not done:
        # Choose an action (for testing, you can choose a random action)
        act = env.action_space.sample()

        # Take a step in the environment
        observation, reward, done, _ = env.step(act)

        # Accumulate the reward
        total_reward += reward

    print(f"Episode {episode + 1}: Total Reward = {total_reward}")

env.close()

(256,)


TypeError: 'Box' object does not support item assignment