In [118]:
import os
from typing import List
import random
from hashlib import md5
from random import randint
import pandas as pd


#Grid layout: every grid holds GRID_H rows ("verses") of GRID_W songs each
GRID_W = 5
GRID_H = 3

#How many grids are generated for, and loaded from, each round's grid folder
NUM_GRIDS = 200


In [137]:
def write_grids(subsets:List[List],path:str):
    """Write every subset to ``path`` as a numbered CSV grid file.

    Each subset is laid out as rows of ``GRID_W`` comma-separated elements and
    saved as ``<path>/<k>.csv``, where ``k`` starts at 1 and follows the order
    of ``subsets``. Rows are separated by a newline and the file has no
    trailing newline.

    NOTE: elements are written verbatim (no quoting/escaping), so an element
    containing ',' or a newline will not round-trip through ``load_grids``.

    Args:
        subsets: One flat list of string elements per grid.
        path: Directory that receives the CSV files. It is created, together
            with any missing parent directories, when it does not exist yet.

    Returns:
        Always ``True``.

    Raises:
        OSError: If the directory or one of the files cannot be created.
    """
    #Ensure directory exists. makedirs (unlike mkdir) also creates missing
    #parent folders instead of raising FileNotFoundError.
    os.makedirs(path, exist_ok=True)

    #Format every subset in a grid format and save as CSV
    for i,subset in enumerate(subsets):
        rows = [subset[start:start + GRID_W]
                for start in range(0, len(subset), GRID_W)]
        csv_file = "\n".join(",".join(row) for row in rows)

        #Explicit UTF-8 so the files do not depend on the platform's default
        #encoding; load_grids reads them back with the same encoding.
        with open(os.path.join(path,f"{i+1}.csv"),"w",encoding="utf-8") as f:
            f.write(csv_file)

    return True

def load_grids(path:str) -> List[List]:
    """Load the grid files ``1.csv`` .. ``<NUM_GRIDS>.csv`` found in ``path``.

    Args:
        path: Directory containing the numbered CSV files.

    Returns:
        One entry per file, in file-number order. Each entry is a list of
        rows and each row is a list of up to ``GRID_W`` string cells.

    Raises:
        FileNotFoundError: If one of the ``NUM_GRIDS`` expected files is missing.
    """
    grids = []

    for number in range(1, NUM_GRIDS + 1):
        with open(os.path.join(path,f"{number}.csv"),"r",encoding="utf-8") as f:
            #Drop trailing newlines (e.g. added by an editor); otherwise the
            #split below would yield a phantom empty cell / extra row.
            csv_file = f.read().rstrip('\n')

        #Flatten the file into a single list of cells, then cut it back into
        #rows of GRID_W cells.
        cells = csv_file.replace('\n',',').split(',')
        grids.append([cells[start:start + GRID_W]
                      for start in range(0, len(cells), GRID_W)])

    return grids

def win_on(sequence:list, song2idx):
    """Return the index of the song on which ``sequence`` will win.

    That is the largest ``song2idx`` value among the songs in ``sequence``.
    Raises ``KeyError`` for a song missing from ``song2idx`` and ``ValueError``
    when ``sequence`` is empty.
    """
    positions = (song2idx[title] for title in sequence)
    return max(positions)

In [143]:
def get_check_list(round_n:int,overwrite:bool=False) -> pd.DataFrame:
    """Build the table of winning song indices for every grid of a round.

    Reads the playlist from ``../songs/songs_<round_n>.txt`` (one song per
    line, UTF-8), optionally regenerates the grids under
    ``../grids/grids_<round_n>``, loads the grids from disk and computes, for
    each grid, the playlist index on which every verse (row) is completed and
    the index on which the whole grid is completed.

    Args:
        round_n: Round number used to pick the songs file and the grid folder.
        overwrite: When true, new grids are sampled at random from the
            playlist and written to the grid folder before loading,
            replacing the numbered files already there.

    Returns:
        A DataFrame with one row per grid and columns ``Vers 1`` ..
        ``Vers <GRID_H>`` plus ``Kaxxa`` (the maximum of the verse columns).

    Raises:
        FileNotFoundError: If the songs file or a grid file is missing.
        KeyError: If a loaded grid contains a song that is not in the playlist.
        ValueError: If ``overwrite`` is set and the playlist has fewer than
            ``GRID_W*GRID_H`` songs.
    """
    #Retrieve all songs
    songs_path = os.path.join('..','songs',f'songs_{round_n}.txt')
    with open(songs_path,'r',encoding='utf-8') as f:
        #Strip only the line terminator: slicing off the last character
        #unconditionally would truncate the final song whenever the file
        #does not end with a newline.
        songs = [line.rstrip('\n') for line in f]

    #Maps a song to its position within the playlist
    song2idx = {song:i for i,song in enumerate(songs)}

    #Path of grids in local disk
    grid_path = os.path.join("..","grids",f"grids_{round_n}")
    if overwrite:

        #Create the grids by randomly sampling from the playlist
        subsets = [random.sample(songs,GRID_W*GRID_H) for _ in range(NUM_GRIDS)]

        print(f"[WARNING]: Overwriting grids at {os.path.join(os.getcwd(),grid_path)}")
        write_grids(subsets,grid_path)

    #Loading grids from disk
    grids = load_grids(grid_path)

    #Contains the winning song index of every verse (row) for every grid.
    verse_wins = [[win_on(vers,song2idx) for vers in grid] for grid in grids]

    #Adding the winning index for the whole grid
    #NOTE: When all GRID_H verses are won, the grid is won
    rows = [wins+[max(wins)] for wins in verse_wins]

    return pd.DataFrame(data=rows,
                columns=[f"Vers {i+1}" for i in range(GRID_H)]+["Kaxxa"]
    )


In [152]:
#Build the check list for round 2 from the grids already stored on disk
df = get_check_list(round_n=2)


In [163]:
#Show the check list: one row per grid, with the winning index of each verse and of the whole grid ("Kaxxa")
df

Unnamed: 0,Vers 1,Vers 2,Vers 3,Kaxxa
0,195,114,135,195
1,146,178,144,178
2,149,185,194,194
3,131,196,139,196
4,157,166,176,176
...,...,...,...,...
195,185,142,176,185
196,167,199,195,199
197,157,190,95,190
198,183,187,100,187
