In [1]:
# default_exp download

In [2]:
#exports
import textwrap
from pathlib import Path

import pandas as pd
import requests
import typer
from bs4 import BeautifulSoup as bs

In [3]:
#exports
def get_every_noise_canvas(everynoise_url='https://everynoise.com/', timeout=30):
    """Download the Every Noise page and return its single canvas element.

    Args:
        everynoise_url: URL of the page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely.

    Returns:
        The one ``<div class="canvas">`` element found in the page.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        AssertionError: If the page does not contain exactly one canvas div.
    """
    r = requests.get(everynoise_url, timeout=timeout)
    # Fail on 4xx/5xx here rather than later with a confusing "no canvas" error
    r.raise_for_status()
    soup = bs(r.text, features='lxml')

    canvases = soup.find_all('div', attrs={'class': 'canvas'})
    # Raised explicitly (same exception type as the previous assert) so the check
    # also runs under ``python -O`` and reports what was actually found
    if len(canvases) != 1:
        raise AssertionError(
            f'Expected exactly one <div class="canvas"> at {everynoise_url}, '
            f'found {len(canvases)}'
        )

    return canvases[0]

In [4]:
# Download the page once (performs an HTTP request) and keep the canvas for the cells below
canvas = get_every_noise_canvas()

In [5]:
#exports
def extract_style_elems(genre_elem):
    """Parse an element's inline ``style`` attribute into a dict.

    Every ``property: value`` declaration becomes one entry. Property names
    and values are stripped of surrounding whitespace and ``px`` is removed
    from values, e.g. ``'top: 19051px'`` -> ``{'top': '19051'}``.

    Args:
        genre_elem: Any object supporting ``genre_elem['style']`` that returns
            the style string (e.g. a BeautifulSoup tag or a plain dict).

    Returns:
        dict mapping property name to its value, both as strings.

    Raises:
        KeyError: If ``genre_elem`` has no ``style`` entry.
    """
    style_elems = {}

    for declaration in genre_elem['style'].split(';'):
        # Split on the first colon only, so a missing space after the colon
        # or a colon inside the value does not break parsing
        prop, sep, value = declaration.partition(':')

        if not sep:
            # Blank fragment (e.g. after a trailing ';') or a fragment with
            # no colon at all: nothing to record
            continue

        style_elems[prop.strip()] = value.strip().replace('px', '')

    return style_elems

def extract_canvas_height_width(canvas):
    """Return the canvas size as integers, in ``(height, width)`` order.

    The values are read from the ``height`` and ``width`` entries of the
    canvas element's parsed inline style.
    """
    style = extract_style_elems(canvas)
    height, width = (int(style[dimension]) for dimension in ('height', 'width'))

    return height, width

In [6]:
# Read the canvas dimensions from its inline style; note the (height, width) order
canvas_height, canvas_width = extract_canvas_height_width(canvas)

canvas_height, canvas_width

(19666, 1607)

In [7]:
# Take the first <div> inside the canvas as a sample element for the cells below
genre_elem = canvas.find('div')

genre_elem

<div class="genre scanme" id="item1" onclick='playx("1Os92edk1L3iu1PNQtgieM", "australian classical piano", this);' preview_url="https://p.scdn.co/mp3-preview/ff605ba13e2d6113247040ea1bcc77c33674a1ce" scan="true" style="color: #1ca3c2; top: 19051px; left: 502px; font-size: 100%" title='e.g. Edvard Grieg "Lyric Pieces Book I, Op.12: 1. Arietta"'>australian classical piano<a class="navlink" href="engenremap-australianclassicalpiano.html" onclick="event.stopPropagation();">»</a> </div>

In [8]:
#exports
def genre_elem_to_name(genre_elem):
    """Return the genre name displayed by a genre element.

    The element's text also contains a '»' link marker; it is removed
    whether or not it is followed by whitespace, and the result is stripped
    of surrounding whitespace.

    Args:
        genre_elem: Object exposing the element text via a ``.text`` attribute.

    Returns:
        The genre name as a string.
    """
    return genre_elem.text.replace('»', '').strip()

In [9]:
# Check name extraction on the sample element
genre_elem_to_name(genre_elem)

'australian classical piano'

In [10]:
# Parse the sample element's inline style into a dict of property -> value
genre_style_elems = extract_style_elems(genre_elem)

genre_style_elems

{'color': '#1ca3c2', 'top': '19051', 'left': '502', 'font-size': '100%'}

In [11]:
#exports
def get_genre_xy(genre_style_elems, canvas_width, canvas_height):
    """Turn an element's ``left``/``top`` style offsets into ``(x, y)`` integers.

    ``x`` is the ``left`` offset. ``y`` is ``canvas_height`` minus the ``top``
    offset, i.e. the vertical axis is flipped relative to the ``top`` value.
    A ``px`` suffix on either value is tolerated.

    Args:
        genre_style_elems: Mapping with ``'left'`` and ``'top'`` string values.
        canvas_width: Not used in the calculation; kept in the signature.
        canvas_height: Integer the ``top`` offset is subtracted from.

    Returns:
        Tuple ``(x, y)`` of ints.
    """
    left_offset, top_offset = (
        int(genre_style_elems[side].replace('px', ''))
        for side in ('left', 'top')
    )

    return left_offset, canvas_height - top_offset

In [12]:
# Convert the sample element's style offsets into (x, y) coordinates
x, y = get_genre_xy(genre_style_elems, canvas_width, canvas_height)

x, y

(502, 615)

In [22]:
#exports
def extract_genre_attrs(genre_elem, canvas_width, canvas_height):
    """Collect the name, position and colour of one genre element.

    Args:
        genre_elem: The genre element to read.
        canvas_width: Passed through to ``get_genre_xy``.
        canvas_height: Passed through to ``get_genre_xy``.

    Returns:
        dict with the keys ``genre``, ``x``, ``y`` and ``hex_colour``
        (the latter taken from the ``color`` style entry).
    """
    style = extract_style_elems(genre_elem)
    name = genre_elem_to_name(genre_elem)
    x, y = get_genre_xy(style, canvas_width, canvas_height)

    return {
        'genre': name,
        'x': x,
        'y': y,
        'hex_colour': style['color'],
    }

In [23]:
# Build the full attribute record for the sample element
genre_attrs = extract_genre_attrs(genre_elem, canvas_width, canvas_height)

genre_attrs

{'genre': 'australian classical piano',
 'x': 502,
 'y': 615,
 'hex_colour': '#1ca3c2'}

In [24]:
#exports
def get_df_genre_attrs(everynoise_url='https://everynoise.com/'):
    """Scrape every genre on the page into a DataFrame.

    Downloads the canvas from ``everynoise_url``, reads its dimensions and
    extracts the attributes of each ``<div>`` found inside it.

    Args:
        everynoise_url: URL of the page to scrape.

    Returns:
        pandas.DataFrame built from one attribute dict per ``<div>``.
    """
    canvas = get_every_noise_canvas(everynoise_url=everynoise_url)
    canvas_height, canvas_width = extract_canvas_height_width(canvas)

    records = [
        extract_genre_attrs(div, canvas_width, canvas_height)
        for div in canvas.find_all('div')
    ]

    return pd.DataFrame(records)

In [25]:
# Scrape the whole page (performs an HTTP request) and preview the first rows
df_genre_attrs = get_df_genre_attrs()

df_genre_attrs.head()

Unnamed: 0,genre,x,y,hex_colour
0,australian classical piano,502,615,#1ca3c2
1,piano blues,911,3891,#66882d
2,histoire pour enfants,1153,5673,#568608
3,dalarna indie,401,10634,#a77e17
4,cante alentejano,615,1881,#399c5d


In [26]:
#exports
# Typer application object; functions decorated with @app.command() are attached to it
app = typer.Typer()

In [27]:
#exports
@app.command()
def download_genre_attrs(fp: str = 'data/genre_attrs.csv'):
    """Scrape the Every Noise genre attributes and save them as a CSV file.

    Missing parent directories of ``fp`` are created before writing.

    Args:
        fp: Destination path of the CSV file.
    """
    # Scrape first so a failed download does not leave an empty directory behind
    df_genre_attrs = get_df_genre_attrs()

    # DataFrame.to_csv does not create directories, so make sure the target folder exists
    Path(fp).parent.mkdir(parents=True, exist_ok=True)
    df_genre_attrs.to_csv(fp, index=False)

In [28]:
# Write to ../data rather than the default data/ path
# NOTE(review): presumably because this notebook sits one level below the project root — confirm
fp = '../data/genre_attrs.csv'

download_genre_attrs(fp)

In [29]:
#exports
# Start the CLI only when run as a script: besides the usual __main__ check this
# also requires __file__ to be defined, presumably so that executing this cell
# inside a notebook kernel (where __file__ is normally absent) does not launch it
if __name__ == '__main__' and '__file__' in globals():
    app()

In [30]:
#hide
# Convert this notebook with nbdev's notebook2script
# (imported here, in the hidden cell, rather than with the exported imports)
from nbdev.export import notebook2script
notebook2script('01-library-gen.ipynb')

Converted 01-library-gen.ipynb.
