In [1]:
# install.packages("magrittr")
# install.packages("purrr")
# install.packages("glue")
# install.packages("stringr")
# install.packages('spotifyr')
library(devtools)
library(spotifyr)
library(tidyverse)
library(magrittr) # better handling of pipes
library(purrr) # to work with lists and map functions
library(glue) # to paste strings
library(stringr) # to hand strings
library(rvest) # rvest makes scraping easier
library(polite) # polite is the "polite" version of rvest
library(xml2) # makes it easier to work with HTML and XML from R

Loading required package: usethis

── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
✔ ggplot2 3.3.6     ✔ purrr   0.3.4
✔ tibble  3.1.7     ✔ dplyr   1.0.9
✔ tidyr   1.2.0     ✔ stringr 1.4.0
✔ readr   2.1.2     ✔ forcats 0.5.1
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()

Attaching package: 'magrittr'


The following object is masked from 'package:purrr':

    set_names


The following object is masked from 'package:tidyr':

    extract



Attaching package: 'rvest'


The following object is masked from 'package:readr':

    guess_encoding




In [108]:
# Download and parse the AudioCulture music index restricted to "Person"
page <- read_html("https://www.audioculture.co.nz/music_index?category=Person")

# Display text of every element matching `.skippy-col-link` (the profile names)
Profiles <- html_text(html_nodes(page, ".skippy-col-link"))

# `href` attribute of the same elements (the profile links)
Links <- html_attr(html_nodes(page, ".skippy-col-link"), "href")

# Lookup table pairing each profile name with its link
df <- data.frame(Profiles = Profiles, Links = Links)

# Builds a table of NZ music profiles keyed by the characters of a string.
#
# For every letter or digit in `cafe_name` (all other characters are dropped),
# one profile whose name starts with that character, in either case, is drawn
# at random from the global `Profiles` vector, and one random entry of that
# profile's discography is fetched through `search_profile()`.
#
# Args:
#   cafe_name: a single string.
#
# Returns:
#   A data frame with one row per letter/digit of `cafe_name`, in order, with
#   the columns Character, Profile, Album, Featuring and Year. Profile is NA
#   when no profile name starts with the character; Album, Featuring and Year
#   are NA whenever there is no release to pick from. An empty data frame is
#   returned when `cafe_name` contains no letters or digits.
create_profile_list <- function(cafe_name) {
    # Removes special symbols and whitespace from string
    cleaned_string <- gsub("[^A-Za-z0-9]+", "", cafe_name)

    # Splits string into single characters for iteration
    char_list <- strsplit(cleaned_string, "")[[1]]

    # Produces the single result row for one character
    pick_album_for_char <- function(char) {
        upper_char <- toupper(char)
        lower_char <- tolower(char)

        # Placeholder row used when there is nothing to sample from
        no_album <- function(profile) {
            data.frame(
                Character = upper_char,
                Profile = profile,
                Album = NA_character_,
                Featuring = NA_character_,
                Year = NA_character_
            )
        }

        # Profiles whose name starts with the character, in either case
        pattern <- paste0("^[", upper_char, lower_char, "]")
        match_list <- grep(pattern, Profiles, value = TRUE)
        if (length(match_list) == 0) {
            return(no_album(NA_character_))
        }

        profile_match <- sample(match_list, 1)
        profile_discography <- search_profile(profile_match)

        # A profile without any release used to vanish from the result
        # (sampling from zero rows yields a 0-row join); keep the character
        # and the profile name and mark the album fields as missing instead.
        if (nrow(profile_discography) == 0) {
            return(no_album(profile_match))
        }

        profile_album_index <- sample.int(nrow(profile_discography), 1)
        profile_album <- cbind(
            data.frame(Character = upper_char, Profile = profile_match),
            profile_discography[profile_album_index, , drop = FALSE]
        )
        rownames(profile_album) <- NULL
        profile_album
    }

    # Collect the rows first and bind once rather than growing a data frame
    # inside a loop.
    rows <- lapply(char_list, pick_album_for_char)
    if (length(rows) == 0) {
        return(data.frame())
    }
    do.call(rbind, rows)
}

# Downloads the discography page of one profile and tabulates it.
#
# Args:
#   name_link: the profile's link as found in the index (site-relative path);
#              "/discography" is appended to it.
#
# Returns:
#   A data frame with the columns Album, Featuring and Year, each filled with
#   the text of the page elements whose class attribute is exactly `header`,
#   `body` and `year` respectively.
get_discography <- function(name_link) {
    discography_url <- glue('https://www.audioculture.co.nz{name_link}/discography')
    discography_html <- read_html(discography_url)

    album_nodes <- html_nodes(discography_html, "[class='header']")
    featuring_nodes <- html_nodes(discography_html, "[class='body']")
    year_nodes <- html_nodes(discography_html, "[class='year']")

    data.frame(
        Album = html_text(album_nodes),
        Featuring = html_text(featuring_nodes),
        Year = html_text(year_nodes)
    )
}

# Looks up a profile by name in the global `df` table and returns its
# discography via `get_discography()`.
#
# The name is matched literally, never as a regular expression, so names
# containing characters such as "(", ")", "." or "$" are safe. An exact match
# on the full profile name wins; only if there is none is `name` treated as a
# literal substring of the profile name.
#
# Args:
#   name: a single profile name (or a literal fragment of one).
#
# Returns:
#   Whatever `get_discography()` returns for the matched profile's link.
#
# Raises:
#   An error when `name` is not a single non-missing string or when no profile
#   matches. A warning is issued when several profiles match, in which case
#   the first one is used.
search_profile <- function(name) {
    stopifnot(is.character(name), length(name) == 1, !is.na(name))

    profile_names <- df[["Profiles"]]
    profile_links <- df[["Links"]]

    # Prefer the exact profile; `which()` also discards NA comparisons
    hits <- which(profile_names == name)
    if (length(hits) == 0) {
        # Fall back to a literal substring search so partial names still work
        hits <- grep(name, profile_names, fixed = TRUE)
    }

    if (length(hits) == 0) {
        stop("No profile found matching '", name, "'", call. = FALSE)
    }
    if (length(hits) > 1) {
        warning(
            "'", name, "' matches ", length(hits), " profiles; using '",
            profile_names[[hits[1]]], "'",
            call. = FALSE
        )
    }

    get_discography(profile_links[[hits[1]]])
}

In [107]:
# Try the profile table with one lookup per letter a-z and digit 0-9
create_profile_list("abcdefghijklmnopqrstuvwxyz1234567890")
# Scratch check of a single profile's discography (disabled):
# pog <- search_profile("Dead Flowers")
# pog
# nrow(pog)

[1] 1
[1] ""


Album,Featuring,Year
<chr>,<chr>,<chr>
I Wanna Know Summer Tour EP,Dead Flowers (2),
Dead Flowers,Dead Flowers (2),1987.0
Lisa,Dead Flowers (2),1992.0
Be Someone / Underground,Dead Flowers (2),1993.0
Plastic,Dead Flowers (2),1993.0
Skin Of A Stone,Dead Flowers (2),1993.0
Sweetfish,Dead Flowers (2),1994.0
Watch Her Play,Dead Flowers (2),1994.0
Dead Boy,Dead Flowers (2),1994.0
Home,Dead Flowers (2),1994.0


In [71]:
# dim(data.frame(egg = c('d', 'd'), cih = c('s')))

In [207]:
# Fetch one profile's discography and report how many releases it lists
sh <- search_profile("Overlanders, The")
nrow(sh)
# Scratch experiments on inspecting the first cell of the result (disabled):
# xx <- sh[1, ][[1]]
# typeof(xx)

In [82]:
# Spotify API credentials must not live in source code. They are read from the
# SPOTIFY_CLIENT_ID / SPOTIFY_CLIENT_SECRET environment variables, which can be
# set outside the notebook (for example in ~/.Renviron).
# NOTE(review): the ID/secret pair that used to be hard-coded here has been
# exposed and should be rotated in the Spotify developer dashboard.
spotify_credentials <- Sys.getenv(c("SPOTIFY_CLIENT_ID", "SPOTIFY_CLIENT_SECRET"))
if (any(spotify_credentials == "")) {
    stop(
        "Set SPOTIFY_CLIENT_ID and SPOTIFY_CLIENT_SECRET ",
        "(e.g. in ~/.Renviron) before requesting a Spotify access token",
        call. = FALSE
    )
}

# With no arguments the token request uses the environment variables above
access_token <- get_spotify_access_token()

In [74]:
# Profile table for every letter (in keyboard order) followed by every digit
profile_list <- create_profile_list("qwertyuiopasdfghjklzxcvbnm1234567890")

In [58]:
# Take the first column of the profile table, show it, and request Spotify
# audio features for it.
# NOTE(review): as create_profile_list() in this file builds the table, the
# first column is `Character`, not the profile names; confirm that this is the
# intended input for get_artist_audio_features().
# NOTE(review): the variable `cat` shares its name with base::cat().
cat <- profile_list[[1]]
cat
fart <- get_artist_audio_features(cat)