In [2]:
library(tidyverse)
library(magrittr) # better handling of pipes
library(purrr) # to work with lists and map functions
library(glue) # to paste strings
library(stringr) # to hand strings
library(rvest) # rvest makes scraping easier
library(polite) # polite is the "polite" version of rvest
library(xml2) # makes it easier to work with HTML and XML from R

── [1mAttaching packages[22m ─────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.2 ──
[32m✔[39m [34mggplot2[39m 3.3.6     [32m✔[39m [34mpurrr  [39m 0.3.4
[32m✔[39m [34mtibble [39m 3.1.7     [32m✔[39m [34mdplyr  [39m 1.0.9
[32m✔[39m [34mtidyr  [39m 1.2.0     [32m✔[39m [34mstringr[39m 1.4.0
[32m✔[39m [34mreadr  [39m 2.1.2     [32m✔[39m [34mforcats[39m 0.5.1
── [1mConflicts[22m ────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()

Attaching package: 'magrittr'


The following object is masked from 'package:purrr':

    set_names


The following object is masked from 'package:tidyr':

    extract



Attaching package: 'rvest'


The following object is masked from 'package:readr':

    guess_encoding




In [115]:
# The website in html
audio_culture_page = read_html("https://www.audioculture.co.nz/music_index?category=Person")

# Profile names
Profiles = audio_culture_page %>% html_nodes(".skippy-col-link") %>% html_text()

# Profile links
Links = audio_culture_page %>% html_nodes(".skippy-col-link") %>% html_attr('href')

# Dataframe of profile and associated link
df = data.frame(Profiles, Links)

# Prints NZ music profiles based on chars of given string
create_profile_list = function(cafe_name) {
    # Removes special symbols and whitespace from string
    cleaned_string = gsub("([^A-Za-z0-9])+", "", cafe_name)

    # Splits string into list for iteration
    char_list = strsplit(cleaned_string, "")[[1]]

    # The list of NZ profiles based on cafe name
    profile_list = data.frame()
    
    # Iterate through every char in the given string
    for (char in char_list) {
        # Converts char into upper case for REGEX pattern
        upper_char = toupper(char)

        # Converts char into lower case for REGEX pattern
        lower_char = tolower(char)

        # Creates list of profiles that match REGEX pattern (Starts with given char)
        match_list = grep(glue('^[{upper_char}{lower_char}]'), Profiles, value = TRUE)
        
        # If the match list is not empty choose random profile from match list
        if (length(match_list) != 0) {
            # Randomly selected profile
            profile_match = sample(match_list, 1)
#             print(profile_match)
            # The profile's discography
            profile_discography = search_profile(profile_match)
#             print(profile_discography)
            # Sometimes profiles have empty discographies
            if (nrow(profile_discography) == 0) {
                profile_album = data.frame(Character = upper_char, 
                                           Profile = profile_match, 
                                           Album = NA_character_, 
                                           Featuring = NA_character_, 
                                           Year = NA_character_)
            } else {
                # Randomly selects album index from discography
                profile_album_index = sample(nrow(profile_discography), 1) 

                # Create data frame entry for profile_lsit
                profile_album = merge((data.frame(Character = upper_char, Profile = profile_match)), 
                                      (profile_discography[profile_album_index,]))
            }
            
        } else {
            # If no match, create empty data frame entry for profile_list
            profile_album = data.frame(Character = upper_char, 
                                       Profile = NA_character_, 
                                       Album = NA_character_, 
                                       Featuring = NA_character_, 
                                       Year = NA_character_)
        }

        # Appends profile_list with new profile
        profile_list = rbind(profile_list, profile_album)
    }
    return(profile_list)
}

# Takes a profile link as input
# Returns profile's discography with featuring artists as a dataframe
get_discography = function(name_link) {
    profile_discography_page = glue('https://www.audioculture.co.nz{name_link}/discography') %>% read_html()

    Album     = profile_discography_page %>% html_nodes("[class='header']") %>% html_text()
    Featuring = profile_discography_page %>% html_nodes("[class='body']")   %>% html_text()
    Year      = profile_discography_page %>% html_nodes("[class='year']")   %>% html_text()
    
    # If the year length is less than 4, replace with NA
    Year[sapply(Year, nchar) < 4] = NA_character_
    
    # Data frame of the given profile's discography
    discography_df = data.frame(Album, Featuring, Year)

    # Replaces empty entries with NA
    discography_df[discography_df == ""] = NA_character_

    return(discography_df)
}

# Takes a profile name as input
# Returns given name's discography (currenly no error handling if name not in dataframe)
search_profile = function(name) {
    name_link = (df %>% filter(str_detect(Profiles, name)))[[2]][1]
    
    return(get_discography(name_link))
}

In [125]:
create_profile_list('RRRRRRRRRRRRR')

[1] "/profile/ragnarok"
[1] "/profile/roy-colbert"
[1] "/profile/the-renderers"
[1] "/profile/raice-mcleod"
[1] "/profile/rim-d-paul-and-the-quin-tikis"
[1] "/profile/russ-le-roq"
[1] "/profile/ritchie-venus-band"
[1] "/profile/the-remarkables"
[1] "/profile/ron-kane"
[1] "/profile/rufus-rehu"
[1] "/profile/reece-kirk"
[1] "/profile/ron-kane"
[1] "/profile/rotorua-maori-choir"


Character,Profile,Album,Featuring,Year
<chr>,<chr>,<chr>,<chr>,<chr>
R,Ragnarok,Help Us Help You,Flying Wild,1971.0
R,Roy Colbert,Compiletely Bats,"Bats, The",2010.0
R,"Renderers, The",A Rocket Into Nothing,"Renderers, The",2011.0
R,Raice McLeod,The Spirit Of Chistmas,Chuck Brown,1999.0
R,Rim D. Paul and The Quin Tikis,The Fantastic Maori Quintikis Showband,Maori Quintikis Showband*,1968.0
R,Russ le Roq,I Just Want To Be Like Marlon Brando,Russ Le Roq,1982.0
R,Ritchie Venus,,,
R,"Remarkables, The",Vegetarian / Skin Condition,"Remarkables, The (2)",1986.0
R,Ron Kane,Ten Guitars,"Decayes, The",1983.0
R,Rufus Rehu,Make Friends With The Quin Tikis,The Quin Tikis*,1968.0
