In [12]:
# STEP 1: Install required packages

# This section installs and loads all of the libraries that are needed
install.packages('tidyverse')
install.packages('polite')
install.packages('xml2')
install.packages('purrr')
install.packages('glue') 
install.packages('rvest')

library(tidyverse)
library(polite) # this library is jsut a better version of the rvest library 
library(xml2) # makes it easier to work with HTML and XML from R
library(purrr) # to work with lists and map functions
library(glue) # to paste strings
library(rvest) # rvest makes scraping easier

"package 'tidyverse' is in use and will not be installed"
"package 'polite' is in use and will not be installed"
"package 'xml2' is in use and will not be installed"
"package 'purrr' is in use and will not be installed"
"package 'glue' is in use and will not be installed"
"package 'rvest' is in use and will not be installed"


In [13]:
# Address of the Neat Places cafe listing for Christchurch & Canterbury
url <- "https://neatplaces.co.nz/places/christchurch-canterbury/eat-drink/cafes"

# Parsed HTML document of that listing page
page <- read_html(url)

# Cafe names: the text of every ".list-item-title" node on the page
cafe_name <- html_text(html_nodes(page, ".list-item-title"))

# One-column tibble (column `cafe_name`) holding the cafe names
cafe_names <- tibble(cafe_name = cafe_name)

# Cafe descriptions: the text of every ".summary" node on the page
summary <- html_text(html_nodes(page, ".summary"))

# Cafe links: the href attribute of every anchor inside a ".list-item" node
link <- html_attr(html_nodes(page, ".list-item a"), "href")

# Tibble pairing each cafe name with the link to its page
link_table <- tibble(cafe_name = cafe_name, link = link)

In [14]:
# Looks up the cafes that Neat Places lists for a given place in NZ.
#
# Args:
#   place_name: a single place name. It is lower-cased, " & " and spaces
#     become "-", and macronised vowels are replaced by plain ones, e.g.
#     "Christchurch & Canterbury" becomes "christchurch-canterbury".
#
# Returns:
#   Character vector with the text of every ".list-item-title p" node found
#   on that place's cafe listing page.
#
# Raises:
#   An error if `place_name` is not exactly one non-missing, non-empty
#   string, or if the generated page cannot be read.
change_place <- function(place_name) {
    # read_html() needs exactly one URL, so reject anything else up front
    place_name <- as.character(place_name)
    stopifnot(
        length(place_name) == 1,
        !is.na(place_name),
        nzchar(place_name)
    )

    # All five macronised vowels, not just the two the site happened to need
    macron_map <- c("ā" = "a", "ē" = "e", "ī" = "i", "ō" = "o", "ū" = "u")

    # Converts the given place name to the URL slug format
    url_name <- place_name %>%
        tolower() %>%
        str_replace_all(" & ", "-") %>%
        str_replace_all(" ", "-") %>%
        str_replace_all(macron_map)

    # The generated URL
    new_url <- glue("https://neatplaces.co.nz/places/{url_name}/eat-drink/cafes")

    # Vector of the cafe names scraped from the generated page
    read_html(new_url) %>%
        html_nodes(".list-item-title p") %>%
        html_text()
}

In [15]:
# Parsed HTML of the AudioCulture music index filtered to category "Person"
audio_culture_page <- read_html(
  "https://www.audioculture.co.nz/music_index?category=Person"
)

# Profile names: the text of every ".skippy-col-link" node
Profiles <- html_text(html_nodes(audio_culture_page, ".skippy-col-link"))

# Profile links: the href attribute of those same nodes
Links <- html_attr(html_nodes(audio_culture_page, ".skippy-col-link"), "href")

# Lookup table pairing each profile name with its link
df <- data.frame(Profiles, Links)

In [16]:
# Builds a playlist whose entries spell out the letters of a cafe name.
#
# For every letter or digit in `cafe_name`, picks a random profile from the
# global `Profiles` vector whose name starts with that character (either
# case) and a random album from that profile's discography, as returned by
# get_discography().
#
# Args:
#   cafe_name: the cafe name; only its first element is used. Accented
#     vowels listed in `accent_map` are transliterated, every other
#     non-alphanumeric character is dropped.
#
# Returns:
#   Data frame with one row per remaining character and the columns
#   Character, Profile, followed by the discography columns (Album,
#   Featuring, Year). Profile and album fields are NA when nothing matched.
#   An empty data frame when no characters remain.
#
# Raises:
#   An error when `cafe_name` is missing (NA or empty vector). Without this
#   guard an NA name is interpolated as the text "NA" and silently yields a
#   playlist for the letters N and A.
create_profile_list <- function(cafe_name) {
    cafe_name <- as.character(cafe_name)[1]
    if (is.na(cafe_name)) {
        stop("`cafe_name` is missing; check that the selected index exists.",
             call. = FALSE)
    }

    # Transliterate accented vowels so they are kept instead of dropped
    accent_map <- c("é" = "e", "ā" = "a", "ē" = "e",
                    "ī" = "i", "ō" = "o", "ū" = "u")
    plain_name <- cafe_name
    for (accented in names(accent_map)) {
        plain_name <- gsub(accented, accent_map[[accented]], plain_name,
                           fixed = TRUE)
    }

    # Removes special symbols and whitespace, then splits into characters
    cleaned_string <- gsub("[^A-Za-z0-9]+", "", plain_name)
    char_list <- strsplit(cleaned_string, "")[[1]]

    if (length(char_list) == 0) {
        return(data.frame())
    }

    # Row used whenever no album could be found for a character
    empty_row <- function(character, profile) {
        data.frame(Character = character,
                   Profile = profile,
                   Album = NA_character_,
                   Featuring = NA_character_,
                   Year = NA_real_)
    }

    # Collect one row per character and bind once at the end
    rows <- vector("list", length(char_list))

    for (i in seq_along(char_list)) {
        upper_char <- toupper(char_list[[i]])
        lower_char <- tolower(char_list[[i]])

        # Profiles whose name starts with the character, in either case
        match_list <- grep(sprintf("^[%s%s]", upper_char, lower_char),
                           Profiles, value = TRUE)

        if (length(match_list) == 0) {
            rows[[i]] <- empty_row(upper_char, NA_character_)
            next
        }

        # Up to 5 distinct candidates: sampling without replacement avoids
        # fetching the same empty discography more than once
        n_attempts <- min(5L, length(match_list))
        candidates <- match_list[sample.int(length(match_list), n_attempts)]

        for (profile_match in candidates) {
            profile_discography <- get_discography(profile_match)
            if (nrow(profile_discography) >= 1) {
                break
            }
        }

        if (nrow(profile_discography) == 0) {
            # Every candidate tried had an empty discography
            rows[[i]] <- empty_row(upper_char, profile_match)
        } else {
            # Randomly selects one album from the discography
            album_index <- sample.int(nrow(profile_discography), 1)
            rows[[i]] <- data.frame(
                Character = upper_char,
                Profile = profile_match,
                profile_discography[album_index, , drop = FALSE],
                row.names = NULL
            )
        }
    }

    do.call(rbind, rows)
}

In [17]:
# Fetches the discography that AudioCulture lists for one profile.
#
# Args:
#   name: profile name to look up in the global `df` table (columns
#     `Profiles` and `Links`). An exact match is preferred; otherwise the
#     first profile containing `name` as literal text is used. The name is
#     never interpreted as a regular expression, so names containing
#     characters such as "(" or "." are looked up safely.
#
# Returns:
#   Data frame with the columns Album, Featuring and Year, restricted to
#   rows whose Year is a number >= 1000. A zero-row data frame with the same
#   columns (plus a warning) when no profile link is found for `name`.
get_discography <- function(name) {
    name <- as.character(name)[1]

    # Exact match first, so a short name cannot pick up an earlier profile
    # that merely contains it
    profile_row <- match(name, df$Profiles)
    if (is.na(profile_row)) {
        profile_row <- which(str_detect(df$Profiles, fixed(name)))[1]
    }

    name_link <- if (is.na(profile_row)) {
        NA_character_
    } else {
        as.character(df$Links[profile_row])
    }

    # An unknown name would otherwise become the URL ".../NA/discography"
    if (is.na(name_link)) {
        warning("No AudioCulture profile link found for: ", name, call. = FALSE)
        return(data.frame(Album = character(),
                          Featuring = character(),
                          Year = numeric()))
    }

    profile_discography_page <- read_html(
        glue("https://www.audioculture.co.nz{name_link}/discography")
    )

    Album <- profile_discography_page %>%
        html_nodes("[class='header']") %>%
        html_text()
    Featuring <- profile_discography_page %>%
        html_nodes("[class='body']") %>%
        html_text()
    Year <- profile_discography_page %>%
        html_nodes("[class='year']") %>%
        html_text() %>%
        as.numeric()

    # Replaces empty entries with NA
    Album[Album == ""] <- NA_character_
    Featuring[Featuring == ""] <- NA_character_

    # Data frame of the given profile's discography
    discography_df <- data.frame(Album, Featuring, Year)

    # filter() drops rows whose Year is below 1000 as well as rows whose
    # Year is NA, so no separate NA replacement step is needed
    discography_df %>% filter(Year >= 1000)
}

In [8]:
# STEP 2: Select the index of the cafe you would like a playlist of

# Data frame of Christchurch cafes to select from.
# seq_len() is used instead of 1:nrow() so that an empty scrape gives an
# empty table; 1:0 would be c(1, 0) and make data.frame() fail.
# Note: `cafe_names` is a one-column tibble, so data.frame() keeps its inner
# column name and the second column is called `cafe_name`, not `Cafe`.
chch_cafes_df <- data.frame(
  Index = seq_len(nrow(cafe_names)),
  Cafe = cafe_names,
  Summary = summary
)
chch_cafes_df

Index,cafe_name,Summary
<int>,<chr>,<chr>
1,Mind Your Temper,"If you’re feeling grumpy or a little blue, a visit to Mind Your Temper is sure to lighten your mood."
2,Child Sister,"Seeking a calm environment with aesthetically-pleasing interior decor, complete with incredible coffee and delicious food to match? Then look no further, Child Sister has..."
3,Tom's,"If you are a sando aficionado and find yourself in the South Town area of Christchurch city centre, keep an eye out for the striking cobalt blue hue of Tom’s."
4,Bonobo Café,"Bonobo: a benevolent primate, a trip-hop artist and more recently a plant-based cuisine destination in the seaside suburb of Sumner."
5,The Old Vicarage,"The Old Vicarage is exactly that, the former home of the Vicars from St Mary’s Church in Halswell."
6,General Post,"You’ll find a few golden nuggets interspersed around Tai Tapu, and General Post is one of them."
7,Truffle Café,You may think of truffle as clandestine but this Merivale café - with the same name - is anything but.
8,The Mediterranean Food Company,Down the industrial eastern end of Tuam Street in an old brickery is a secluded slice of Italy.
9,Addington Coffee Co-op,"Addington Coffee Co-op is, simply put, a true Christchurch institution with laid-back vibes and house-roasted coffee."
10,Six Ounces,There’s nothing quite like a cosy neighbourhood haunt and six ounces is exactly that.


In [9]:
# STEP 3: Choose a cafe by assigning its Index value from the table above

# For example, Herba Gourmet is listed at index 49
index <- 49

In [10]:
# STEP 4: Run this code to generate a random playlist

# Subsetting past the end of the table silently yields NA rather than an
# error, so check the chosen index before using it.
if (!is.numeric(index) || length(index) != 1 || is.na(index) ||
    index < 1 || index > nrow(chch_cafes_df)) {
  stop("`index` must be a single number between 1 and ",
       nrow(chch_cafes_df), ".", call. = FALSE)
}

# The cafe name sits in the second column of the table
selected_cafe <- chch_cafes_df[[2]][index]
create_profile_list(selected_cafe)

Character,Profile,Album,Featuring,Year
<chr>,<chr>,<chr>,<chr>,<dbl>
H,Headband,Love Is Bigger Than The Whole Wide World,Headband,1972
E,Edward Castelow,Mufti Day,Dictaphone Blues,2014
R,Ragnarok,Ragnarok,Ragnarok (4),1975
B,Broods,Broods,Broods,2014
A,Alastair Dougal,Somewhere In New Zealand Tonight,Glen Moffatt,1995
G,Gentle Annie,The Devil Went Down To Auckland,Gentle Annie,1982
O,OMC,How Bizarre,OMC,1995
U,Urban Disturbance,37 Degrees Lattitude,Urban Disturbance (3),1994
R,Rochelle Vinsen,I Like Your Kind Of Love,Jim McNaught And Rochelle Vinsen,1963
M,Mahinaarangi Tocker,Hei Ha!,Mahinarangi Tocker,2002
