In [1]:
# Imports for selenium
import re
import requests
import math
import json
import pandas as pd
from pprint import pprint
from bs4 import BeautifulSoup as bs
from splinter import Browser

def init_browser():   
    # Chrome driver
    executable_path = {"executable_path": "chromedriver"}
    return Browser("chrome", **executable_path, headless=False)

In [2]:
"""
This function is to parse out content using a regular expression pattern from a string.
<param>regex_pattern</param>
<param>string</param>
"""
def parse_definition(regex_pattern, string):
    result = re.compile(regex_pattern, flags=re.MULTILINE|re.DOTALL)
    # Checking if the patten works for the string
    if not result.search(string):
        return "None"
    else:
        return result.search(string).group(1)

In [3]:
def artist_code(artist):
    browser = init_browser()
    # URL for queries
    url = f'https://www.songkick.com/search?utf8=✓&type=initial&query={artist.replace(" ", "+")}'
    browser.visit(url)
    html = browser.html
    soup = bs(html, "html.parser")
    # Find the href for the URL with the page link
    container = soup.find("div", class_="sticky-container")
    row = container.find("div", class_="row")
    col = row.find("div", class_="col-8 primary")
    ul = col.find("ul")
    page_link = ul.find("a", class_="thumb")["href"]
    # Running the function
    artist_code = parse_definition('/artists/(\\d+)-', str(page_link))
    # Returns the artist code 
    return artist_code

In [4]:
# This will create a DB that will be thrown into a MongoDB to use
def concert_info(artist_code):
    # Initialzing Python dictionary
    concert_info = {}
    # Lists to store info to append to dictionary
    base_artist = []
    base_artist.append(artist_code)
    event_number = []
    event_name = []
    popularity = []
    location = []
    longitude = []
    latitude = []
    city = []
    start_date = []
    start_time = []
    for x in range(len(base_artist)):
        base_url = "https://api.songkick.com/api/3.0/artists/"+str(base_artist[x])+"/gigography.json?apikey=ORhDmrgUGkhdnXgP"+"&page=1"
        base_artist_2 = base_artist[x]
        total_entries = requests.get(base_url).json()['resultsPage']['totalEntries']
        pages = total_entries/50
        total_pages = math.ceil(pages)
        print('PRINTING EVENTS FOR ARTIST: ' + str(base_artist[x]))
        counter = 1
        
        for y in range(total_pages):
            artist_url = "https://api.songkick.com/api/3.0/artists/"+str(base_artist_2)+"/gigography.json?apikey="+"ORhDmrgUGkhdnXgP"+"&page="+(str(y+1))
            artist_json = requests.get(artist_url).json()
            for x in artist_json['resultsPage']['results']['event']:
                if str(x['location']['city']).split(",")[-1].strip() == "US":
                    event_number.append(counter)
                    event_name.append(str(x['displayName']))
                    popularity.append(str(x['popularity']))
                    location.append(str(x['venue']['displayName']))
                    longitude.append(str(x['venue']['lat']))
                    latitude.append(str(x['venue']['lng']))
                    city.append(str(x['location']['city']))
                    start_date.append(str(x['start']['date']))
                    start_time.append(str(x['start']['time']))
                    counter += 1
    artist_pd = pd.DataFrame({
        "Event Number": event_number,
        "Event Name": event_name,
        "Popularity": popularity,
        "Venue": location,
        "Longitude": longitude,
        "Latitude": latitude,
        "City": city,
        "Start Date": start_date,
        "Start Time": start_time
    })
    indexed_pd = artist_pd.set_index("Event Number")
    indexed_pd.to_csv(artist_code + "_events.csv")
    pd_json = json.loads(indexed_pd.reset_index().to_json(orient='records'))
    return pd_json

In [5]:
dream_events = concert_info(artist_code("Dream Theater"))
bruno_events = concert_info(artist_code("Bruno Mars"))
garth_events = concert_info(artist_code("Garth Brooks"))
eminem_events = concert_info(artist_code("Eminem"))
radiohead_events = concert_info(artist_code("Radiohead"))
beyonce_events = concert_info(artist_code("Beyonce"))
kanye_events = concert_info(artist_code("Kanye West"))
mana_events = concert_info(artist_code("Mana"))
odesza_events = concert_info(artist_code("Odesza"))
jcole_events = concert_info(artist_code("J Cole"))
mayer_events = concert_info(artist_code("John Mayer"))
russ_events = concert_info(artist_code("Russ"))
sza_events = concert_info(artist_code("SZA"))
trippie_events = concert_info(artist_code("Trippie Redd"))

PRINTING EVENTS FOR ARTIST: 8765
PRINTING EVENTS FOR ARTIST: 941964
PRINTING EVENTS FOR ARTIST: 283090
PRINTING EVENTS FOR ARTIST: 182968
PRINTING EVENTS FOR ARTIST: 253846
PRINTING EVENTS FOR ARTIST: 553938
PRINTING EVENTS FOR ARTIST: 552177
PRINTING EVENTS FOR ARTIST: 106220
PRINTING EVENTS FOR ARTIST: 6018139
PRINTING EVENTS FOR ARTIST: 832745
PRINTING EVENTS FOR ARTIST: 442137
PRINTING EVENTS FOR ARTIST: 407566
PRINTING EVENTS FOR ARTIST: 3352181
PRINTING EVENTS FOR ARTIST: 9212594


In [22]:
sza_events

[{'City': 'New York, NY, US',
  'Event Name': 'Misun, Cheers Elephant, SZA, Bianca Raquel, and 1 more… at Drom (October 16, 2012)',
  'Event Number': 1,
  'Latitude': '-73.9841494',
  'Longitude': '40.7252014',
  'Popularity': '0.006355',
  'Start Date': '2012-10-16',
  'Start Time': '21:00:00',
  'Venue': 'Drom'},
 {'City': 'Manhattan, NY, US',
  'Event Name': "Raw with Iman Omari, Rachel Brown, SZA, and 1 more… at S.O.B.'s (January 9, 2013)",
  'Event Number': 2,
  'Latitude': '-74.0051241',
  'Longitude': '40.7284492',
  'Popularity': '0.086669',
  'Start Date': '2013-01-09',
  'Start Time': '21:00:00',
  'Venue': "S.O.B.'s"},
 {'City': 'Brooklyn, NY, US',
  'Event Name': 'Chad Valley with Rush Midnight, SZA, and Letting Up Despite Great Faults at Glasslands Gallery (April 12, 2013)',
  'Event Number': 3,
  'Latitude': '-73.9664821',
  'Longitude': '40.715135',
  'Popularity': '0.104766',
  'Start Date': '2013-04-12',
  'Start Time': '20:30:00',
  'Venue': 'Glasslands Gallery'},
 {'

# Make Sqlite db of artist name

import pandas as pd
import numpy as np
import os

import sqlalchemy
from sqlalchemy import create_engine, MetaData
from sqlalchemy.orm import Session

import pymysql
pymysql.install_as_MySQLdb()

from sqlalchemy.ext.declarative import declarative_base

from sqlalchemy import Column, Integer, String, Numeric, Text, Float

engine = create_engine("sqlite:///artistname.sqlite")
conn = engine.connect()

Base = declarative_base()

class Stations(Base):
    __tablename__ = 'musicians'
    
    id = Column(Integer, primary_key=True)
    station = Column(Text)
    name = Column(Text)
    latitude = Column(Float)
    longitude = Column(Float)
    elevation = Column(Float)
    

Base.metadata.create_all(engine)

artists_df = pd.read_csv("static/db/artistlist.csv")

data = artists_df.to_dict(orient='records')

# Use MetaData from SQLAlchemy to reflect the tables
metadata = MetaData(bind=engine)
metadata.reflect()

artistlist = sqlalchemy.Table('musicians', metadata, autoload=True)

conn.execute(artistlist.insert(), data)