In [None]:
import json
import re
from collections import Counter
from datetime import datetime
from enum import Enum

import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from tabulate import tabulate

# Event listing endpoints, one per venue.
# The two .json URLs are consumed with response.json(); the other pages are
# fetched as HTML and parsed with BeautifulSoup (Meow Wolf is rendered through
# Selenium first).
red_rocks_url = 'https://www.redrocksonline.com/events/'
mission_ballroom_url = 'https://www.missionballroom.com/data/events-index.json'
vinyl_url = 'https://vinylnightclub.com/upcoming-denver-colorado-nightlife-club-events-shows-concerts-near-me/'
church_url = 'https://churchnightclubco.com/upcoming-underground-club-events-electronic-dance-music-near-me-denver-co/'
ogden_url = 'https://aegwebprod.blob.core.windows.net/json/events/7/events.json'
reelworks_url = 'https://reelworksdenver.com/events/'
meow_wolf_url = 'https://tickets.meowwolf.com/events/denver/'
# Path of the JSON file containing the track list that events are matched against
tracks_file = 'tracks.json'

# Browser-like request headers to use when a site requires them
# (passed to requests.get for the club pages and the ReelWorks page).
# NOTE(review): advertising "br" presumes the HTTP stack can decode Brotli
# responses - confirm a Brotli decoder is installed alongside requests.
fake_headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36",
    "Accept-Language": "en-US,en;q=0.9",
    "Accept-Encoding": "gzip, deflate, br",
    "Connection": "keep-alive",
    "Upgrade-Insecure-Requests": "1",
    "TE": "Trailers"
}

class Location(Enum):
    """Venues that events are collected from; each value is the venue's display name."""
    REDROCKS = 'Red Rocks'
    MISSION_BALLROOM = 'Mission Ballroom'
    VINYL = 'Club Vinyl'
    CHURCH = 'The Church Nightclub'
    OGDEN = 'Ogden Theatre'
    REELWORKS = 'ReelWorks'
    MEOW_WOLF = 'Meow Wolf'

class Event:
    """A single show at one venue.

    Attributes:
        date: ``datetime`` of the show, or ``None`` when no date is known yet.
        headliner: Name of the headlining act.
        openers: Supporting acts / extra info as a single string ('' if none).
        location: ``Location`` member identifying the venue.
    """

    def __init__(self, date, headliner, openers, location):
        self.date = date
        self.headliner = headliner
        self.openers = openers
        self.location = location

    def __str__(self):
        """Return a four-line, human-readable summary of the event."""
        # Format the date outside the f-string: reusing the enclosing quote
        # character inside an f-string expression is a SyntaxError before
        # Python 3.12.
        date_text = self.date.strftime('%m-%d-%Y') if self.date else 'TBD'
        return (
            f"Date: {date_text}\n"
            f"Headliner: {self.headliner}\n"
            f"Openers: {self.openers}\n"
            f"Location: {self.location.value}"
        )

def _parse_red_rocks_date(date_text, today):
    """Parse a year-less Red Rocks date string, picking the year that keeps it upcoming.

    The listing text carries no year. The current year is tried first and the
    following year second; the first candidate that is not before ``today``
    wins. This assumes the page only lists upcoming shows.

    Returns:
        The parsed ``datetime``, or ``None`` if no candidate year fits (the
        text is malformed, or the only valid date is already in the past).
    """
    for year in (today.year, today.year + 1):
        try:
            # Supplying the year lets Feb 29 parse in leap years; without it
            # strptime falls back to 1900, which has no Feb 29.
            parsed = datetime.strptime(f"{year} {date_text}", "%Y %a, %b %d, %I:%M %p")
        except ValueError:
            continue
        if parsed.date() >= today:
            return parsed
    return None


def get_red_rocks_events(timeout=10):
    """Scrape the Red Rocks events page into a list of ``Event`` objects.

    Args:
        timeout: Seconds passed to ``requests.get`` so a stalled server
            cannot hang the script indefinitely.

    Returns:
        A list of ``Event`` objects located at ``Location.REDROCKS``; empty
        when the page does not answer with HTTP 200.
    """
    events = []
    response = requests.get(red_rocks_url, timeout=timeout)

    if response.status_code != 200:
        print(f'Failed to retrieve page. Status code: {response.status_code}')
        return events

    soup = BeautifulSoup(response.text, 'html.parser')
    today = datetime.now().date()

    for card in soup.find_all('div', 'card-content'):
        date_text = card.find('div', class_='date').text.strip()
        if ' pm' not in date_text:
            continue  # logic to skip duplicate entries - TODO: investigate and refactor
        date = _parse_red_rocks_date(date_text, today)
        if date is None:
            print(f'Skipping Red Rocks event with unrecognized date: {date_text}')
            continue
        headliner = card.find('h3', class_='card-title').text.strip()
        # Look the optional openers paragraph up once instead of twice
        openers_tag = card.find('p', class_='hide-mobile')
        openers = openers_tag.text.strip() if openers_tag is not None else ''
        events.append(Event(date, headliner, openers, Location.REDROCKS))

    return events

def get_mission_ballroom_events(timeout=10):
    """Fetch the Mission Ballroom JSON event index as a list of ``Event`` objects.

    Each entry's ``date`` and ``doorDateTime`` fields are joined and parsed
    with the format ``"%A, %B %d, %Y, %I:%M%p"``.

    Args:
        timeout: Seconds passed to ``requests.get`` so a stalled server
            cannot hang the script indefinitely.

    Returns:
        A list of ``Event`` objects located at ``Location.MISSION_BALLROOM``;
        empty when the endpoint does not answer with HTTP 200.
    """
    events = []
    response = requests.get(mission_ballroom_url, timeout=timeout)

    if response.status_code != 200:
        print(f'Failed to retrieve page. Status code: {response.status_code}')
        return events

    for entry in response.json():
        date_text = f"{entry['date']}, {entry['doorDateTime']}"
        date = datetime.strptime(date_text, "%A, %B %d, %Y, %I:%M%p")
        headliner = entry['title']
        # Normalize a missing/null subtitle to '' so later string handling
        # of Event.openers (e.g. .split) cannot fail on None.
        openers = entry.get('subtitle') or ''
        events.append(Event(date, headliner, openers, Location.MISSION_BALLROOM))

    return events

def get_vinyl_events():
    """Return the events listed on the Club Vinyl page."""
    return get_club_events(url=vinyl_url, club_name=Location.VINYL)

def get_church_events():
    """Return the events listed on The Church Nightclub page."""
    return get_club_events(url=church_url, club_name=Location.CHURCH)

def get_club_events(url, club_name, timeout=10):
    """Scrape a club events page that uses the shared ``events__item`` markup.

    Args:
        url: Address of the club's event listing page.
        club_name: ``Location`` member stored on every returned event.
        timeout: Seconds passed to ``requests.get`` so a stalled server
            cannot hang the script indefinitely.

    Returns:
        A list of ``Event`` objects with empty openers; empty when the page
        does not answer with HTTP 200.
    """
    events = []
    response = requests.get(url, headers=fake_headers, timeout=timeout)

    if response.status_code != 200:
        print(f'Failed to retrieve page. Status code: {response.status_code}')
        return events

    soup = BeautifulSoup(response.text, 'html.parser')

    for item in soup.find_all('div', class_='events__item'):
        # Dates are rendered as month.day.two-digit-year, e.g. "07.04.25"
        date_text = item.find('div', class_='event__date event__date-list').text.strip()
        date = datetime.strptime(date_text, "%m.%d.%y")
        title = item.find('span', class_='event__content-title').text.strip()
        events.append(Event(date, title, '', club_name))

    return events

def get_ogden_events(timeout=10):
    """Fetch the Ogden Theatre JSON feed as a list of ``Event`` objects.

    Args:
        timeout: Seconds passed to ``requests.get`` so a stalled server
            cannot hang the script indefinitely.

    Returns:
        A list of ``Event`` objects located at ``Location.OGDEN``; an event's
        date is ``None`` when the feed reports ``'TBD'`` or no date at all.
        Empty when the endpoint does not answer with HTTP 200.
    """
    events = []
    response = requests.get(ogden_url, timeout=timeout)

    if response.status_code != 200:
        print(f'Failed to retrieve page. Status code: {response.status_code}')
        return events

    for entry in response.json()['events']:
        date_text = entry['eventDateTime']
        # Treat a null/empty timestamp like 'TBD' instead of letting
        # fromisoformat raise on it.
        has_date = bool(date_text) and date_text != 'TBD'
        date = datetime.fromisoformat(date_text) if has_date else None
        # Null names become '' so later .strip()/.split() calls are safe
        headliner = entry['title']['headlinersText'] or ''
        openers = entry['title']['supportingText'] or ''
        events.append(Event(date, headliner, openers, Location.OGDEN))

    return events

def get_reelworks_events(timeout=10):
    """Scrape the ReelWorks events page into a list of ``Event`` objects.

    Within each ``event-item-info`` block the ``h3`` holds the date, the
    ``h1`` the headliner and an optional ``h4`` the openers.

    Args:
        timeout: Seconds passed to ``requests.get`` so a stalled server
            cannot hang the script indefinitely.

    Returns:
        A list of ``Event`` objects located at ``Location.REELWORKS``; empty
        when the page does not answer with HTTP 200.
    """
    events = []
    response = requests.get(reelworks_url, headers=fake_headers, timeout=timeout)

    if response.status_code != 200:
        print(f'Failed to retrieve page. Status code: {response.status_code}')
        return events

    soup = BeautifulSoup(response.text, 'html.parser')

    for info in soup.find_all('div', class_='event-item-info'):
        date_text = info.find('h3').text.strip()
        date = datetime.strptime(date_text, "%A, %b %d, %Y")
        headliner = info.find('h1').text.strip()
        # Look the optional openers heading up once instead of twice.
        # TODO: handle different formatting for openers
        openers_tag = info.find('h4')
        openers = openers_tag.text.strip() if openers_tag is not None else ''
        events.append(Event(date, headliner, openers, Location.REELWORKS))

    return events

def _parse_meow_wolf_date(date_text, today):
    """Parse a year-less Meow Wolf date string, picking the year that keeps it upcoming.

    ``date_text`` is expected in the form ``"%b %d %I:%M %p"`` (ordinal
    suffixes already removed). The current year is tried first and the
    following year second; the first candidate that is not before ``today``
    wins. This assumes the page only lists upcoming shows.

    Returns:
        The parsed ``datetime``, or ``None`` if no candidate year fits (the
        text is malformed, or the only valid date is already in the past).
    """
    for year in (today.year, today.year + 1):
        try:
            # Supplying the year lets Feb 29 parse in leap years; without it
            # strptime falls back to 1900, which has no Feb 29.
            parsed = datetime.strptime(f"{year} {date_text}", "%Y %b %d %I:%M %p")
        except ValueError:
            continue
        if parsed.date() >= today:
            return parsed
    return None


def get_meow_wolf_events():
    """Render the Meow Wolf Denver events page in headless Chrome and scrape it.

    Returns:
        A list of ``Event`` objects located at ``Location.MEOW_WOLF``.

    Raises:
        Whatever Selenium raises when Chrome cannot start or the event
        container never appears; the browser is shut down in either case.
    """
    options = Options()
    options.add_argument('--headless')
    driver = webdriver.Chrome(options=options)
    try:
        driver.get(meow_wolf_url)
        driver.implicitly_wait(10)
        # Wait for JavaScript to load the content
        driver.find_element(By.CLASS_NAME, "details__Uo5Th")
        html = driver.page_source
    finally:
        # Always release the browser process, even if loading/waiting failed
        driver.quit()

    events = []
    today = datetime.now().date()
    soup = BeautifulSoup(html, 'html.parser')

    for details in soup.find_all('div', class_='details__Uo5Th'):
        date_text = details.find('div', class_='date__3PFQZ').text.strip().replace("Doors @ ", "")
        # Drop ordinal suffixes ("1st" -> "1") so strptime's %d can read the day
        cleaned_date_text = re.sub(r'(\d+)(st|nd|rd|th)', r'\1', date_text)
        date = _parse_meow_wolf_date(cleaned_date_text, today)
        if date is None:
            print(f'Skipping Meow Wolf event with unrecognized date: {date_text}')
            continue
        headliner = details.find('div', class_='title__1ZRxy').text.strip()
        # Look the optional guest-acts element up once instead of twice
        guests_tag = details.find('div', class_='guest-acts___tZRb')
        openers = guests_tag.text.strip().replace(" | ", ", ") if guests_tag is not None else ''
        events.append(Event(date, headliner, openers, Location.MEOW_WOLF))

    return events

def get_all_events():
    """Collect the events of every supported venue into one list, venue by venue."""
    venue_fetchers = (
        get_red_rocks_events,
        get_mission_ballroom_events,
        get_vinyl_events,
        get_church_events,
        get_ogden_events,
        get_reelworks_events,
        get_meow_wolf_events,
    )
    collected = []
    for fetch_events in venue_fetchers:
        collected.extend(fetch_events())
    return collected

def read_tracks_from_file(path=None):
    """Load the track list from a JSON file.

    Args:
        path: File to read. Defaults to the module-level ``tracks_file``.

    Returns:
        The parsed JSON document.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    if path is None:
        path = tracks_file
    # Read as UTF-8 explicitly: the platform default encoding would garble
    # (or reject) non-ASCII artist names on non-UTF-8 locales.
    with open(path, 'r', encoding='utf-8') as file:
        return json.load(file)

def get_artist_save_counts(tracks):
    """Count how many tracks each artist appears on.

    Args:
        tracks: Iterable of entries shaped like
            ``{'track': {'artists': [{'name': ...}, ...]}}``.

    Returns:
        A dict mapping artist name to track count, ordered by descending
        count; artists with equal counts keep first-seen order.
    """
    counts = Counter(
        artist['name']
        for track in tracks
        for artist in track['track']['artists']
    )
    # most_common() sorts by count, descending, and is stable for ties
    return dict(counts.most_common())

def get_relevant_shows(shows, artists):
    """Score shows by how many tracks of their performing artists are known.

    A show matches an artist when the artist's name equals (case-insensitively,
    ignoring surrounding whitespace) the show's headliner or one of its
    comma-separated openers. A show's relevance is the sum of the counts of
    all matching artists.

    Args:
        shows: Iterable of hashable objects with ``headliner`` and ``openers``
            string attributes (``None`` is treated as '').
        artists: Mapping of artist name to track count.

    Returns:
        A dict mapping each matched show to its relevance, ordered by
        descending relevance; ties keep the order in which shows were first
        matched (artist order first, then show order).
    """
    # TODO: handle edge cases (multiple headliners, extra words like "(DJ set)", etc.)
    # Index every show under each of its lower-cased act names once, rather
    # than re-splitting the openers of every show for every artist.
    shows_by_act = {}
    for show in shows:
        act_names = {(show.headliner or '').strip().lower()}
        act_names.update(
            opener.strip().lower() for opener in (show.openers or '').split(',')
        )
        for act_name in act_names:
            shows_by_act.setdefault(act_name, []).append(show)

    relevant_shows = {}
    for artist_name, save_count in artists.items():
        for show in shows_by_act.get(artist_name.lower(), ()):
            relevant_shows[show] = relevant_shows.get(show, 0) + save_count

    # Return sorted dictionary of shows by descending relevance
    return dict(sorted(relevant_shows.items(), key=lambda item: item[1], reverse=True))

def print_shows(show_dict):
    """Print the given shows as a grid table, one row per show.

    Args:
        show_dict: Mapping of show to relevance; rows are printed in the
            mapping's iteration order. A show without a date is shown as 'TBD'.
    """
    column_titles = ["Relevance", "Headliner", "Openers / Info", "Date", "Location"]
    table = []
    for show, relevance in show_dict.items():
        if show.date:
            date_label = show.date.strftime("%m-%d-%Y")
        else:
            date_label = 'TBD'
        table.append([relevance, show.headliner, show.openers, date_label, show.location.value])
    print(tabulate(table, headers=column_titles, tablefmt="grid", maxcolwidths=50))

# Driver: scrape every venue, load the track list, count tracks per artist,
# score the scraped events against those counts and print the result table.
# NOTE: these statements run whenever the module is executed or imported; they
# perform network requests and start a headless Chrome session.
all_events = get_all_events()
all_tracks = read_tracks_from_file()
artist_counts = get_artist_save_counts(all_tracks)
relevant_shows = get_relevant_shows(all_events, artist_counts)
print_shows(relevant_shows)