In [1]:
import pandas as pd
pd.set_option('display.max_colwidth', 1000)
import collections
import pickle
from datetime import datetime
import math

### Get Data

In [2]:
def get_pull(date):
    """Load the three pickled tables saved for one data pull.

    Parameters
    ----------
    date : str
        Pull identifier; it is substituted into both the directory name
        (``pulls/pull_<date>/``) and each file name (``<table>_<date>.pickle``).

    Returns
    -------
    tuple
        ``(shows, songs, live_songs)``, each being whatever object
        ``pd.read_pickle`` returns for the corresponding file.
    """
    path_templates = ('pulls/pull_%s/shows_%s.pickle',
                      'pulls/pull_%s/songs_%s.pickle',
                      'pulls/pull_%s/live_songs_%s.pickle')
    # Files are read in the same order they are returned.
    shows, songs, live_songs = [pd.read_pickle(template % (date,  date))
                                for template in path_templates]
    return shows, songs, live_songs

In [3]:
# Load the shows, songs and live_songs tables from the pull dated 2020-12-19.
# Every later cell reads these three names.
shows, songs, live_songs = get_pull('2020-12-19')

In [4]:
# Datetime
# Parse the 'date' column (month-day-year text, per the format string) into
# pandas datetimes so later cells can sort by date and re-format with strftime.
shows['date'] = pd.to_datetime(shows['date'], format='%m-%d-%Y')

### Create LaTeX Tables

In [5]:
# Helpful dictionaries

# Maps the set label stored with each live song to the short code printed in
# the generated tables. Note that 'One Set' and 'Set 1' share the code 'S1'.
set_to_code = {
    # Numbered sets
    'One Set': 'S1',
    'Set 1': 'S1',
    'Set 2': 'S2',
    'Set 3': 'S3',
    # Quarters and overtime
    'Quarter 1': 'Q1',
    'Quarter 2': 'Q2',
    'Quarter 3': 'Q3',
    'Quarter 4': 'Q4',
    'Overtime': 'OT',
    # Encores
    'Encore': 'E1',
    '2nd Encore': 'E2',
    '3rd Encore': 'E3',
}

In [6]:
# Helper functions

def fix_table(text):
    r"""Turn a LaTeX ``tabular`` table into a rule-less ``supertabular`` one.

    Parameters
    ----------
    text : str or list of str
        Either the whole table as a single string, or the table as a list of
        lines (for example the result of ``file.readlines()``).

    Returns
    -------
    str or list of str
        * For a string: a new string with every ``tabular`` replaced by
          ``supertabular`` and ``\toprule`` / ``\bottomrule`` removed.
        * For a list: the *same* list object, modified in place. The first and
          last lines have ``tabular`` replaced, and the second and
          second-to-last lines (where the rule lines are expected to sit) are
          blanked.

    Notes
    -----
    Lists that are too short to contain separate rule lines are handled
    without error: an empty list is returned unchanged, and the rule-blanking
    step is skipped when it would otherwise erase the begin/end lines.
    """
    if isinstance(text, str):
        text = text.replace('tabular', 'supertabular')
        text = text.replace('\\toprule', '')
        text = text.replace('\\bottomrule', '')
        return text

    if not text:
        # Nothing to convert (e.g. an empty file).
        return text

    text[0] = text[0].replace('tabular', 'supertabular')
    if len(text) > 1:
        # With a single line, first and last are the same element; replacing
        # twice would yield 'supersupertabular'.
        text[-1] = text[-1].replace('tabular', 'supertabular')
    if len(text) >= 3:
        # Indices 1 and -2 only address interior lines when there are at
        # least three lines; otherwise they alias the begin/end lines.
        for i in [1, -2]:
            text[i] = ''
    return text

def edit_table(file_name):
    """Rewrite a generated LaTeX table file in place.

    The file is read line by line, the lines are passed through
    ``fix_table``, and the result is written back to the same path.

    Parameters
    ----------
    file_name : str
        Path of the ``.tex`` file to convert.
    """
    with open(file_name, "r") as source:
        original_lines = source.readlines()
    # Convert before reopening so a failure here leaves the file untouched.
    converted_lines = fix_table(original_lines)
    with open(file_name, "w") as target:
        target.writelines(converted_lines)
        
def clean_text(text):
    """Escape a handful of characters so the text can be embedded in LaTeX.

    Replaces ``é`` with ``\\'e`` and prefixes ``&``, ``#`` and ``$`` with a
    backslash.

    Parameters
    ----------
    text : str or None
        Text to escape.

    Returns
    -------
    str or None
        The escaped text, or ``None`` when ``text`` is ``None``.
    """
    if text is None:
        return None
    substitutions = (('é', "\\'e"),
                     ('&', "\\&"),
                     ('#', "\\#"),
                     ('$', "\\$"))
    for raw, escaped in substitutions:
        text = text.replace(raw, escaped)
    return text
    
def clean_file_name(text):
    """Derive a file-name stem from a display name.

    Spaces become underscores; the characters ``.``, ``&``, ``"``, ``#`` and
    ``/`` are dropped.

    Parameters
    ----------
    text : str
        Name to convert.

    Returns
    -------
    str
        The converted name.
    """
    # Single-character mapping: None deletes the character.
    char_map = str.maketrans({' ': '_',
                              '.': None,
                              '&': None,
                              '"': None,
                              '#': None,
                              '/': None})
    return text.translate(char_map)

def transition_to_tex(t, setlist=False):
    """Render a song-to-song transition marker as LaTeX.

    Parameters
    ----------
    t : str or None
        Transition marker. ``None`` means there is no transition.
    setlist : bool, optional
        When true, ``t`` must be exactly one of ``None``, ``','``, ``'>'`` or
        ``'->'`` and the result includes the spacing used inside a setlist
        line. When false (default), ``'->'`` and ``'>'`` are substituted
        wherever they occur in ``t``.

    Returns
    -------
    str
        LaTeX markup; the empty string for ``None``.

    Raises
    ------
    KeyError
        In setlist mode, when ``t`` is not one of the four known markers.
    """
    if not setlist:
        if t is None:
            return ''
        # '->' must be handled first, otherwise its '>' would be consumed.
        with_arrows = t.replace('->','\\textrightarrow')
        return with_arrows.replace('>','\\textgreater')

    setlist_markup = {None : '',
                      ',' : ', ',
                      '>' : ' \\textgreater \\enspace ',
                      '->' : ' \\textrightarrow  \\enspace '}
    return setlist_markup[t]

def table_list(ls, cols):
    """Lay a flat list out as a table that is filled column by column.

    Parameters
    ----------
    ls : list
        Items to arrange.
    cols : int
        Number of columns; the number of rows is ``ceil(len(ls) / cols)``.

    Returns
    -------
    pandas.DataFrame
        One row per table row; cells without an item hold ``''``.
    """
    n_rows = math.ceil(len(ls) / cols)
    # Item i lands in row i % n_rows, so row r holds every n_rows-th item
    # starting at position r.
    row_items = [ls[r::n_rows] for r in range(n_rows)]
    return pd.DataFrame(row_items).fillna('')

def order_sets(sets):
    """Return the known set labels present in ``sets``, in show order.

    The fixed order is: ``One Set``, ``Set 1``-``Set 3``, ``Quarter 1``-
    ``Quarter 4``, ``Overtime``, ``Encore``, ``2nd Encore``, ``3rd Encore``.
    Labels in ``sets`` that are not in this list are dropped.

    Parameters
    ----------
    sets : collection of str
        Set labels to order.

    Returns
    -------
    list of str
        The recognised labels in the fixed order.
    """
    show_order = ['One Set']
    show_order.extend('Set %d' % (n) for n in range(1,4))
    show_order.extend('Quarter %d' % (n) for n in range(1,5))
    show_order.extend(['Overtime', 'Encore', '2nd Encore', '3rd Encore'])
    return [label for label in show_order if label in sets]

In [7]:
# Song codes
# Writes tex/song_codes.tex: one row per song with its code and a name
# truncated to fit the column.

song_codes = songs.reset_index().rename(columns={'primary_key' : 'code'})
# Order rows by code, ignoring case.
song_codes = song_codes.iloc[song_codes['code'].str.lower().argsort()]

# Names longer than 20 characters are cut to 18 characters plus '..'.
# .loc is used for the masked assignment: .at is the scalar accessor and is
# not meant to take a boolean mask.
is_long_name = song_codes['name'].str.len() > 20
song_codes.loc[is_long_name, 'name'] = song_codes.loc[is_long_name, 'name'].str[:18] + '..'

song_codes.to_latex(buf='tex/song_codes.tex',
                    columns=['code', 'name'],
                    index=False,
                    column_format='ll',
                    header=False)
edit_table('tex/song_codes.tex')

In [8]:
# Support
# Writes tex/support.tex (one heading + \input per support act) and one
# table of show dates/titles per act under tex/support/.

# dropna() skips shows without a support act whether the gap is stored as
# None or NaN, and does not fail when every show has one.
support_acts = sorted(shows['support'].dropna().unique())

# The context manager guarantees the index file is closed even if writing
# one of the per-act tables fails.
with open("tex/support.tex", "w") as f:
    for support in support_acts:
        file_name = support.replace(' ', '_').replace('.','')
        table_path = 'tex/support/%s.tex' % (file_name)

        support_shows = shows[shows.support == support][['date', 'title']].sort_values('date')
        support_shows['date'] = support_shows['date'].apply(lambda x : x.strftime('%m-%d-%Y'))

        f.write('\\noindent \\textbf{%s}\\newline\n\\vspace{10pt} \n' % (clean_text(support)))
        f.write('\\input{support/%s.tex}\\newline\n\\vspace{10pt} \n' % (file_name))
        support_shows.to_latex(buf=table_path,
                               index=False,
                               column_format='ll',
                               header=False)
        edit_table(table_path)

In [9]:
# VIP
# Writes tex/vip.tex: date and title of every show flagged as VIP,
# oldest first.

vip_table_path = 'tex/vip.tex'
is_vip = shows['vip'] == True
tmp = shows.loc[is_vip, ['date', 'title']].sort_values('date')
tmp['date'] = tmp['date'].map(lambda show_date: show_date.strftime('%m-%d-%Y'))
tmp.to_latex(buf=vip_table_path, index=False, column_format='ll', header=False)
edit_table(vip_table_path)

In [10]:
# State Aggregate
# Writes tex/state_special_counts.tex: per (state, country), the number of
# shows and the totals of the three per-song flag columns.

special_cols = ['stewart', 'stewart_with_lyrics', 'hof']

# Shows without a state get a single space so they still form a group
# (groupby drops missing keys).
shows_flat = shows.reset_index()
shows_flat['state'] = shows_flat['state'].fillna(' ')
num_shows = shows_flat.groupby(['state', 'country'])['primary_key'].count().to_dict()

songs_with_show = live_songs.merge(shows_flat.rename(columns={'primary_key': 'show'}),
                                   on='show')

# Select the flag columns *before* summing: summing the whole merged frame
# would also try to add up dates and text columns, which newer pandas
# versions refuse to do silently.
state_counts = (songs_with_show.groupby(['state', 'country'], as_index=False)[special_cols]
                               .sum()
                               .sort_values(['country', 'state']))
state_counts[special_cols] = state_counts[special_cols].astype(int)

# Look up the show count for each (state, country) row; inserted as the third
# column so the output order is state, country, num_shows, flags.
state_counts.insert(2, 'num_shows',
                    [num_shows[key] for key in zip(state_counts['state'],
                                                   state_counts['country'])])

state_counts.to_latex(buf='tex/state_special_counts.tex',
                      index=False,
                      column_format='llllll',
                      header=False)
edit_table('tex/state_special_counts.tex')

In [11]:
# Venue

def simplify_venue_name(venue):
    """Normalise a venue name so repeat visits group under one label.

    Steps, in order:

    1. If the name contains a double quote, keep only the text before the
       last quote and drop the remaining quote characters.
    2. Drop a trailing iteration marker (``1``-``19`` or ``I``-``VIII``)
       when it is the last space-separated word.
    3. Apply fixed spelling substitutions (``&`` -> ``and``,
       ``Amphitheatre`` -> ``Amphitheater``, remove ``Music and Art``,
       collapse `` Music `` to a single space).

    Parameters
    ----------
    venue : str
        Venue name as stored with the show.

    Returns
    -------
    str
        The simplified name. Whitespace left behind by the removals is not
        trimmed.
    """
    # Use "" to identify real venue name and remove excess
    if '"' in venue:
        before_last_quote = venue.rpartition('"')[0]
        venue = before_last_quote.replace('"', '')

    # Get rid of any iteration number
    iteration_markers = {str(number) for number in range(1,20)}
    iteration_markers |= {'I', 'II', 'III', 'IV', 'V', 'VI', 'VII', 'VIII'}
    words = venue.split(' ')
    if words[-1] in iteration_markers:
        venue = ' '.join(words[:-1])

    # Standardize
    for old, new in (('&', 'and'),
                     ('Amphitheatre', 'Amphitheater'),
                     ('Music and Art', ''),
                     (' Music ', ' ')):
        venue = venue.replace(old, new)
    return venue

# Writes tex/venue.tex (+ one date table per venue under tex/venue/) for
# venues with at least four shows, and tex/venue_sm.tex with one row per
# venue that has fewer.

tmp = shows.reset_index()
tmp['venue'] = tmp['venue'].apply(simplify_venue_name)
tmp_combined = tmp

num_shows = tmp_combined.groupby('venue')['primary_key'].count().to_dict()

special_cols = ['stewart', 'stewart_with_lyrics', 'hof']
songs_with_venue = live_songs.merge(tmp_combined.rename(columns={'primary_key': 'show'}),
                                    on='show')

# Sum only the flag columns; summing the whole merged frame would also try
# to add up dates and text columns.
venue_sums = songs_with_venue.groupby('venue')[special_cols].sum().astype(int)

# One row per venue, including venues that have shows but no recorded songs
# (their flag totals are 0). Built in one step instead of appending rows in
# a loop, which relied on the removed DataFrame.append.
stats = venue_sums.reindex(list(num_shows), fill_value=0)
stats.insert(0, 'num_shows', pd.Series(num_shows))

tmp_sm = []
venues = sorted(num_shows)
with open("tex/venue.tex", "w") as f:
    for venue in venues:
        file_name = venue.replace(' ', '_').replace('.','').replace('/','').replace('"','')
        table_path = 'tex/venue/%s.tex' % (file_name)

        venue_shows = (tmp_combined[tmp_combined.venue == venue][['date', 'title']]
                       .sort_values('date'))
        # Order: num_shows, stewart, stewart_with_lyrics, hof.
        venue_stats = list(stats.loc[venue])

        if len(venue_shows) >= 4:
            # Frequently played venue: own heading, stats line and a
            # four-column table of full dates.
            long_dates = venue_shows['date'].apply(lambda x : x.strftime('%m-%d-%Y'))
            dates = table_list(list(long_dates),4)
            stat_string = '\\qquad'.join([str(x) for x in venue_stats])
            f.write('\\begin{center}\\textbf{%s}\\end{center}\n' % (clean_text(venue)))
            f.write('\\begin{center}%s\\end{center} \n' % (stat_string))
            f.write('\\input{venue/%s.tex}\\newline \n' % (file_name))
            dates.to_latex(buf=table_path,
                           index=False,
                           column_format='llll',
                           header=False)
            edit_table(table_path)
        else:
            # Rarely played venue: a single row in the shared small table,
            # with two-digit years to save space.
            short_dates = venue_shows['date'].apply(lambda x : x.strftime('%m-%d-%y'))
            title = '\\textbf{%s}' % (clean_text(venue))
            tmp_sm.append([title] + [', '.join(list(short_dates))] + venue_stats)

# escape=False because the title cells already contain LaTeX markup.
pd.DataFrame(tmp_sm).to_latex(buf='tex/venue_sm.tex',
                              index=False,
                              column_format='llllll',
                              header=False,
                              escape=False)
edit_table('tex/venue_sm.tex')

In [12]:
# Setlists
# Writes tex/setlists.tex: for every show (oldest first) a header, one line
# per set listing the songs with their transitions, then tag footnotes,
# notes and the support act.
shows_by_date = shows.sort_values('date')

# The context manager closes the file even if a show has malformed data.
with open("tex/setlists.tex", "w") as f:
    for index, row in shows_by_date.iterrows():
        header = ('\\noindent\\underline{\\textbf{%s\\quad %s}} \\newline \n\n' %
                    (row['date'].strftime('%m-%d-%Y'), clean_text(row['title'])))
        f.write(header)

        show_songs = live_songs[live_songs['show'] == index]

        sets = order_sets(list(show_songs['set'].drop_duplicates()))
        setlist_table = []
        for set_name in sets:
            set_songs = show_songs[show_songs['set'] == set_name]

            # The set length is read from the 'order' of the first song that
            # has no outgoing transition.
            n = int(list(set_songs[set_songs['out_transition'].isna()]['order'])[0])

            entries = []
            for i in range(1,n+1):
                song = set_songs[set_songs['order'] == i].iloc[0]
                name = clean_text(songs.loc[song['song']]['name'])

                # Flag markers shown as a superscript after the song name.
                superscript = ''
                if song['stewart']:
                    superscript += '$\\wedge$'
                if song['stewart_with_lyrics']:
                    superscript += '*'
                if song['hof']:
                    superscript += 'H'

                # A tag of -1 means the song has no footnote.
                tag = '' if song['tag'] == -1 else '[%d]' % (song['tag'])
                trans = transition_to_tex(song['out_transition'], setlist=True)

                if len(superscript) > 0:
                    entries.append('\\textbf{%s%s}\\textsuperscript{%s}%s' % (name, tag, superscript, trans))
                else:
                    entries.append('%s%s%s' % (name, tag, trans))

            setlist = ''.join(entries)
            setlist_table.append(['\\noindent\\textbf{%s}: ' % (set_to_code[set_name]),setlist])

        if len(sets) > 0:
            setlist_table = pd.DataFrame(setlist_table)
            f.write(fix_table(setlist_table.to_latex(buf=None,
                                                     index=False,
                                                     column_format='p{0.03\\textwidth}p{0.97\\textwidth}',
                                                     header=False,
                                                     escape = False)) + '\\newline \n\n')
        else:
            f.write('Setlist unavailable. \\newline \n\n')

        # Guard against a missing tags value the same way notes are guarded.
        tags = row['tags']
        if tags is not None and len(tags) > 0:
            f.write('\\begin{enumerate} \n')
            for i in tags:
                tag_str = clean_text('[%d] %s' % (i,tags[i]))
                f.write('\\item[] %s \n' % (tag_str))
            f.write('\\end{enumerate} \n\\vspace{3pt}\n')

        notes = row['notes']
        if notes is not None and len(notes) > 0:
            f.write('\\textbf{Notes:} \\begin{enumerate} \n')
            for note in notes:
                f.write('\\item[] %s' % (clean_text(note)))
            f.write('\\end{enumerate} \n\\vspace{3pt}\n')

        support = clean_text(row['support'])
        if support is not None:
            f.write('\\noindent \\textbf{Support:} %s \\newline \n\n' % (support))

In [13]:
# Song instances
# Writes tex/song_instances.tex (one heading + \input per song) and, per
# song, a table under tex/song_instances/ listing every performance with the
# neighbouring songs and the transitions in and out.

show_to_date = shows['date'].to_dict()
code_to_song = songs['name'].to_dict()
song_to_code = {v:k for (k,v) in code_to_song.items()}

tmp_full = live_songs.merge(shows.reset_index().rename(columns={'primary_key' : 'show'}), on='show')
tmp_full = tmp_full.sort_values('date')
tmp_full['date'] = tmp_full['date'].apply(lambda x : x.strftime('%m-%d-%y'))

# The context manager closes the index file even if a lookup below fails.
with open("tex/song_instances.tex", "w") as f:
    for name, code in song_to_code.items():
        file_name = clean_file_name(name)
        table_path = 'tex/song_instances/%s.tex' % (file_name)
        instances = []

        for _, row in tmp_full[tmp_full['song'] == code].iterrows():
            show = row['show']
            order = row['order']
            set_code = set_to_code[row['set']]
            # All songs played in the same set of the same show.
            in_same_set = (live_songs['show'] == show) & (live_songs['set'] == row['set'])

            out_transition = transition_to_tex(row['out_transition'], setlist=False)
            # Compare by value; an identity check against '' only works by
            # accident of string interning.
            if out_transition != '':
                next_song = live_songs[in_same_set & (live_songs['order'] == order+1)].iloc[0]['song']
            else:
                # No outgoing transition: this song closed the set.
                next_song = '*C%s*' % (set_code)

            if order != 1:
                prev_song_row = live_songs[in_same_set & (live_songs['order'] == order-1)].iloc[0]
                prev_song = prev_song_row['song']
                in_transition = transition_to_tex(prev_song_row['out_transition'], setlist=False)
            else:
                # First song of the set: it opened the set.
                prev_song = '*O%s*' % (set_code)
                in_transition = ''

            instances.append([row['date'], prev_song, in_transition, out_transition, next_song])

        f.write('\\begin{center}\\textbf{%s}\\end{center}\n' % (clean_text(name)))
        f.write('\\input{song_instances/%s.tex}\\newline\n\\vspace{10pt} \n' % (file_name))
        instances = pd.DataFrame(instances)
        # escape=False because the transition cells already contain LaTeX.
        instances.to_latex(buf=table_path,
                           index=False,
                           column_format='lllll',
                           header=False,
                           escape = False)
        edit_table(table_path)