In [66]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

sns.set()

%matplotlib inline

In [10]:
# Read the January events CSV; the file's first column is used as the row index.
df = pd.read_csv(
    'data/monthly-events/january.csv',
    index_col=0,
)
df

Unnamed: 0,Cost,Date-range,Time,Title,Venue,event_text
0,R80 to R95,31 January 2018 to 8 February 2018,8pm,When Swallows Cry,"Baxter Theatre Centre, Main Rd, Rondebosch, Ca...",Mike van Graan’s When Swallows Cry is a hard-h...
1,Free,18 January 2018 to 10 March 2018,Time: Opening night 6pm to 8pm | Mon – Fri 9am...,Kemang Wa Lehulere @ Stevenson Gallery,"Stevenson Gallery, Buchanan Building, 160 Sir ...","Stevenson presents Here I am, a concrete man, ..."
2,R100,,Fridays 4pm,Pink Summer Soirées,"Radisson Blu Hotel, 22 Riebeek St, Cape Town",The Ghibli Bar at Radisson Blu Hotel presents ...
3,R150 to R180,27 to 27 January 2018,8pm to 10pm,Mozart Winds: CT Concert Series,"Baxter Concert Hall, 2 Main Rd, Rondebosch, Ca...",Following the UCT Summer School’s Magic of Moz...
4,Free,25 January 2018 to 10 February 2018,Time: Tue to Fri 10am to 5pm | Sat 10am to 2pm,StateoftheART: The Marks we Make,"Venue: 50 Buitenkant St, CBD, Cape Town",\nStateoftheART gallery presents a group exhib...
5,Cost: Free Entrance,30 January 2018 to 6 March 2018,Time: 9am to 5pm Monday to Friday,New Romantics: Barnard Gallery,"55 Main Street, Newlands, Cape Town",The Barnard Gallery presents the New Romantics...
6,R30 | U18s R15 | Concessions R15,22 September 2017 to 31 March 2018,10am to 5pm,Derek Bauer Illustrations at Iziko,"Iziko South African National Gallery, Governme...","Derek Bauer’s critical cartooning works, Alter..."
7,R89 – R175,31 October 2017 to 27 April 2018,Time: 6pm | Sat 5pm | Sun 4pm,The Galileo Open Air Cinema,Various,The Galileo Open Air Cinema is back again with...
8,R137.50 | Children R67.50,1 November 2017 to 28 February 2018,6pm to 8pm,Table Mountain Cableway Sunset Special,"Table Mountain Aerial Cableway Station, Tafelb...",The Table Mountain Cableway offers a Sunset Sp...
9,Free entrance,20 November 2017 to 20 February 2018,Mon to Fri 9.30am to 5pm | Sat 9.30am to 1pm,Eclectica: Divergent Tides,"Venue: Eclectica Gallery, 179 Buitengracht St,...",\nEclectica presents a group exhibition featur...


## Split the Date Range into start and end dates

In [11]:

# Split the free-text 'Date-range' column into parsed start and end dates.
def split_date_range(date_range):
    """Parse one 'Date-range' value into a ``(start, stop)`` pair of Timestamps.

    Three shapes are handled:

    * missing value                  -> ``(NaT, NaT)``
    * ``"<start> to <stop>"``        -> both sides parsed with ``pd.to_datetime``
    * a single date with no ``to``   -> the same Timestamp for start and stop

    When the start is only a day number (e.g. ``"27 to 27 January 2018"``),
    the month and year are taken from the stop date.

    Parameters
    ----------
    date_range : str or missing value
        Raw text from the 'Date-range' column.

    Returns
    -------
    tuple
        ``(start, stop)`` as ``pd.Timestamp`` objects, or ``pd.NaT`` for both
        when the input is missing.

    Raises
    ------
    ValueError
        Propagated from ``pd.to_datetime`` / ``pd.Timestamp`` when a piece of
        the text cannot be interpreted as a date.
    """
    if pd.isnull(date_range):
        return pd.NaT, pd.NaT

    # Only the first and last pieces matter, exactly as with a plain split.
    pieces = [piece.strip() for piece in str(date_range).split(' to ')]
    start_text, stop_text = pieces[0], pieces[-1]

    stop = pd.to_datetime(stop_text)

    if start_text.isdigit():
        # Just a day is given: borrow month and year from the stop date.
        # pd.Timestamp replaces pd.datetime, which was removed in pandas 2.0.
        start = pd.Timestamp(year=stop.year, month=stop.month, day=int(start_text))
    else:
        # Parse the start too, so both columns hold real dates instead of
        # one holding raw text and the other a stringified Timestamp.
        start = pd.to_datetime(start_text)

    return start, stop


date_pairs = [split_date_range(value) for value in df['Date-range']]
starts = [start for start, _ in date_pairs]
stops = [stop for _, stop in date_pairs]

# Store proper datetime64 columns (NaT marks a missing range) rather than a
# mix of strings and NaT, so date arithmetic and sorting work downstream.
df['Start-date'] = pd.Series(starts, index=df.index, dtype='datetime64[ns]')
df['End-date'] = pd.Series(stops, index=df.index, dtype='datetime64[ns]')
df = df.drop(columns='Date-range')

df.head()

Unnamed: 0,Cost,Time,Title,Venue,event_text,Start-date,End-date
0,R80 to R95,8pm,When Swallows Cry,"Baxter Theatre Centre, Main Rd, Rondebosch, Ca...",Mike van Graan’s When Swallows Cry is a hard-h...,2018-01-31,2018-02-08
1,Free,Time: Opening night 6pm to 8pm | Mon – Fri 9am...,Kemang Wa Lehulere @ Stevenson Gallery,"Stevenson Gallery, Buchanan Building, 160 Sir ...","Stevenson presents Here I am, a concrete man, ...",2018-01-18,2018-03-10
2,R100,Fridays 4pm,Pink Summer Soirées,"Radisson Blu Hotel, 22 Riebeek St, Cape Town",The Ghibli Bar at Radisson Blu Hotel presents ...,NaT,NaT
3,R150 to R180,8pm to 10pm,Mozart Winds: CT Concert Series,"Baxter Concert Hall, 2 Main Rd, Rondebosch, Ca...",Following the UCT Summer School’s Magic of Moz...,2018-01-27,2018-01-27
4,Free,Time: Tue to Fri 10am to 5pm | Sat 10am to 2pm,StateoftheART: The Marks we Make,"Venue: 50 Buitenkant St, CBD, Cape Town",\nStateoftheART gallery presents a group exhib...,2018-01-25,2018-02-10


## Clean the Cost column


In [88]:
# Start from the raw cost text; each str.replace below returns a new Series,
# so df['Cost'] itself is left untouched by this cell.
costs = df['Cost']

# Strip decoration so that only prices (and words such as "Free") remain.
# `regex` is passed explicitly because the default of Series.str.replace
# changed from True to False in pandas 2.0; without it the parenthesis
# pattern below is searched for literally and never matches.
costs = costs.str.replace('Cost:', '', regex=False)
costs = costs.str.replace('R', '', regex=False)
# [^)]* keeps each match inside a single pair of parentheses, so two
# parenthesised notes cannot swallow a price that sits between them.
costs = costs.str.replace(r'\([^)]*\)', '', regex=True)

#### Single numbers - convert to number

In [89]:
def fix_single_numbers(x):
    """Turn a cost entry consisting of one lone number into a float.

    The value is first converted to text. Text containing a space is treated
    as a multi-part entry and returned (as text) unchanged; otherwise a float
    conversion is attempted and, if that fails, the text is returned.
    """
    text = str(x)

    # A space means several tokens (a range, a description, ...): not a single number.
    if ' ' in text:
        return text

    try:
        return float(text)
    except ValueError:
        return text

# Run the single-number conversion over every entry of the cleaned cost text.
fixed_costs = costs.apply(fix_single_numbers)

#### Fix free events to 0 cost

In [90]:
def check_free(x):
    """Map a free-of-charge cost entry to 0 and leave every other entry as is.

    Parameters
    ----------
    x : any
        A cost entry; it is inspected through its ``str()`` form.

    Returns
    -------
    int or the original value
        ``0`` when the text contains "free" in any letter case and does not
        list several options; otherwise ``x`` unchanged.
    """
    text = str(x)

    # Multiple options - not necessarily free
    if '|' in text:
        return x

    # Case-insensitive so that "FREE" is recognised as well as "Free"/"free".
    if 'free' in text.lower():
        return 0

    return x

# Continue from the output of the previous cleaning step; applying this to
# `costs` instead would silently throw that step's result away.
fixed_costs = fixed_costs.apply(check_free)

#### Convert multiple prices to average price

In [91]:
def isDigit(d):
    """Return True when ``float(d)`` succeeds, False when it raises ValueError."""
    try:
        float(d)
    except ValueError:
        return False
    else:
        return True

def avg_cost(x):
    """
    Average the numeric tokens found in a cost entry.

    The entry is converted to text and split on whitespace; every token that
    passes ``isDigit`` is treated as a price. When at least one price is found
    and their total is greater than zero, the mean of the prices is returned.
    In every other case the entry is returned unchanged.
    """
    prices = []
    for token in str(x).split():
        if isDigit(token):
            prices.append(float(token))

    # Written as "not (total > 0)" on purpose: a NaN total must fall through
    # to the unchanged entry, which a "<= 0" comparison would not do.
    if not prices or not (sum(prices) > 0):
        return x

    return np.mean(prices)

# Replace entries that contain one or more prices with the mean of those prices.
fixed_costs = fixed_costs.apply(avg_cost)

In [96]:
# Overwrite the 'Cost' column with the cleaned values and preview the first rows.
df['Cost'] = fixed_costs
df.head()

Unnamed: 0,Cost,Time,Title,Venue,event_text,Start-date,End-date
0,87.5,8pm,When Swallows Cry,"Baxter Theatre Centre, Main Rd, Rondebosch, Ca...",Mike van Graan’s When Swallows Cry is a hard-h...,2018-01-31,2018-02-08
1,0.0,Time: Opening night 6pm to 8pm | Mon – Fri 9am...,Kemang Wa Lehulere @ Stevenson Gallery,"Stevenson Gallery, Buchanan Building, 160 Sir ...","Stevenson presents Here I am, a concrete man, ...",2018-01-18,2018-03-10
2,100.0,Fridays 4pm,Pink Summer Soirées,"Radisson Blu Hotel, 22 Riebeek St, Cape Town",The Ghibli Bar at Radisson Blu Hotel presents ...,NaT,NaT
3,165.0,8pm to 10pm,Mozart Winds: CT Concert Series,"Baxter Concert Hall, 2 Main Rd, Rondebosch, Ca...",Following the UCT Summer School’s Magic of Moz...,2018-01-27,2018-01-27
4,0.0,Time: Tue to Fri 10am to 5pm | Sat 10am to 2pm,StateoftheART: The Marks we Make,"Venue: 50 Buitenkant St, CBD, Cape Town",\nStateoftheART gallery presents a group exhib...,2018-01-25,2018-02-10
