In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

def erp_per_year(crime, year):
    '''
    returns the estimated resident population (ERP) of Melbourne for
    the year

    crime: DataFrame with "reference_period", "Local_Government_Area"
           and "LGA_ERP" columns
    year:  value compared against the "reference_period" column

    The "LGA_ERP" value of the first row matching both the year and the
    'melbourne' local government area is returned.

    raises IndexError when no row matches
    '''
    #boolean indexing: rows for Melbourne in the requested year
    is_match = (crime["reference_period"] == year) & \
        (crime["Local_Government_Area"] == 'melbourne')
    erps = crime.loc[is_match, "LGA_ERP"]
    if erps.empty:
        #same exception type as an out-of-range lookup, clearer message
        raise IndexError(
            "no LGA_ERP entry for 'melbourne' in year {!r}".format(year))
    return erps.iloc[0]

def crime_by_division(crime, year, division, erp):
    '''
    returns the total crime in Melbourne for the specified offence
    division and year per 100,000 people also prints out the total
    crime count and rate

    crime:    DataFrame with "reference_period", "Local_Government_Area",
              "CSA_Offence_Division" and "Offence_Count" columns
    year:     value compared against the "reference_period" column
    division: value compared against the "CSA_Offence_Division" column
    erp:      population used to scale the count to a rate

    When no row matches, the total is 0 and so is the returned rate.
    '''
    #boolean indexing
    is_match = (crime["reference_period"] == year) & \
        (crime["Local_Government_Area"] == 'melbourne') & \
        (crime["CSA_Offence_Division"] == division)
    #to find total offence count: selecting the column as a Series makes
    #.sum() a scalar, so no positional lookup on a labelled Series is
    #needed and the print below shows a plain number
    total = crime.loc[is_match, "Offence_Count"].sum()
    print("Total crime for ", year, ' Division:', division, 'is: ', total)
    #scaled offence rate
    per_100000 = round(total / erp * 100000)
    print("per 100,000: ", per_100000)
    return per_100000
    
def grouped_bar_chart(crime):
    '''
    visualises a grouped bar chart of the offence rate per 100,000
    people for the years 2014 and 2015, with one pair of bars for each
    offence division (A to F).

    crime: DataFrame passed on to erp_per_year and crime_by_division

    Shows the figure with plt.show() and returns nothing. The count and
    rate for every division are printed by crime_by_division as the data
    is collected (all 2014 divisions first, then all 2015 divisions).
    '''
    crime_types = ['A Crimes against the person',
                   'B Property and deception offences', 'C Drug offences',
                   'D Public order and security offences',
                   'E Justice procedures offences', 'F Other offences']
    erp1 = erp_per_year(crime, 2014)
    erp2 = erp_per_year(crime, 2015)
    #creates list of data for each of the years 2014 and 2015
    data1 = [crime_by_division(crime, 2014, crime_type, erp1)
             for crime_type in crime_types]
    data2 = [crime_by_division(crime, 2015, crime_type, erp2)
             for crime_type in crime_types]
    #plots bargraph
    plt.subplots()
    index = np.arange(len(crime_types))
    width = 0.3
    #distance between the centres of the 2014 and 2015 bars of one group
    offset = width + 0.04
    plt.bar(index, data1, width, color=(.506, 0, .047),
            edgecolor=(.506, 0, .047), label='2014')
    plt.bar(index + offset, data2, width, color=(1, .514, .553),
            edgecolor=(1, .514, .553), label='2015')
    #editing to make bar chart clearer: each tick sits midway between
    #the two bars of its group rather than under the 2015 bar
    plt.xticks(index + offset / 2, ['A', 'B', 'C', 'D', 'E', 'F'],
               fontsize=15)
    plt.legend()
    plt.grid(True)
    #indicates the offence rate on top of each bar, centred on the bar
    for i, (v, v2) in enumerate(zip(data1, data2)):
        plt.text(i, v + 50, str(v), color=(.31, 0, .027), fontsize=11,
                 fontweight='bold', ha='center')
        plt.text(i + offset, v2 + 50, str(v2), color=(.31, 0, .027),
                 fontsize=11, fontweight='bold', ha='center')
    plt.show()

def main():
    '''
    reads the crime data table from its CSV file and draws the grouped
    bar chart for it
    '''
    grouped_bar_chart(pd.read_csv("crimebylocationdatatable.csv"))

#run main() only when this file is executed directly, not when imported
if __name__ == "__main__":
    main()


Total crime for  2014  Division: A Crimes against the person is:  Offence_Count    3723
dtype: int64
per 100,000:  3046.0
Total crime for  2014  Division: B Property and deception offences is:  Offence_Count    18041
dtype: int64
per 100,000:  14763.0
Total crime for  2014  Division: C Drug offences is:  Offence_Count    2050
dtype: int64
per 100,000:  1677.0
Total crime for  2014  Division: D Public order and security offences is:  Offence_Count    5077
dtype: int64
per 100,000:  4154.0
Total crime for  2014  Division: E Justice procedures offences is:  Offence_Count    3687
dtype: int64
per 100,000:  3017.0
Total crime for  2014  Division: F Other offences is:  Offence_Count    169
dtype: int64
per 100,000:  138.0
Total crime for  2015  Division: A Crimes against the person is:  Offence_Count    3743
dtype: int64
per 100,000:  2885.0
Total crime for  2015  Division: B Property and deception offences is:  Offence_Count    19543
dtype: int64
per 100,000:  15064.0
Total crime for  2015 