In [1]:
%matplotlib

from tabula import read_pdf
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.gridspec as gridspec
from scipy import stats

from os import listdir

Using matplotlib backend: MacOSX


In [2]:
# Load the 2000-2015 coca crop table from the UNODC PDF file.
# '.' is declared as the thousands separator so the figures parse as numbers.
crops_new = read_pdf('Cultivos.pdf', pandas_options={'thousands': '.'})

# The 2009 header is labelled '2009*'; normalise it so that every year label
# can be converted with int() below.
crops_new = crops_new.rename(columns={'2009*': '2009'})

# Every column after the first one is a year: map its label to an int.
year_rename_dict = {label: int(label) for label in crops_new.columns[1:]}

# Integer year columns, one row per 'Departamento'.
crops_new = crops_new.rename(columns=year_rename_dict).set_index('Departamento')

In [3]:
# Load the historical (1994-2000) crop table from the UNODC PDF file.
crops_old = read_pdf('Cultivos_historicos.pdf')

# The first parsed column holds the row labels; call it 'Country'.
first_label = crops_old.columns[0]
crops_old = crops_old.rename(columns={first_label: 'Country'})

# Hand-entered hectares per year, one value per table row (four rows; the
# last value of each list is the sum of the first three).
# NOTE(review): the row order must match the rows parsed from the PDF - confirm.
hectares_by_year = {
    1994: [48100, 44700, 108600, 201400],
    1995: [48600, 50900, 115300, 214800],
    1996: [48100, 67200, 94400, 209700],
    1997: [45800, 79400, 68800, 194000],
    1998: [38000, 101800, 51000, 190800],
    1999: [21800, 160100, 38700, 220600],
    2000: [14600, 163300, 43400, 221300],
}
for yr, hectares in hectares_by_year.items():
    crops_old[yr] = hectares

# Discard the columns at positions 1..8 (the ones that follow 'Country'),
# then index the table by country.
parsed_cols = crops_old.columns[1:9]
crops_old = crops_old.drop(columns=parsed_cols).set_index(['Country'])

In [4]:
# Define function that cleans Seizure data obtained from https://data.unodc.org/#state:11
# Unfortunately it is only possible to obtain the data in PDF form at the moment
def clean_seizure(path, country):
    """Read a seizure table from a PDF and return it as a tidy DataFrame.

    Parameters
    ----------
    path : str
        Path of the PDF file handed to ``read_pdf``.
    country : str
        Value written into the ``Country`` column of every returned row.

    Returns
    -------
    pandas.DataFrame
        Columns ``Year``, ``Drug Group``, ``Drug``, ``Quantity``, ``Unit`` and
        ``Country``. Only rows whose unit is (or was converted to)
        ``'Kilogram'`` are kept and ``Quantity`` holds floats.

    Raises
    ------
    ValueError
        If the parsed table has fewer than the five expected columns.
    """
    df = read_pdf(path, pandas_options={'header': None})
    cols_to_rename = {0: 'Year', 1: 'Drug Group', 2: 'Drug', 3: 'Quantity', 4: 'Unit'}
    if len(df.columns) < len(cols_to_rename):
        raise ValueError(
            'expected at least %d columns in %r, found %d'
            % (len(cols_to_rename), path, len(df.columns))
        )

    # Keep the first five columns and drop every trailing one, however many
    # the parser produced (the tables seen so far have 7 or 8 columns).
    df = df.drop(columns=df.columns[5:]).rename(columns=cols_to_rename)
    df['Drug Group'] = 'Cocaine-type'

    # The year is only present on some rows; carry it forward. Assign the
    # result back instead of filling a chained selection in place, which is
    # not guaranteed to modify ``df``.
    df['Year'] = df['Year'].ffill()

    # Remove repeated header rows and rows with missing cells.
    df = df[df['Quantity'] != 'Quantity'].dropna().copy()

    # Quantities use ',' as thousands separator; str() also tolerates cells
    # that were already parsed as numbers.
    df['Quantity'] = df['Quantity'].map(lambda qty: float(str(qty).replace(',', '')))

    # Normalise the drug label (exact-match replacement).
    df['Drug'] = df['Drug'].replace('Cocaine (base, paste', 'Coca paste/cocaine base')

    # Express tons in kilograms.
    # NOTE(review): 907 kg corresponds to a US short ton; if the source
    # reports metric tons the factor should be 1000 - confirm.
    in_tons = df['Unit'] == 'Ton'
    df.loc[in_tons, 'Quantity'] = df.loc[in_tons, 'Quantity'] * 907
    df.loc[in_tons, 'Unit'] = 'Kilogram'

    df = df.reset_index(drop=True)
    df['Country'] = country

    # Anything not expressed in kilograms by now cannot be aggregated.
    df = df[df['Unit'] == 'Kilogram']

    return df

In [5]:
# Load the Colombian seizure tables covering 1994-2015, one PDF per period,
# and clean each of them with clean_seizure.
folder = 'Seizures_Col_94_15/'
files = ['Seizures_Col_94-99.pdf', 'Seizures_Col_00-04.pdf', 'Seizures_Col_05-10.pdf',
         'Seizures_Col_11-15.pdf']

# Files are processed in list order, so the names on the left line up with
# the periods in ``files``.
(seizures_col_94_99,
 seizures_col_00_04,
 seizures_col_05_10,
 seizures_col_11_15) = [clean_seizure(folder + pdf_name, 'Colombia') for pdf_name in files]

In [6]:
# Load the USA seizure tables covering 1994-2015, one PDF per period,
# and clean each of them with clean_seizure.
folder = 'Seizures_USA_94_15/'
files = ['Seizures_USA_94-99.pdf', 'Seizures_USA_00-04.pdf', 'Seizures_USA_05-10.pdf',
         'Seizures_USA_11-15.pdf']

# Files are processed in list order, so the names on the left line up with
# the periods in ``files``.
(seizures_USA_94_99,
 seizures_USA_00_04,
 seizures_USA_05_10,
 seizures_USA_11_15) = [clean_seizure(folder + pdf_name, 'USA') for pdf_name in files]

In [7]:
# Read and process US Use Data (COCAINE - PAST YEAR USE) from the
# National Household Survey on Drug Abuse, one CSV file per survey year.
# Original files can be obtained from https://www.icpsr.umich.edu/icpsrweb/ICPSR/index.jsp
# Here (to comply with GitHub's file size policy) only the relevant columns have been used
def _percent_used_last_year(folder):
    """Return the share of respondents (in %) flagged in ``COCYR``, per file.

    Every file in ``folder`` except ``.DS_Store`` is read as CSV, in sorted
    file-name order. Two encodings of ``COCYR`` are supported:

    * numeric 0/1 flags - the share is the sum over the non-missing count;
    * text answers - an answer counts when it contains ``'Used within'``.

    Missing answers are excluded from the denominator. A file without any
    answer yields ``nan``.

    Parameters
    ----------
    folder : str
        Directory path including the trailing separator.

    Returns
    -------
    list of float
        One percentage, rounded to two decimals, per file.
    """
    names = [name for name in listdir(folder) if name != '.DS_Store']
    names.sort()

    rates = []
    for name in names:
        cocyr = pd.read_csv(folder + name, usecols=['COCYR']).COCYR
        total = cocyr.count()

        # Compare the *set* of observed values: ``unique()`` returns values in
        # order of appearance, so an element-wise test against [0, 1] would
        # misclassify files that start with a 1 or contain a single value.
        observed = set(cocyr.dropna().unique())
        if observed <= {0, 1}:
            used = cocyr.sum()
        else:
            # Plain substring match; missing answers count as "not used".
            used = cocyr.str.contains('Used within', regex=False, na=False).sum()

        rates.append(round(used / total * 100, 2) if total else float('nan'))
    return rates


# Past-year use for the 1994-1999 surveys and for the 2000-2014 surveys.
Used_LYr_94_99 = _percent_used_last_year('US Use Data 94-99/')
Used_LYr_00_14 = _percent_used_last_year('US Use Data 00-14/')

In [8]:
# US Use Data (LESS ACTIVITIES B/C OF COC USE) for 00 to 14 from the
# National Household Survey on Drug Abuse, one CSV file per survey year.
# Original files: https://www.icpsr.umich.edu/icpsrweb/ICPSR/index.jsp
# Only the relevant column is kept here to comply with GitHub's file size policy.
folder = 'US Use Data 00-14 (less active)/'

# Every file except the macOS '.DS_Store' entry, in sorted name order.
files = sorted(name for name in listdir(folder) if name != '.DS_Store')

# For each file, the number of rows whose COCLSACT answer is exactly 'Yes'.
LSACT_LYr_00_14 = []
for name in files:
    answers = pd.read_csv(folder + name, usecols=['COCLSACT']).COCLSACT
    LSACT_LYr_00_14.append(sum(answers == 'Yes'))

In [10]:
# Build the figure: five stacked grid rows, the first two of which are used
# by the usage panel created below.
fig = plt.figure()
gs = gridspec.GridSpec(5, 1)

# All four spines are hidden on every panel.
off_spines = ['left', 'top', 'right', 'bottom']

# --- Panel 1: share of the population that used cocaine in the last year ---
years_94_99 = np.arange(1994, 2000)
years_00_14 = np.arange(2000, 2015)

ax = fig.add_subplot(gs[:2, 0])
for years, rates in ((years_94_99, Used_LYr_94_99), (years_00_14, Used_LYr_00_14)):
    ax.bar(years, rates, color='salmon')

# Vertical marker between 1999 and 2000, plus a dotted 2 % reference line.
ax.vlines(1999.5, 0, 3.5, color='black', alpha=0.9)
ax.hlines(2, 1993, 2014.5, color='black', linestyle='dotted')
ax.text(1992, 2.1, '2% of Pop.')

# Arrow pointing at the vertical marker, with its caption at the arrow tail.
ax.annotate(
    'Start of Plan Colombia',
    xy=(1999.5, 3),
    xytext=(1994, 3.5),
    arrowprops={'facecolor': 'black', 'shrink': 0.01},
)

ax.set_ylabel('Used Cocaine\nwithin Last Year\n(% of Population)', fontsize=9)
ax.yaxis.set_ticks([])        # no y tick marks / labels
ax.xaxis.set_visible(False)   # the x axis is only drawn on the last panel

for side in off_spines:
    ax.spines[side].set_visible(False)

# --- Overlay: people reporting being less active because of cocaine ---
ax_ = ax.twinx()  # second y axis sharing the same x axis
label_txt = '# of People Reporting Being Less\nActive Due to Cocaine Use'
ax_.plot(years_00_14, LSACT_LYr_00_14, label=label_txt, color='black')
ax_.legend(loc=1, frameon=True, facecolor='white')

ax_.yaxis.set_ticks([])

for side in off_spines:
    ax_.spines[side].set_visible(False)

for tick_label in ax_.get_yticklabels():
    tick_label.set_color('black')

# Write each yearly count next to its point on the line.
for year, count in zip(years_00_14, LSACT_LYr_00_14):
    ax_.text(year, count, str(count), fontsize=9, color='black')

# --- Panel 2: hectares of coca plantations in Colombia ---
ax1 = fig.add_subplot(gs[2, 0], sharex=ax)

# One bar series from the historical table and one from the recent table.
colombia_old = crops_old.loc['Colombia']
colombia_new = crops_new.loc['Total Nacional']
for hectares in (colombia_old, colombia_new):
    ax1.bar(hectares.index, hectares, color='salmon')

# Vertical marker between 1999 and 2000, plus a dotted 100,000 ha reference.
ax1.vlines(1999.5, 0, 250000, color='black', alpha=0.9)
ax1.hlines(100000, 1993, 2014.5, color='black', linestyle='dotted')
ax1.text(1992, 100000*1.1, '100,000 Hectares')

ax1.ticklabel_format(style='sci', axis='y', scilimits=(0, 0))
ax1.yaxis.set_ticks([])
ax1.xaxis.set_visible(False)

for side in off_spines:
    ax1.spines[side].set_visible(False)

ax1.set_ylabel('Hectares of\nCoca Plantations\nin Colombia', fontsize=9)
    
# --- Panel 3: kilos of cocaine seized in Colombia ---
ax2 = fig.add_subplot(gs[3, 0], sharex=ax)

col_list = [seizures_col_94_99, seizures_col_00_04, seizures_col_05_10, seizures_col_11_15]

# One bar per year: total quantity over all rows of that year.
for period in col_list:
    yearly = period.groupby('Year').sum()
    for patch in ax2.bar(yearly.index, yearly.Quantity):
        patch.set_facecolor('salmon')

# Vertical marker between 1999 and 2000, plus a dotted 1,000,000 kg reference.
ax2.vlines(1999.5, 0, 1300000, color='black', alpha=0.9)
ax2.hlines(1000000, 1993, 2014.5, color='black', linestyle='dotted')
ax2.text(1992, 1000000*1.1, '1 M Kg')

ax2.ticklabel_format(style='sci', axis='y', scilimits=(0, 0))
ax2.yaxis.set_ticks([])
ax2.xaxis.set_visible(False)
ax2.set_ylabel('Kilos of\nCocaine Seized\nin Colombia', fontsize=9)

for side in off_spines:
    ax2.spines[side].set_visible(False)

# --- Panel 4: kilos of cocaine seized in the USA ---
ax3 = fig.add_subplot(gs[4, 0], sharex=ax)

# All four periods must come from the USA tables (the 2005-2015 entries
# previously pointed at the Colombian frames, so this panel showed
# Colombian seizures for those years).
US_list = [seizures_USA_94_99, seizures_USA_00_04, seizures_USA_05_10, seizures_USA_11_15]

# One bar per year: total seized quantity over all rows of that year.
# Only the Quantity column is aggregated, and only once per period.
for period in US_list:
    yearly_quantity = period.groupby('Year')['Quantity'].sum()
    ax3.bar(yearly_quantity.index, yearly_quantity, color='salmon')

# Dotted 1,000,000 kg reference and the vertical marker between 1999 and 2000.
ax3.hlines(1000000, 1993, 2014.5, color='black', linestyle='dotted')
ax3.text(1992, 1000000*1.1, '1 M Kg')
ax3.vlines(1999.5, 0, 1600000, color='black', alpha=0.9)

ax3.ticklabel_format(style='sci', axis='y', scilimits=(0, 0))
ax3.get_yaxis().set_ticks([])
ax3.set_ylabel('Kilos of\nCocaine Seized\nin USA', fontsize=9)

# Last panel: this is the only one that shows the (shared) x axis,
# with a tick every second year.
ax3.get_xaxis().set_ticks(range(1994, 2016, 2))

for spine in off_spines:
    ax3.spines.get(spine).set_visible(False)