In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import os

In [None]:
# Load the cannabis usage CSV from the Resources folder
cannabis_data_path = os.path.join('Resources', 'cannabis_usage_1961_2018.csv')
cannabis_data = pd.read_csv(cannabis_data_path)

# Normalise the headers step by step: trim padding, lower-case,
# then turn spaces into underscores so columns work as attributes
cannabis_headers = cannabis_data.columns.str.strip()
cannabis_headers = cannabis_headers.str.lower()
cannabis_data.columns = cannabis_headers.str.replace(' ', '_')
cannabis_data.head()

In [None]:
# Columns of the cannabis table that the rest of the notebook never reads
unused_cannabis_columns = [
    'geo', 'dguid', 'uom', 'uom_id', 'scalar_factor', 'scalar_id',
    'vector', 'status', 'symbol', 'terminated', 'decimals',
]
cannabis_data = cannabis_data.drop(columns=unused_cannabis_columns)
cannabis_data.head()

In [None]:
# Load the population CSV from the Resources folder
pop_data_path = os.path.join('Resources', 'demographic_data_canada_1971_2020.csv')
pop_data = pd.read_csv(pop_data_path)

# Normalise the headers step by step: trim padding, lower-case,
# then turn spaces into underscores so columns work as attributes
pop_headers = pop_data.columns.str.strip()
pop_headers = pop_headers.str.lower()
pop_data.columns = pop_headers.str.replace(' ', '_')
pop_data

In [None]:
# Columns of the population table that the rest of the notebook never reads
unused_pop_columns = [
    'geo', 'dguid', 'sex', 'uom_id', 'scalar_factor', 'scalar_id',
    'vector', 'status', 'symbol', 'terminated', 'decimals',
]
pop_data = pop_data.drop(columns=unused_pop_columns)
pop_data

In [None]:
# Keep only the five-year age bins from 15 to 64; every other age_group
# row (totals and any remaining bins) is discarded by the mask below
five_year_bins_15_to_64 = [
    '15 to 19 years',
    '20 to 24 years',
    '25 to 29 years',
    '30 to 34 years',
    '35 to 39 years',
    '40 to 44 years',
    '45 to 49 years',
    '50 to 54 years',
    '55 to 59 years',
    '60 to 64 years',
]
in_selected_bins = pop_data.age_group.isin(five_year_bins_15_to_64)
relevant_age_df = pop_data.loc[in_selected_bins, :]

relevant_age_df

In [None]:
# Map each five-year population bin onto the coarser age group it is
# summed into. Every source bin must land in the target whose range
# contains it, so '40 to 44 years' belongs to '25 to 44 years'.
population_groups = {
    '15 to 19 years': '15 to 24 years',
    '20 to 24 years': '15 to 24 years',
    '25 to 29 years': '25 to 44 years',
    '30 to 34 years': '25 to 44 years',
    '35 to 39 years': '25 to 44 years',
    '40 to 44 years': '25 to 44 years',
    '45 to 49 years': '45 to 64 years',
    '50 to 54 years': '45 to 64 years',
    '55 to 59 years': '45 to 64 years',
    '60 to 64 years': '45 to 64 years',
}

# Re-label the five-year bins with their coarse age group. Only the
# age_group column is rewritten, so identical text in any other column
# is left untouched.
rebinned_pop_df = relevant_age_df.assign(
    age_group=relevant_age_df.age_group.replace(population_groups)
)

# Sum the population within each (year, coarse age group) pair.
# as_index=False keeps ref_date and age_group as ordinary columns, and
# groupby already returns the rows ordered by those keys.
grouped_binned_pop = (
    rebinned_pop_df
    .groupby(['ref_date', 'age_group'], as_index=False)['value']
    .sum()
)
grouped_binned_pop

In [None]:
# Age-group labels are whitespace-stripped before use so that neither the
# row filter nor the re-binning below depends on stray padding in the CSV
# (the previous mapping only matched '15 to 17 years ' with a trailing space).
age_group_clean = cannabis_data.age_group.str.strip()

# Keep the all-frequencies consuming-population rows, excluding the
# '15 years and over' and '65 years and over' age groups.
keep_rows = (
    (cannabis_data.frequency_of_use == 'Total, all frequencies')
    & (cannabis_data.prevalence == 'Consuming population of cannabis')
    & ~age_group_clean.isin(['15 years and over', '65 years and over'])
)
annual_age_consumption = (
    cannabis_data.loc[keep_rows, :]
    .assign(age_group=age_group_clean[keep_rows])
    .reset_index(drop=True)
)

# The two youngest bins are merged into '15 to 24 years'; labels not
# listed here are kept as they are.
age_groups = {
    '15 to 17 years': '15 to 24 years',
    '18 to 24 years': '15 to 24 years',
}

# Re-label only the age_group column so other columns cannot be altered
rebinned_cannabis_df = annual_age_consumption.assign(
    age_group=annual_age_consumption.age_group.replace(age_groups)
)

# Sum consumption within each (year, age group) pair, keeping the keys
# as ordinary columns
grouped_binned_cannabis = (
    rebinned_cannabis_df
    .groupby(['ref_date', 'age_group'], as_index=False)['value']
    .sum()
)

grouped_binned_cannabis

In [None]:
# Join cannabis consumption to population on year and age group. With an
# inner join only (ref_date, age_group) pairs present in both tables
# survive; the two 'value' columns get the _cannabis / _pop suffixes.
merged_df = pd.merge(
    grouped_binned_cannabis,
    grouped_binned_pop,
    how='inner',
    on=['ref_date', 'age_group'],
    suffixes=('_cannabis', '_pop'),
)
merged_df

In [None]:
# Split the merged table into one frame per age group; later cells reuse
# these three frames.
age_15_24_df = merged_df.loc[merged_df.age_group == '15 to 24 years', :]
age_25_44_df = merged_df.loc[merged_df.age_group == '25 to 44 years', :]
age_45_64_df = merged_df.loc[merged_df.age_group == '45 to 64 years', :]

# Plot cannabis users divided by population for each age group, with a
# legend and axis labels so the three lines can be told apart.
fig, ax = plt.subplots()
for group_label, group_df in (
    ('15 to 24 years', age_15_24_df),
    ('25 to 44 years', age_25_44_df),
    ('45 to 64 years', age_45_64_df),
):
    ax.plot(
        group_df.ref_date,
        group_df.value_cannabis / group_df.value_pop,
        label=group_label,
    )
ax.legend(loc='upper left')
ax.set_title('Cannabis users relative to population by age group')
ax.set_xlabel('Year')
ax.set_ylabel('Cannabis users / population')

plt.show()

In [None]:
# Stacked area chart of the cannabis-consuming population per age group.
year = list(merged_df.ref_date.unique())

# Keys double as legend labels, so each must name the age group whose
# frame supplies the values.
population_by_age_group = {
    '15 to 24 years': list(age_15_24_df.value_cannabis),
    '25 to 44 years': list(age_25_44_df.value_cannabis),
    '45 to 64 years': list(age_45_64_df.value_cannabis),
}

fig, ax = plt.subplots()
ax.stackplot(year, population_by_age_group.values(),
             labels=population_by_age_group.keys())
ax.legend(loc='upper left')
ax.set_title('number of cannabis Users over time')
ax.set_xlabel('Year')
# NOTE(review): the unit in this label is not established by the code in
# this notebook (scalar_factor is dropped earlier) - confirm against the data.
ax.set_ylabel('Number of people (millions)')

plt.show()

In [None]:
# Stacked area chart of the total population per age group.
year = list(merged_df.ref_date.unique())

# Keys double as legend labels, so each must name the age group whose
# frame supplies the values.
population_by_age_group = {
    '15 to 24 years': list(age_15_24_df.value_pop),
    '25 to 44 years': list(age_25_44_df.value_pop),
    '45 to 64 years': list(age_45_64_df.value_pop),
}

fig, ax = plt.subplots()
ax.stackplot(year, population_by_age_group.values(),
             labels=population_by_age_group.keys())
ax.legend(loc='upper left')
ax.set_title('number of people over time')
ax.set_xlabel('Year')
# NOTE(review): the unit in this label is not established by the code in
# this notebook - confirm against the data.
ax.set_ylabel('Number of people (ten millions)')

plt.show()

In [None]:
# Stacked area chart of cannabis users divided by population, per age group.
year = list(merged_df.ref_date.unique())

# Keys double as legend labels, so each must name the age group whose
# frame supplies the values.
population_by_age_group = {
    '15 to 24 years': list(age_15_24_df.value_cannabis/age_15_24_df.value_pop),
    '25 to 44 years': list(age_25_44_df.value_cannabis/age_25_44_df.value_pop),
    '45 to 64 years': list(age_45_64_df.value_cannabis/age_45_64_df.value_pop),
}

# NOTE(review): the plotted values are plain cannabis/population ratios
# (not multiplied by 100) and each ratio uses its own age group's
# population as denominator, while the labels below say "percent" of the
# "total population" - confirm the intended units and wording.
fig, ax = plt.subplots()
ax.stackplot(year, population_by_age_group.values(),
             labels=population_by_age_group.keys())
ax.legend(loc='upper left')
ax.set_title('Percent of population using Cannabis')
ax.set_xlabel('Year')
ax.set_ylabel('Percent of total population')

plt.show()

In [None]:
# Scatter the cannabis/population ratio (x) against the number of cannabis
# users (y) for every year and age group in the merged table.
fig, ax = plt.subplots()
ax.scatter(merged_df.value_cannabis/merged_df.value_pop, merged_df.value_cannabis)
ax.set_xlabel('Cannabis users / population')
ax.set_ylabel('Number of cannabis users')

plt.show()

In [None]:
# Year-over-year change in cannabis users for the 15-24 group.
# Despite its name, pct_change holds the absolute difference between
# consecutive rows (current minus previous), with 0 for the first row.
value_storage = list(age_15_24_df.value_cannabis)

pct_change = [0]
pct_change.extend(
    current - previous
    for previous, current in zip(value_storage, value_storage[1:])
)

plt.plot(age_15_24_df.value_cannabis, pct_change)

In [None]:
# Year-over-year change in cannabis users for the 25-44 group.
# Despite its name, pct_change holds the absolute difference between
# consecutive rows (current minus previous), with 0 for the first row.
value_storage = list(age_25_44_df.value_cannabis)

pct_change = [0] + [
    current - previous
    for previous, current in zip(value_storage, value_storage[1:])
]

# The x values must come from the same age group as the changes; the
# previous version plotted them against the 15-24 group's values.
fig, ax = plt.subplots()
ax.plot(age_25_44_df.value_cannabis, pct_change)
ax.set_title('25 to 44 years')
ax.set_xlabel('Number of cannabis users')
ax.set_ylabel('Change from previous row')

plt.show()

In [None]:
# Year-over-year change in cannabis users for the 45-64 group.
# Despite its name, pct_change holds the absolute difference between
# consecutive rows (current minus previous), with 0 for the first row.
value_storage = list(age_45_64_df.value_cannabis)

pct_change = [0] + [
    current - previous
    for previous, current in zip(value_storage, value_storage[1:])
]

# The x values must come from the same age group as the changes; the
# previous version plotted them against the 15-24 group's values.
fig, ax = plt.subplots()
ax.plot(age_45_64_df.value_cannabis, pct_change)
ax.set_title('45 to 64 years')
ax.set_xlabel('Number of cannabis users')
ax.set_ylabel('Change from previous row')

plt.show()