![](https://thefreshtoast.com/wp-content/uploads/2019/11/saying-cannabis-instead-of-marijuana-doesnt-influence-voters-study-finds.jpg)

In [None]:
import numpy as np 
import pandas as pd
import plotly as py
import plotly.graph_objs as go
import plotly.express as px
from plotly.offline import init_notebook_mode
init_notebook_mode(connected = True)
import seaborn as sns

import matplotlib.pyplot as plt
%matplotlib inline

import warnings
warnings.filterwarnings("ignore")

# Never truncate the column list when a wide frame is displayed.
pd.options.display.max_columns = None

# Source table used by every cell below.
DATA_PATH = '../input/drug-use-by-age/drug-use-by-age.csv'
drugs = pd.read_csv(DATA_PATH)

# Basic information

In [None]:
# Preview the first three rows to check the column layout.
drugs.head(3)

In [None]:
# Column dtypes and non-null counts; columns that are not numeric need cleaning.
drugs.info()

Some values are missing: they are encoded as "-" where there is zero usage, so they are replaced with 0.

In [None]:
# "-" is the placeholder used for cells without a value; treat it as zero.
drugs = drugs.replace('-', 0)

# Columns that held a "-" are still non-numeric after the replacement.
# Convert every such column except the age-group label to float. 'age' is
# excluded by name (not by position) so the cell does not depend on column
# order, and the guard makes a second run of the cell a clean no-op.
text_cols = [col for col in drugs.select_dtypes(exclude = 'number').columns if col != 'age']
if text_cols:
    drugs[text_cols] = drugs[text_cols].astype('float')

# EDA

**The most common drugs**

In [None]:
# Every second column from position 4 up to 28 is summarised here
# (presumably the '<drug>-use' columns, alcohol excluded - verify against the CSV header).
# Their values are treated as percentages of the respondents in each row.
use_cols = drugs.columns.to_list()[4:28:2]
respondents = drugs['n']

# Respondent-weighted share per drug across all rows:
#   sum(n * pct / 100) / sum(n) * 100, rounded to one decimal.
# Vectorised instead of a nested Python loop over rows and columns.
users_per_drug = drugs[use_cols].mul(0.01).mul(respondents, axis = 0).sum()
share_pct = (users_per_drug / respondents.sum() * 100).round(1)

# Build the table from the actual column list rather than a fixed 12-row index.
comdrugs = pd.DataFrame({
    'Drug': [col.split('-')[0] for col in use_cols],   # keep the text before the first '-'
    'Percentage of addicts': share_pct.to_numpy(),
})
comdrugs = comdrugs.sort_values('Percentage of addicts')
comdrugs['total'] = 100.0   # full-width background bar for the chart
comdrugs

In [None]:
# Horizontal "share of 100%" bar chart of the drugs in `comdrugs`, with
# explanatory notes placed to the right of the axes via figure-level text.
plt.figure(figsize = (8, 14))

sns.set_style("white")
plt.title('The most common drugs', fontsize = 42, fontname = 'monospace', x = 0.9, y = 1.05, color = '#283655')

# Background bars (always 100) first, then the real shares drawn on top.
# Both calls draw on, and return, the same Axes.
chart = sns.barplot(data = comdrugs, x = 'total', y = 'Drug', color = '#34675C')
sns.barplot(data = comdrugs, x = 'Percentage of addicts', y = 'Drug', color = '#B3C100', ax = chart)
plt.xticks([])
plt.yticks(fontname = 'monospace', fontsize = 14, color = '#283655')
plt.ylabel('')
plt.xlabel('')

chart.spines['left'].set_linewidth(1.5)
for side in ['right', 'top', 'bottom']:
    chart.spines[side].set_visible(False)

# Label only the share bars: they are the patches added by the second barplot,
# i.e. the last len(comdrugs) patches. Iterating over all patches would also
# write a "100.0%" label next to every background bar.
for bar in chart.patches[-len(comdrugs):]:
    width = bar.get_width()
    plt.text(6 + width, bar.get_y() + 0.55*bar.get_height(), f'{width: .1f}%',
             ha = 'center', va = 'center', fontname = 'monospace', fontsize = 15, color = 'white')

# Respondent total derived from the data (space as thousands separator)
# instead of a hard-coded literal.
respondents_total = f"{int(drugs['n'].sum()):,}".replace(',', ' ')
plt.figtext(0.98, 0.135, f'Number of respondents - {respondents_total}', fontsize = 14, fontname = 'monospace', color = '#34675C')

# Section headings of the side panel: (y position, text).
side_headings = [
    (0.86, '''The least used drugs'''),
    (0.5, '''The most used drugs'''),
]
for y_pos, heading in side_headings:
    plt.figtext(1.22, y_pos, heading, fontsize = 20, fontname = 'monospace', color = '#34675C', ha = 'center')

# Descriptive notes of the side panel: (y position, text).
side_notes = [
    (0.765, '''1. Crack is a crystalline form of cocaine that 
is a mixture of cocaine salts with baking soda 
(sodium bicarbonate) or other chemical base. 
Unlike regular cocaine, crack cocaine is 
ingested through smoking.'''),
    (0.69, '''2. Sedative a chemically heterogeneous group of 
medicinal substances of plant or synthetic 
origin that cause calming or reducing emotional 
tension without a hypnotic effect'''),
    (0.645, '''3. Heroin is a semisynthetic opioid drug, in the 
late XIX century — early XX used as a drug.'''),
    (0.555, '''4. Meth is an amphetamine derivative, a white 
crystalline substance. Methamphetamine is 
a psychostimulant with a high potential for 
addiction, and is therefore classified as 
a narcotic substance.'''),
    (0.38, '''1. Hallucinogens a class of psychoactive 
substances that cause hallucinations and 
introduce altered states of consciousness. 
A common feature that distinguishes hallucinogens 
from other types of psychoactive substances 
is the ability to change the nature of thinking, 
mood and perception.'''),
    (0.3, '''2. Pain (analgesics) is a group of medicines 
that are used in medicine to relieve pain of 
any intensity and prepare the patient for 
anesthesia for certain medical procedures.'''),
    (0.21, '''3. Marijuana is a psychoactive drug derived 
from cannabis, of the psychedelic type. 
The herb affects all aspects of the human 
body: physiology, psyche, behavior, 
nervous activity.'''),
]
for y_pos, note in side_notes:
    plt.figtext(0.93, y_pos, note, fontsize = 12, fontname = 'monospace', color = '#283655')

plt.show()

**How do the number of users and the quantity of alcohol and drugs consumed change with age?**

In [None]:
# Two stacked panels over the age groups: the '<drug>-use' columns on top and
# the '<drug>-frequency' columns below, one line + marker series per substance.

# Set the style before any axes exists so both panels are created with it.
sns.set_style("white")
fig = plt.figure(figsize = (16, 18))

# (column prefix, legend label, colour) for every plotted substance.
substances = [
    ('alcohol', 'Alcohol', '#4CB5F5'),
    ('marijuana', 'Marijuana', '#B7B8B6'),
    ('cocaine', 'Cocaine', '#34675C'),
    ('hallucinogen', 'Hallucinogen', '#B3C100'),
]

panels = []         # Axes of the two panels, top first
line_handles = {}   # column suffix -> line artists, in `substances` order
for position, suffix in enumerate(['use', 'frequency'], start = 1):
    ax = plt.subplot(2, 1, position)
    ax.grid(color = 'gray', linestyle = ':', axis = 'x', alpha = 0.8, zorder = 0,  dashes = (1,7))

    handles = []
    for prefix, _, colour in substances:
        column = f'{prefix}-{suffix}'
        sns.lineplot(data = drugs, x = 'age', y = column, color = colour, linewidth = 3, ax = ax)
        handles.append(ax.lines[-1])   # the line that was just drawn
        sns.scatterplot(data = drugs, x = 'age', y = column, color = colour, s = 60, ax = ax)

    # plt.subplot() made `ax` the current axes, so these act on this panel.
    plt.xticks(fontname = 'monospace')
    plt.yticks(fontname = 'monospace')
    plt.ylabel('')
    plt.xlabel('')

    panels.append(ax)
    line_handles[suffix] = handles

ax_use, ax_freq = panels

# Pass explicit handles: a labels-only legend pairs labels with artists purely
# by order, and with line and scatter artists interleaved on the axes the
# labels can end up attached to the wrong artists.
ax_use.legend(line_handles['use'], [label for _, label, _ in substances],
              bbox_to_anchor = (1.05, -0.1), ncol = 1, borderpad = 4, frameon = False, fontsize = 13)

for ax in panels:
    for side in ['right', 'top']:
        ax.spines[side].set_visible(False)
    for side in ['left', 'bottom']:
        ax.spines[side].set_linewidth(1.5)
    ax.tick_params(labelsize = 11)

# Panel headings.
plt.figtext(0.05, 0.98, 'Percentage of users', fontsize = 22, fontname = 'monospace', color = '#283655')
plt.figtext(0.05, 0.388, 'Consumed quantity', fontsize = 22, fontname = 'monospace', color = '#283655')

# Commentary between the panels, coloured like the series it refers to:
# (y position, colour, text).
commentary = [
    (0.54, '#4CB5F5', '''Most alcohol is consumed by young people aged 21-25 years. It is very strange that everyone after the age 
of 21 has the same amount of alcohol consumed.'''),
    (0.51, '#B7B8B6', '''Most marijuana is consumed by young people aged 18-21 years. It is interesting that in 30-34 years people 
use a lot of marijuana, apparently a lot of stress.'''),
    (0.49, '#34675C', '''Most cocaine is consumed by young people aged 20-25 years. People aged 50-64 do not spare money for cocaine.'''),
    (0.45, '#B3C100', '''Most hallucinogens is consumed by young people aged 18-20 years. The number of hallucinogens consumed by 
children aged 12 and adults aged 50-64 is very high. Such a strong attraction in children can be understood, 
but why people over 50 go to breaking bad is not clear.'''),
]
for y_pos, colour, text in commentary:
    plt.figtext(0.028, y_pos, text, fontsize = 12, fontname = 'monospace', color = colour)

fig.tight_layout(h_pad = 5)

plt.show()

**Which age group uses drugs more than others**

For each age group, I compute the number of users of each type of drug, add these numbers up, and divide the total by the number of respondents in that age group.

In [None]:
# Per-row drug coefficient: for every column in the [4:28:2] slice the value is
# treated as a percentage and converted to a head count (n * pct / 100); the
# head counts are summed and divided by the row's respondent count `n`.
#
# Assigned as one whole column. Element-wise chained assignment
# (drugs['drug_coef'][k] += ...) writes into a temporary object under pandas
# Copy-on-Write, which would silently leave the column at 0.
use_cols = drugs.columns.to_list()[4:28:2]
respondents = drugs['n']
users_per_row = drugs[use_cols].mul(0.01).mul(respondents, axis = 0).sum(axis = 1)
drugs['drug_coef'] = (users_per_row / respondents).round(2)

In [None]:
# Lollipop chart of `drug_coef` per age group with a horizontal line at the mean.
fig, ax = plt.subplots(figsize = (17, 9), dpi = 80)

# Font settings shared by the tick labels and the axis labels.
label_font = dict(fontname = 'monospace', fontsize = 14, color = '#283655')

ax.set_title('Which age group uses drugs more than others', fontsize = 35, fontname = 'monospace', x = 0.5, y = 1.07, color = '#283655')

# Stem from zero up to the value, then the marker on top.
age_groups = drugs['age']
coefficients = drugs['drug_coef']
ax.vlines(x = age_groups, ymin = 0, ymax = coefficients, color = '#34675C', linewidth = 3)
ax.scatter(x = age_groups, y = coefficients, s = 120, color = '#34675C')

plt.xticks(**label_font)
plt.yticks(**label_font)
ax.set_ylim(0, 0.8)
ax.set_ylabel('Drugs coefficient', labelpad = 20, **label_font)
ax.set_xlabel('Age group', labelpad = 20, **label_font)
ax.grid(color = 'gray', linestyle = ':', axis = 'y', alpha = 0.8, zorder = 0,  dashes = (1,7))

# Thicker left/bottom spines, no right/top spines.
for side, spine in ax.spines.items():
    if side in ('left', 'bottom'):
        spine.set_linewidth(1.5)
    elif side in ('right', 'top'):
        spine.set_visible(False)

# Reference line at the mean coefficient, with a caption placed in figure coordinates.
ax.axhline(y = coefficients.mean(), xmin = 0, xmax = 1, color = '#B3C100', alpha = 0.5)
fig.text(0.13, 0.51, 'mean', fontsize = 13, fontname = 'monospace', color = '#B3C100')

plt.show()

# End

**Important note - this work is for research purposes only, and the author does not promote drug use :)**