In [5]:
import pandas as pd
import geopandas as gpd
import numpy as np
pd.options.display.float_format = '{:,.2f}'.format
pd.set_option('display.max_columns', 30)
pd.set_option('display.max_rows', 300)

import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
import plotly.express as px
%matplotlib inline

import os
import sys
import datetime

import warnings; warnings.filterwarnings('ignore')

sys.path.append('..')
from config import CFG
CFG = CFG()

date = datetime.datetime(2023, 3, 1).strftime('%Y-%m-%d')
CFG.DATE = date

In [12]:
df = pd.read_csv(os.path.join(CFG.PROCESSED_DATA_PATH, f"df-processed-{CFG.DATE}.csv"), parse_dates=['visit date'])
df.head(3)

Unnamed: 0.1,Unnamed: 0,visit date,city,site,expected drug,category,colour,texture,fentanyl strip,benzo strip,ftir component 0,ftir component 1,ftir component 2,ftir component 3,ftir component 4,ftir component 5,contains_opioids,total_opioids,ftir_benzo,total_benzos
0,0,2023-01-31,"Penticton, BC",Fairhaven,Down (Unknown Opioid),Opioid,Purple,Chunk,1,-1,Fentanyl,Erythritol,Caffeine,,,,1,1,0,0
1,1,2023-01-31,"Vancouver, BC",Get Your Drugs Tested,Unknown,Unknown,Brown (light),Chunk,1,-1,Uncertain match,Fentanyl,,,,,1,1,0,0
2,2,2023-01-31,"Vancouver, BC",Get Your Drugs Tested,Alprazolam,Depressant,Green (light),Chunk,-1,1,Flualprazolam,Microcrystalline cellulose,,,,,0,0,1,1


In [27]:
start_date = df['visit date'].min().date
end_date = df['visit date'].max().date()
start_date

<function Timestamp.date>

In [26]:
newdf = df[(df['visit date'].dt.date >= start_date) & (df['visit date'].dt.date <= end_date)]
no_opioid = newdf[~(df['category'] == 'Opioid') | (df['category'] == 'Polysubstance')].index
no_opioid = newdf.iloc[no_opioid]

In [5]:
site_count = df.value_counts('site').head(10).sort_values(ascending=False)
city_counts = df.value_counts('city').head(10)

fig = go.Figure(go.Bar(
    x=site_count.values,
    y=site_count.index,
    orientation='h',
))
fig.show()

- The highest number of samples in each category is much more popular than all others

In [8]:
fig = go.Figure(go.Pie(
    labels=df.category.value_counts().index,
    values=df.category.value_counts().values,
    hole=0.65
))
fig.update_traces(hoverinfo='value+percent', textinfo='label', textfont_size=20,
                  marker=dict(line=dict(color='#000000', width=2)))
fig.update_layout(
    annotations=[
        dict(text="Categories", x=0.5, y=0.5, font_size=20, showarrow=False)
    ]
)
fig.show()

In [8]:
fig = go.Figure(go.Pie(
    labels=['samples'],
    values=[df.shape[0]],
    hole=0.65
))
fig.update_traces(hoverinfo='value+percent', textinfo='label', textfont_size=20,
                  marker=dict(line=dict(color='#000000', width=2)))
fig.update_layout(
    annotations=[
        dict(text="Total<br>Samples", x=0.5, y=0.5, font_size=20, showarrow=False)
    ]
)
fig.show()

In [14]:
fig = go.Figure(go.Pie(
    labels=df['total_opioids'].replace({1: "Positive", 0: "Negative"}).value_counts().index,
    values=df['total_opioids'].value_counts().values,
    hole=0.65
))
fig.update_traces(hoverinfo='value+percent', textinfo='label', textfont_size=20,
                  marker=dict(line=dict(color='#000000', width=2)))
fig.update_layout(
    #     yaxis=dict(
    #     range=[0.2, .8]
    # ),
    # xaxis=dict(
    #     range=[0.2, .8]
    # ),
    autosize=False,
    width=500,
    height=500,
    showlegend=False,
    annotations=[
        dict(text="Opioids<br>Present", x=0.5, y=0.5, font_size=20, showarrow=False)
    ])

fig.show()

In [12]:
fig = go.Figure(go.Pie(
    labels=df['total_benzos'].replace({1: "Positive", 0: "Negative"}).value_counts().index,
    values=df['total_benzos'].value_counts().values,
    hole=0.65
))
fig.update_traces(hoverinfo='value+percent', textinfo='label', textfont_size=20,
                  marker=dict(line=dict(color='#000000', width=2)))
fig.update_layout(
    annotations=[
        dict(text="Benzos<br>Present", x=0.5, y=0.5, font_size=20, showarrow=False)
    ])

fig.show()

In [4]:
ftirs = [x for x in df.columns.tolist() if x.startswith('ftir')]
for col in ftirs:
    df[col] = df[col].astype(str)

In [22]:
no_opioid = df[~(df['category'] == 'Opioid') | (df['category'] == 'Polysubstance')].index
no_opioid = df.iloc[no_opioid]

fig = go.Figure(go.Pie(
    labels=no_opioid['total_opioids'].replace({1: "Positive", 0: "Negative"}).value_counts().index,
    values=no_opioid['total_opioids'].value_counts().values,
    hole=0.65
))
fig.update_traces(hoverinfo='value+percent', textinfo='label', textfont_size=20,
                  marker=dict(line=dict(color='#000000', width=2)))
fig.update_layout(
    annotations=[
        dict(text="Unexpected<br>Opioids", x=0.5, y=0.5, font_size=20, showarrow=False)
    ])
fig.show()

In [5]:
no_benzo = df[~(df['category'] == 'Depressant')].index
no_benzo = df.iloc[no_benzo]

fig = go.Figure(go.Pie(
    labels=no_benzo['total_benzos'].replace({1: "Positive", 0: "Negative"}).value_counts().index,
    values=no_benzo['total_benzos'].value_counts().values,
    hole=0.65
))
fig.update_traces(hoverinfo='value+percent', textinfo='label', textfont_size=20,
                  marker=dict(line=dict(color='#000000', width=2)))
fig.update_layout(
    annotations=[
        dict(text="Unexpected<br>Benzos", x=0.5, y=0.5, font_size=20, showarrow=False)
    ])
fig.show()

In [25]:
total_opioids = df[df['total_opioids'] == 1]

fig = go.Figure(go.Pie(
    labels=total_opioids['total_benzos'].replace({1: "Positive", 0: "Negative"}).value_counts().index,
    values=total_opioids['total_benzos'].value_counts().values,
    hole=0.65
))
fig.update_traces(hoverinfo='value+percent', textinfo='label', textfont_size=20,
                  marker=dict(line=dict(color='#000000', width=2)))
fig.update_layout(
    annotations=[
        dict(text="Benzos in<br>Opioids", x=0.5, y=0.5, font_size=20, showarrow=False)
    ])
fig.show()

In [340]:
df['colour2'] = df['colour']
# colour_df = df.groupby(['colour', 'texture']).agg(lambda x: x.value_counts().index[0])

In [402]:
colours = df.groupby(['colour', 'texture'])['colour2'].head(1)

In [375]:
df['colour2'] = df['colour2'].replace({
    "Purple": '#A020F0',
    "light Brown": "#C4A484",
    "light Green": "#90EE90",
    "Pink": "#FFC0CB",
    "Colourless": "#F8F8FF",
    "White": "#FFFFFF",
    "Black": "#000000",
    "dark Purple": "#301934",
    "Brown": "#964B00",
    "dark Blue": "#00008B",
    "Blue": "#0000FF",
    "light Yellow": "#FFFFE0",
    "light Pink": "#FFB6C1",
    "dark Brown": "#654321",
    "light Grey": "#D3D3D3",
    "dark Pink": "#FF1493",
    "light Blue": "#ADD8E6",
    "light Purple": "#E6E6FA",
    "light Orange": "#FFA07A",
    "light Red": "#FFA07A",
    "dark Orange": "#FF8C00",
    "dark Green": "#006400",
    "dark Grey": "#A9A9A9",
    "dark Red": "#8B0000",
    "dark Yellow": "#FFD700",
    "Other": "#F8F8FF"    
})

In [409]:
df['all drugs'] = 'all drugs'
fig = px.sunburst(df, path=['all drugs', 'colour', 'texture'], color_discrete_sequence=['white']+colours.values.tolist(), color='colour')
fig.show()

In [411]:
fig.data[0]

Sunburst({
    'branchvalues': 'total',
    'customdata': array([['Brown (light)'],
                         ['Blue'],
                         ['White'],
                         ...,
                         ['Grey (light)'],
                         ['Blue'],
                         ['White']], dtype=object),
    'domain': {'x': [0.0, 1.0], 'y': [0.0, 1.0]},
    'hovertemplate': ('labels=%{label}<br>count=%{val' ... '{customdata[0]}<extra></extra>'),
    'ids': array(['all drugs/Brown (light)/Crystal', 'all drugs/Blue/Other',
                  'all drugs/White/Tablet (pharmaceutical)', ...,
                  'all drugs/Grey (light)/Granules', 'all drugs/Blue/Pressed tablet',
                  'all drugs/White/Capsule'], dtype=object),
    'labels': array(['Crystal', 'Other', 'Tablet (pharmaceutical)', ..., 'Granules',
                     'Pressed tablet', 'Capsule'], dtype=object),
    'marker': {'colors': [white, #A020F0, #C4A484, ..., Green, #A020F0, #C4A484]},
    'name': '',
 

In [377]:
replacement_list = {
    'Tucibi': '2c-b',
    'MD-X (Unknown)': '(MDMA|MDA)',
    'Cannabis': '(thc|CBD|cannabidiol)',
    'Changa': 'DMT',
    '2C-Family': '(2c-b|2c-i|2c-t-2|2C-Family)',
    'Speed': 'methamphetamine',
    'Down (Unknown Opioid)': '(.*an(y|i)l|heroin|(code|morph|buprenorph)ine|(oxy|hydro)(cod|morph)one|.*tazene|w-1(8|9)|opium|(furanyl\s)?uf-17|6-mam)',
    'Down': '(.*an(y|i)l|heroin|(code|morph|buprenorph)ine|(oxy|hydro)(cod|morph)one|.*tazene|w-1(8|9)|opium|(furanyl\s)?uf-17|6-mam)'
}

In [379]:
# df['replaced'] = df['expected drug'].replace(replacement_list, regex=False)
df['split_drug'] = df['expected drug'].str.split(' and ')
exp_drugs = pd.DataFrame(df['split_drug'].tolist(), index=df.index)
exp_drugs = exp_drugs.replace(replacement_list, regex=False)

In [252]:
df['expected_matched'] = [int(d in l) for d, l in zip(df['expected drug'], df[ftirs].values.tolist())]

In [255]:
fig = go.Figure(go.Pie(
    labels=df['expected_matched'].replace({1: "Yes", 0: "No"}).value_counts().index,
    values=df['expected_matched'].value_counts().values,
    hole=0.65
))
fig.update_traces(
    hoverinfo='value+percent', textinfo='label', textfont_size=20,
    marker=dict(line=dict(color='#000000', width=2))
)
fig.update_layout(
    annotations=[
        dict(text="Expected Drug<br>Matched Result", x=0.5, y=0.5, font_size=20, showarrow=False)
    ]
)

In [406]:
def match_drugs(expected, tested):
    return 1 if re.search(expected, tested, re.IGNORECASE) else 0

In [None]:
def match_drugs(expected, tested):
    score = sum(1 if re.search(drug, test, re.IGNORECASE) else 0 for test in tested)
    return 1 if score == num_exp else 0    

In [352]:
df['expected drug'].unique()

array(['Down (Unknown Opioid)', 'Unknown', 'Alprazolam',
       'Methamphetamine', 'Fentanyl', 'MDMA', 'Ketamine', 'Pyrazolam',
       'Opium', 'MD-X (Unknown)', 'MDA', '4-HO-MET', 'Cocaine', 'Codeine',
       'Oxycodone', 'Amphetamine', 'Xylazine', 'Bromazolam', 'Gaboxadol',
       '4-AcO-DMT', 'GHB', 'Crack Cocaine', 'Diazepam', 'Tucibi',
       'Hydromorphone', 'LSD', 'Clonazepam', 'Testosterone Cypionate',
       'Clomifene', 'Tamoxifen', 'Copper Chromite', 'Methandrostenolone',
       'GBL', 'Acetaminophen and Oxycodone', 'Tadalafil', 'Heroin',
       '2C-B', 'CBD', '5-MeO-MiPT', 'Mescaline', 'GW501516',
       'Metonitazene', 'DPT', 'Modafinil', 'Sildenafil',
       'Testosterone Isocaproate', 'DMT', 'Desalkylgidazepam',
       'Benzocaine', 'Ivermectin', 'Quetiapine', 'Flualprazolam',
       'Sildenafil and Tadalafil', '2C-E', 'Fentanyl and Heroin',
       'Tramadol', '1,4-Butanediol', 'Carisoprodol', 'Lorazepam',
       'Deschloroetizolam', 'FXE', '4-AcO-MET', 'Oxandrolone',
  

In [347]:
def test(x):
    return 1 if re.search(
    '(.*an(y|i)l|heroin|(code|morph|buprenorph)ine|(oxy|hydro)(cod|morph)one|.*tazene|w-1(8|9)|opium|(furanyl\s)?uf-17|6-mam)',
    x,
    re.IGNORECASE
) else 0

test2 = df['ftir component 0'].apply(lambda x: test(x))
t2 = test2[test2 == 1].index

In [330]:
test1 = df['ftir component 0'].apply(lambda x: opioid_present(x))
t1 = test1[test1 == 1].index

In [441]:
incorrect_strip_opioid = (df['contains_opioids'] == 1) & ((df['fentanyl strip'] == -1) | (df['fentanyl strip'] == 0))
incorrect_ftir_opioid = (df['fentanyl strip'] == 1) & (df['contains_opioids'] == 0)

incorrect_strip_benzo = (df['ftir_benzo'] == 1) & ((df['benzo strip'] == -1) | (df['benzo strip'] == 0))
incorrect_ftir_benzo = (df['benzo strip'] == 1) & (df['ftir_benzo'] == 0)

# strip_diff = df[df['fentanyl strip'] == 1].index.difference(df[df['contains_opioids'] == 1].index)
# strip_diff = df.iloc[strip_diff]

In [443]:
(incorrect_strip_opioid + incorrect_strip_benzo).value_counts()

False    56394
True      3606
dtype: int64

In [447]:
incorrect_ftir_opioid.value_counts()

False    54562
True      5438
dtype: int64

In [440]:
idx = df[incorrect_strip].index
idx2 = df[incorrect_ftir].index
df['incorrect_strip'] = pd.Series(df.index.isin(idx)).astype(int)
df['incorrect_ftir'] = pd.Series(df.index.isin(idx2)).astype(int)
df

Unnamed: 0,visit date,city,site,expected drug,category,colour,texture,fentanyl strip,benzo strip,ftir component 0,ftir component 1,ftir component 2,ftir component 3,ftir component 4,ftir component 5,contains_opioids,total_opioids,ftir_benzo,total_benzos,incorrect_strip,incorrect_ftir
0,2023-01-31,Penticton,Fairhaven,Down (Unknown Opioid),Opioid,Purple,Chunk,1,-1,Fentanyl,Erythritol,Caffeine,,,,1,1,0,0,0,0
1,2023-01-31,Vancouver,Get Your Drugs Tested,Unknown,Unknown,Brown (light),Chunk,1,-1,Uncertain match,Fentanyl,,,,,1,1,0,0,0,0
2,2023-01-31,Vancouver,Get Your Drugs Tested,Alprazolam,Depressant,Green (light),Chunk,-1,1,Flualprazolam,Microcrystalline cellulose,,,,,0,0,1,1,0,0
3,2023-01-31,Nanaimo,CMHA,Down (Unknown Opioid),Opioid,Pink,Powder,1,1,Fentanyl,Mannitol,Caffeine,Bromazolam,,,1,1,1,1,0,0
4,2023-01-31,Cranbrook,ANKORS (Cranbrook),Methamphetamine,Stimulant,Colourless,Crystal,-1,0,Methamphetamine,,,,,,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59995,2018-12-03,Vancouver,Insite,MDMA,Psychedelic,Black,Granules,0,0,Uncertain match,Phenacetin,,,,,0,0,0,0,0,0
59996,2018-12-03,Vancouver,Insite,Fentanyl,Opioid,Purple,Granules,1,0,Caffeine,Inositol,Heroin hcl,Fentanyl,,,1,1,0,0,0,0
59997,2018-12-03,Vancouver,Getaway,Unknown,Unknown,Colourless,Crystal,-1,0,Methamphetamine,,,,,,0,0,0,0,0,0
59998,2018-12-03,Vancouver,Getaway,Fentanyl,Opioid,Green,Pebble,1,0,Fentanyl,Caffeine,Mannitol,Mannitol,Caffeine,Fentanyl,1,1,0,0,0,0
