In [1]:
import pandas as pd
import geopandas as gpd
import numpy as np
# Display options: floats with thousands separators and two decimals; show up to 30 columns / 300 rows.
pd.options.display.float_format = '{:,.2f}'.format
pd.set_option('display.max_columns', 30)
pd.set_option('display.max_rows', 300)

import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
import plotly.express as px
%matplotlib inline

import datetime
import os
import re
import sys

import warnings; warnings.filterwarnings('ignore')

# Make the parent directory importable so the `config` module can be found.
sys.path.append('..')
from config import CFG

# Rebind the name to a configuration instance.
CFG = CFG()

# Snapshot date as YYYY-MM-DD; it is used to build the processed-data file name.
date = format(datetime.datetime(2023, 2, 10), '%Y-%m-%d')
CFG.DATE = date

In [4]:
# Load the processed table for the configured date and preview its first three rows.
processed_csv = os.path.join(CFG.PROCESSED_DATA_PATH, f"df-processed-{CFG.DATE}.csv")
df = pd.read_csv(processed_csv)
df.head(3)

Unnamed: 0,visit date,city,site,expected drug,category,colour,texture,fentanyl strip,benzo strip,ftir component 0,ftir component 1,ftir component 2,ftir component 3,ftir component 4,ftir component 5
0,2023-01-31,Penticton,Fairhaven,Down (Unknown Opioid),Opioid,Purple,Chunk,1,-1,Fentanyl,Erythritol,Caffeine,,,
1,2023-01-31,Vancouver,Get Your Drugs Tested,Unknown,Unknown,Brown (light),Chunk,1,-1,Uncertain match,Fentanyl,,,,
2,2023-01-31,Vancouver,Get Your Drugs Tested,Alprazolam,Depressant,Green (light),Chunk,-1,1,Flualprazolam,Microcrystalline cellulose,,,,


In [6]:
# List the distinct values found in the 'category' column.
print(df['category'].unique())

['Opioid' 'Unknown' 'Depressant' 'Stimulant' 'Psychedelic' 'Other'
 'Polysubstance']


In [7]:
# Number of distinct values in each column.
df.nunique()

visit date          1363
city                  35
site                 110
expected drug        382
category               7
colour                32
texture               15
fentanyl strip         3
benzo strip            3
ftir component 0     386
ftir component 1     312
ftir component 2     195
ftir component 3     123
ftir component 4      62
ftir component 5      35
dtype: int64

In [5]:
# Ten most frequent sites (largest first) and ten most frequent cities.
site_count = df.value_counts('site').head(10).sort_values(ascending=False)
city_counts = df.value_counts('city').head(10)

# Horizontal bar chart of the sample count for each of those sites.
site_bars = go.Bar(x=site_count.values, y=site_count.index, orientation='h')
fig = go.Figure(site_bars)
fig.show()

- Within each grouping, the most frequent value accounts for far more samples than any of the others.

In [8]:
# Donut chart of how many samples fall into each value of 'category'.
category_counts = df.category.value_counts()
fig = go.Figure(
    go.Pie(labels=category_counts.index, values=category_counts.values, hole=0.65)
)
fig.update_traces(
    hoverinfo='value+percent',
    textinfo='label',
    textfont_size=20,
    marker=dict(line=dict(color='#000000', width=2)),
)
# Caption placed in the middle of the donut hole.
fig.update_layout(
    annotations=[dict(text="Categories", x=0.5, y=0.5, font_size=20, showarrow=False)]
)
fig.show()

In [9]:
# Single-slice donut whose only value is the number of rows in df.
total_samples = len(df)
fig = go.Figure(
    go.Pie(labels=['samples'], values=[total_samples], hole=0.65)
)
fig.update_traces(
    hoverinfo='value+percent',
    textinfo='label',
    textfont_size=20,
    marker=dict(line=dict(color='#000000', width=2)),
)
# Caption placed in the middle of the donut hole.
fig.update_layout(
    annotations=[dict(text="Total<br>Samples", x=0.5, y=0.5, font_size=20, showarrow=False)]
)
fig.show()

In [11]:
# Donut chart of the 'total_opioids' flag, with 1/0 relabelled as Positive/Negative.
opioid_flag_counts = (
    df['total_opioids']
    .replace({1: "Positive", 0: "Negative"})
    .value_counts()
)
fig = go.Figure(
    go.Pie(labels=opioid_flag_counts.index, values=opioid_flag_counts.values, hole=0.65)
)
fig.update_traces(
    hoverinfo='value+percent',
    textinfo='label',
    textfont_size=20,
    marker=dict(line=dict(color='#000000', width=2)),
)
# Caption placed in the middle of the donut hole.
fig.update_layout(
    annotations=[dict(text="Opioids<br>Present", x=0.5, y=0.5, font_size=20, showarrow=False)]
)
fig.show()

In [12]:
# Donut chart of the 'total_benzos' flag, with 1/0 relabelled as Positive/Negative.
benzo_flag_counts = (
    df['total_benzos']
    .replace({1: "Positive", 0: "Negative"})
    .value_counts()
)
fig = go.Figure(
    go.Pie(labels=benzo_flag_counts.index, values=benzo_flag_counts.values, hole=0.65)
)
fig.update_traces(
    hoverinfo='value+percent',
    textinfo='label',
    textfont_size=20,
    marker=dict(line=dict(color='#000000', width=2)),
)
# Caption placed in the middle of the donut hole.
fig.update_layout(
    annotations=[dict(text="Benzos<br>Present", x=0.5, y=0.5, font_size=20, showarrow=False)]
)
fig.show()

In [6]:
# Collect only the 'ftir component N' columns and cast them to str (missing values
# become the text 'nan'). The prefix includes the word 'component' so that other
# columns whose names merely start with 'ftir' (e.g. the 0/1 flag 'ftir_benzo')
# are not converted to strings or treated as components.
ftirs = [name for name in df.columns.tolist() if name.startswith('ftir component')]
for col in ftirs:
    df[col] = df[col].astype(str)

In [21]:
# Rows whose category is neither 'Opioid' nor 'Polysubstance'.
# The whole OR-expression is parenthesised before negating: `~` binds tighter than
# `|`, so without the outer parentheses only the first comparison would be negated.
# The boolean mask is applied directly, so the selection does not rely on df having
# a default 0..n-1 index.
expects_opioid = (df['category'] == 'Opioid') | (df['category'] == 'Polysubstance')
no_opioid = df[~expects_opioid]

In [22]:
# Donut chart of the 'total_opioids' flag within the no_opioid subset.
unexpected_opioid_counts = (
    no_opioid['total_opioids']
    .replace({1: "Positive", 0: "Negative"})
    .value_counts()
)
fig = go.Figure(
    go.Pie(
        labels=unexpected_opioid_counts.index,
        values=unexpected_opioid_counts.values,
        hole=0.65,
    )
)
fig.update_traces(
    hoverinfo='value+percent',
    textinfo='label',
    textfont_size=20,
    marker=dict(line=dict(color='#000000', width=2)),
)
# Caption placed in the middle of the donut hole.
fig.update_layout(
    annotations=[dict(text="Unexpected<br>Opioids", x=0.5, y=0.5, font_size=20, showarrow=False)]
)
fig.show()

In [24]:
# Keep only the rows whose 'total_opioids' flag equals 1.
total_opioids = df.loc[df['total_opioids'].eq(1)]

In [25]:
# Donut chart of the 'total_benzos' flag among the opioid-positive rows.
benzo_in_opioid_counts = (
    total_opioids['total_benzos']
    .replace({1: "Positive", 0: "Negative"})
    .value_counts()
)
fig = go.Figure(
    go.Pie(
        labels=benzo_in_opioid_counts.index,
        values=benzo_in_opioid_counts.values,
        hole=0.65,
    )
)
fig.update_traces(
    hoverinfo='value+percent',
    textinfo='label',
    textfont_size=20,
    marker=dict(line=dict(color='#000000', width=2)),
)
# Caption placed in the middle of the donut hole.
fig.update_layout(
    annotations=[dict(text="Benzos in<br>Opioids", x=0.5, y=0.5, font_size=20, showarrow=False)]
)
fig.show()

In [26]:
# Show the column labels currently present on df.
df.columns

Index(['visit date', 'city', 'site', 'expected drug', 'category', 'colour',
       'texture', 'fentanyl strip', 'benzo strip', 'ftir component 0',
       'ftir component 1', 'ftir component 2', 'ftir component 3',
       'ftir component 4', 'ftir component 5', 'contains_opioids',
       'total_opioids', 'ftir_benzo', 'total_benzos'],
      dtype='object')

In [28]:
# Add a constant column to serve as the single root node, then draw a sunburst
# with 'colour' as the second level and 'texture' as the third.
df['all drugs'] = 'all drugs'
sunburst_levels = ['all drugs', 'colour', 'texture']
fig = px.sunburst(df, path=sunburst_levels)
fig.show()

In [158]:
# Distinct city names, in order of first appearance, as a plain Python list.
# (A one-off geocoder lookup of the first city used to be tried here by hand.)
cities = pd.unique(df['city']).tolist()

In [54]:
def get_latlng(city):
    """Look up ``city`` with ``geocoder.geocodefarm`` and return ``(lat, lng)``.

    Args:
        city: Place name, passed unchanged to ``geocoder.geocodefarm``.

    Returns:
        tuple: The ``'lat'`` and ``'lng'`` entries of the result's ``json`` mapping.
    """
    result = geocoder.geocodefarm(city)
    return result.json['lat'], result.json['lng']

In [134]:
import time
def latlng(cities, delay=5):
    """Geocode each city and return a ``{city: [lat, lng]}`` dictionary.

    Args:
        cities: Iterable of place names; each one is passed to ``get_latlng``.
        delay: Seconds to pause between consecutive lookups. Defaults to 5.
            No pause is taken before the first lookup, because there is no
            earlier request to space it out from.

    Returns:
        dict: Maps every city to a two-element list ``[lat, lng]``. If a city
        appears more than once, the result of its last lookup is kept.
    """
    latlng_dict = {}
    for position, city in enumerate(cities):
        # Only wait *between* requests, and only when a positive delay is asked for.
        if position and delay:
            time.sleep(delay)
        lat, lng = get_latlng(city)
        latlng_dict[city] = [lat, lng]
    return latlng_dict

IndexError: list index out of range

In [198]:
# Append ", BC" to every city name that does not already end with it. Checking first
# keeps the cell idempotent: running it again does not produce "..., BC, BC".
province_suffix = ", BC"
needs_suffix = ~df['city'].str.endswith(province_suffix, na=False)
df.loc[needs_suffix, 'city'] = df.loc[needs_suffix, 'city'] + province_suffix

# Number of samples per city, most frequent first.
counts = df.value_counts('city')
counts

city
Vancouver, BC               49338
Nelson, BC                   1674
Kamloops, BC                 1177
Surrey, BC                   1116
Kelowna, BC                   780
Nanaimo, BC                   762
Penticton, BC                 751
Cranbrook, BC                 695
Vernon, BC                    585
New Westminster, BC           520
Prince George, BC             332
Salmo, BC                     318
Abbotsford, BC                312
Chilliwack, BC                279
Merritt, BC                   224
Fairmont Hot Springs, BC      209
Grand Forks, BC               197
Trail, BC                     166
Mission, BC                   102
Terrace, BC                    90
Chase, BC                      85
Boston Bar, BC                 51
Hope, BC                       47
Burnaby, BC                    40
Sun Peaks, BC                  39
Maple Ridge, BC                26
Coquitlam, BC                  21
Castlegar, BC                  21
White Rock, BC                 21
Kimberley

In [202]:
# One row per geocoded city: transpose the {city: [lat, lng]} mapping, move the city
# names out of the index and label the three columns.
# NOTE(review): this rebinds `latlng`, which is also the name of the geocoding function,
# and `latlng_dict` is not assigned at module level anywhere in view — confirm its origin.
latlng = (
    pd.DataFrame(latlng_dict)
    .T
    .reset_index()
    .set_axis(['city', 'lat', 'lng'], axis=1)
)

# Attach the per-city sample counts; an unnamed count column (label 0) is renamed 'count'.
counts_frame = counts.to_frame()
latlng_df = pd.merge(latlng, counts_frame, on='city').rename(columns={0: 'count'})

In [211]:
# Hover label for each city: its name, a line break, then its sample count.
count_labels = 'Count: ' + latlng_df['count'].astype(str)
latlng_df['text'] = latlng_df['city'] + '<br>' + count_labels

# Order cities from most to fewest samples and renumber the rows from 0.
latlng_df = (
    latlng_df
    .sort_values('count', ascending=False)
    .reset_index(drop=True)
)
latlng.shape

(35, 3)

In [213]:
# Print the cities that fall into each (start, stop) row slice of latlng_df.
limits = [(0, 1), (1, 3)]
for start, stop in limits:
    print(latlng_df[start:stop]['city'])

0    Vancouver, BC
Name: city, dtype: object
2    Kamloops, BC
3      Surrey, BC
Name: city, dtype: object


In [241]:
# Bubble map of sample counts per city. Rows of latlng_df are split into the
# (start, stop) slices listed in `limits`; each slice gets its own colour and
# legend entry, and marker area is the city's count divided by `scale`.
colors = ["royalblue","crimson","lightseagreen","orange","lightgrey"]
scale = 10
limits = [(0, 1), (1, 4), (4, 10), (10, 18), (18, 35)]

fig = go.Figure()

for color, lim in zip(colors, limits):
    df_sub = latlng_df[lim[0]:lim[1]]
    fig.add_trace(go.Scattergeo(
        lon = df_sub['lng'],
        lat = df_sub['lat'],
        text = df_sub['text'],
        marker = dict(
            size = df_sub['count']/scale,
            color = color,
            line_color='rgb(40,40,40)',
            line_width=0.5,
            sizemode = 'area'
        ),
        # Legend label is the row-slice bounds of this group.
        name = '{0} - {1}'.format(lim[0],lim[1])))

fig.update_layout(
        title_text = 'Samples per city<br>(Click legend to toggle traces)',
        showlegend = True,
        geo = dict(
            resolution = 50,
            # Restrict the visible window to latitudes 47-53 and longitudes -125 to -115.
            lataxis = dict(range=[47,53]),
            lonaxis = dict(range=[-125,-115]),
            landcolor = 'rgb(217, 217, 217)',
            subunitcolor = "rgb(217, 217, 217)",
            countrycolor = "rgb(217, 217, 217)",
            countrywidth = 0.5,
            subunitwidth = 0.5
        )
    )

fig.show()

In [154]:
# Read the pop_centers GeoJSON file. The path is assembled with os.path.join, as in
# the other cells, so it contains no backslash escape sequences and is not tied to
# Windows path separators.
geo = gpd.read_file(
    os.path.join(CFG.DATA_PATH, 'shapefiles', 'population_centers', 'pop_centers.geojson')
)

In [180]:
# Write `geo` back out as GeoJSON under a new file name.
# (A filter on geo['PCNAME'] against `cities` is currently disabled, so nothing is dropped here.)
pop_centers_out = os.path.join(
    CFG.DATA_PATH, 'shapefiles', 'population_centers', 'pop_centers_new.geojson'
)
geo.to_file(pop_centers_out, driver='GeoJSON')

In [184]:
import folium

# Interactive map centred on (50.5, -120.12) with the saved GeoJSON file drawn on top.
geojson_path = os.path.join(
    CFG.DATA_PATH, 'shapefiles', 'population_centers', 'pop_centers_new.geojson'
)
m = folium.Map(location=(50.5, -120.12), zoom_start=7)
overlay = folium.GeoJson(geojson_path)
overlay.add_to(m)
m

In [252]:
# 1 when the expected drug name appears verbatim among that row's `ftirs` column values, else 0.
component_rows = df[ftirs].values.tolist()
df['expected_matched'] = [
    int(expected in components)
    for expected, components in zip(df['expected drug'], component_rows)
]

In [255]:
# Donut chart of the 'expected_matched' flag, with 1/0 relabelled as Yes/No.
matched_counts = (
    df['expected_matched']
    .replace({1: "Yes", 0: "No"})
    .value_counts()
)
fig = go.Figure(
    go.Pie(labels=matched_counts.index, values=matched_counts.values, hole=0.65)
)
fig.update_traces(
    hoverinfo='value+percent',
    textinfo='label',
    textfont_size=20,
    marker=dict(line=dict(color='#000000', width=2)),
)
# The figure returned by update_layout is the cell's last expression, so it is what gets displayed.
fig.update_layout(
    annotations=[
        dict(text="Expected<br>Matched Results", x=0.5, y=0.5, font_size=20, showarrow=False)
    ]
)

In [377]:
# Opioid name pattern shared by both 'Down' entries below. It is a raw string so that
# `\s` reaches the regex engine as written instead of relying on Python passing an
# unrecognised escape sequence through.
_OPIOID_PATTERN = (
    r'(.*an(y|i)l|heroin|(code|morph|buprenorph)ine|(oxy|hydro)(cod|morph)one'
    r'|.*tazene|w-1(8|9)|opium|(furanyl\s)?uf-17|6-mam)'
)

# Maps an expected-drug label to the name or regex pattern that stands in for it.
replacement_list = {
    'Tucibi': '2c-b',
    'MD-X (Unknown)': '(MDMA|MDA)',
    'Cannabis': '(thc|CBD|cannabidiol)',
    'Changa': 'DMT',
    '2C-Family': '(2c-b|2c-i|2c-t-2|2C-Family)',
    'Speed': 'methamphetamine',
    'Down (Unknown Opioid)': _OPIOID_PATTERN,
    'Down': _OPIOID_PATTERN,
}

In [379]:
# Break combined entries such as 'Fentanyl and Heroin' into a list of individual names.
df['split_drug'] = df['expected drug'].str.split(' and ')

# Spread those lists over positional columns (keeping df's index), then swap every cell
# that exactly equals a key of replacement_list for the corresponding value.
exp_drugs = pd.DataFrame(df['split_drug'].tolist(), index=df.index).replace(
    replacement_list, regex=False
)

In [406]:
def match_drugs(expected, tested):
    """Return 1 if the regex ``expected`` is found anywhere in ``tested`` (case-insensitive), else 0."""
    found = re.search(expected, tested, flags=re.IGNORECASE)
    return int(found is not None)

In [None]:
def match_drugs(expected, tested):
    """Return 1 when every expected drug pattern is found among the tested components, else 0.

    Args:
        expected: A regex pattern string, or an iterable of pattern strings.
            Entries that are not strings (for example ``None`` placeholders)
            are ignored.
        tested: A component name, or an iterable of component names.
            Non-string entries are ignored.

    Returns:
        int: 1 if there is at least one expected pattern and each of them matches
        (``re.search``, case-insensitive) at least one tested component; 0 otherwise.
    """
    def _as_strings(value):
        # Accept a single string or any iterable; keep only the string entries.
        if isinstance(value, str):
            return [value]
        try:
            return [item for item in value if isinstance(item, str)]
        except TypeError:
            # Not iterable (e.g. None or a float NaN): nothing to match.
            return []

    patterns = _as_strings(expected)
    components = _as_strings(tested)
    num_exp = len(patterns)
    if num_exp == 0:
        return 0
    # Count the expected patterns that are found in at least one tested component.
    score = sum(
        1 if any(re.search(drug, test, re.IGNORECASE) for test in components) else 0
        for drug in patterns
    )
    return 1 if score == num_exp else 0

In [385]:
# Copy the split expected-drug columns onto df as expected_0, expected_1 and expected_2.
# NOTE(review): assumes exp_drugs has exactly three columns — confirm.
exp = ['expected_' + str(slot) for slot in range(3)]
df[exp] = exp_drugs

In [288]:

# Comma-separated text form of the lists in df['split_drug']. The column is read from
# df (there is no standalone `split_drug` variable) and joined with the .str accessor,
# so each result stays on the same index label as its source row.
df['expected_drug2'] = df['split_drug'].str.join(', ')

In [352]:
# List the distinct values of the 'expected drug' column.
df['expected drug'].unique()

array(['Down (Unknown Opioid)', 'Unknown', 'Alprazolam',
       'Methamphetamine', 'Fentanyl', 'MDMA', 'Ketamine', 'Pyrazolam',
       'Opium', 'MD-X (Unknown)', 'MDA', '4-HO-MET', 'Cocaine', 'Codeine',
       'Oxycodone', 'Amphetamine', 'Xylazine', 'Bromazolam', 'Gaboxadol',
       '4-AcO-DMT', 'GHB', 'Crack Cocaine', 'Diazepam', 'Tucibi',
       'Hydromorphone', 'LSD', 'Clonazepam', 'Testosterone Cypionate',
       'Clomifene', 'Tamoxifen', 'Copper Chromite', 'Methandrostenolone',
       'GBL', 'Acetaminophen and Oxycodone', 'Tadalafil', 'Heroin',
       '2C-B', 'CBD', '5-MeO-MiPT', 'Mescaline', 'GW501516',
       'Metonitazene', 'DPT', 'Modafinil', 'Sildenafil',
       'Testosterone Isocaproate', 'DMT', 'Desalkylgidazepam',
       'Benzocaine', 'Ivermectin', 'Quetiapine', 'Flualprazolam',
       'Sildenafil and Tadalafil', '2C-E', 'Fentanyl and Heroin',
       'Tramadol', '1,4-Butanediol', 'Carisoprodol', 'Lorazepam',
       'Deschloroetizolam', 'FXE', '4-AcO-MET', 'Oxandrolone',
  

In [364]:
# Show the rows whose expected drug is exactly 'Down and Methamphetamine'.
df[df['expected drug'] == 'Down and Methamphetamine']

Unnamed: 0,visit date,city,site,expected drug,category,colour,texture,fentanyl strip,benzo strip,ftir component 0,ftir component 1,ftir component 2,ftir component 3,ftir component 4,ftir component 5,contains_opioids,total_opioids,ftir_benzo,total_benzos,expected_matched,test,expected_drug2,replaced
1164,2022-12-08,Burnaby,Outreach Hub,Down and Methamphetamine,Polysubstance,Brown,Residue,1,1,Caffeine,Fentanyl,Uncertain carbohydrate,,,,1,1,0,1,0,"[Down, Methamphetamine]","Down, Methamphetamine",Down and Methamphetamine
2820,2023-01-20,Penticton,P+OPS Mobile (Penticton),Down and Methamphetamine,Polysubstance,Green,Chunk,1,-1,Fentanyl,Caffeine,Erythritol,,,,1,1,0,0,0,"[Down, Methamphetamine]","Down, Methamphetamine",Down and Methamphetamine
4033,2023-01-04,Cranbrook,ANKORS (Cranbrook),Down and Methamphetamine,Polysubstance,Brown,Chunk,1,1,Erythritol,Uncertain match,Fentanyl,Caffeine,,,1,1,0,1,0,"[Down, Methamphetamine]","Down, Methamphetamine",Down and Methamphetamine
5090,2022-11-14,Penticton,Martin St Outreach Centre OPS,Down and Methamphetamine,Polysubstance,Brown,Paste,1,-1,Water,Thc,,,,,0,1,0,0,0,"[Down, Methamphetamine]","Down, Methamphetamine",Down and Methamphetamine
5091,2022-11-14,Vancouver,Get Your Drugs Tested,Down and Methamphetamine,Polysubstance,Pink,Pebble,1,1,Caffeine,Methamphetamine,Mannitol,,,,0,1,0,1,0,"[Down, Methamphetamine]","Down, Methamphetamine",Down and Methamphetamine
5404,2022-11-28,New Westminster,Purpose Society,Down and Methamphetamine,Polysubstance,Green (light),Chunk,1,-1,Caffeine,Erythritol,Methamphetamine,Fentanyl,,,1,1,0,0,0,"[Down, Methamphetamine]","Down, Methamphetamine",Down and Methamphetamine
5483,2022-11-25,Prince George,POUNDS,Down and Methamphetamine,Polysubstance,Purple,Chunk,1,1,Caffeine,Methamphetamine,Fentanyl,,,,1,1,0,1,0,"[Down, Methamphetamine]","Down, Methamphetamine",Down and Methamphetamine
5988,2022-11-15,Vancouver,Get Your Drugs Tested,Down and Methamphetamine,Polysubstance,Black,Residue,1,-1,Methamphetamine,Caffeine,Uncertain carbohydrate,,,,0,1,0,0,0,"[Down, Methamphetamine]","Down, Methamphetamine",Down and Methamphetamine
6259,2022-11-07,Penticton,P+OPS Mobile (Penticton),Down and Methamphetamine,Polysubstance,Yellow (light),Powder,1,1,Caffeine,Para-fluorofentanyl,Erythritol,,,,1,1,0,1,0,"[Down, Methamphetamine]","Down, Methamphetamine",Down and Methamphetamine
6512,2022-11-01,Hope,Fraser Canyon Clinic,Down and Methamphetamine,Polysubstance,Brown,Residue,1,1,Methamphetamine,Caffeine,Uncertain carbohydrate,Uncertain match,,,0,1,0,1,0,"[Down, Methamphetamine]","Down, Methamphetamine",Down and Methamphetamine


In [347]:
def test(x):
    return 1 if re.search(
    '(.*an(y|i)l|heroin|(code|morph|buprenorph)ine|(oxy|hydro)(cod|morph)one|.*tazene|w-1(8|9)|opium|(furanyl\s)?uf-17|6-mam)',
    x,
    re.IGNORECASE
) else 0

# Run test() on every value of 'ftir component 0' and keep the index labels of the hits.
test2 = df['ftir component 0'].apply(test)
t2 = test2.loc[test2.eq(1)].index

In [330]:
# Same selection using opioid_present.
# NOTE(review): opioid_present is not defined in any visible cell — confirm where it comes from.
test1 = df['ftir component 0'].apply(opioid_present)
t1 = test1.loc[test1.eq(1)].index

In [13]:
# Display df (pandas truncates the rendering according to the display options).
df

Unnamed: 0,visit date,city,site,expected drug,category,colour,texture,fentanyl strip,benzo strip,ftir component 0,ftir component 1,ftir component 2,ftir component 3,ftir component 4,ftir component 5,contains_opioids,total_opioids,ftir_benzo,total_benzos
0,2023-01-31,Penticton,Fairhaven,Down (Unknown Opioid),Opioid,Purple,Chunk,1,-1,Fentanyl,Erythritol,Caffeine,,,,1,1,0,0
1,2023-01-31,Vancouver,Get Your Drugs Tested,Unknown,Unknown,Brown (light),Chunk,1,-1,Uncertain match,Fentanyl,,,,,1,1,0,0
2,2023-01-31,Vancouver,Get Your Drugs Tested,Alprazolam,Depressant,Green (light),Chunk,-1,1,Flualprazolam,Microcrystalline cellulose,,,,,0,0,1,1
3,2023-01-31,Nanaimo,CMHA,Down (Unknown Opioid),Opioid,Pink,Powder,1,1,Fentanyl,Mannitol,Caffeine,Bromazolam,,,1,1,1,1
4,2023-01-31,Cranbrook,ANKORS (Cranbrook),Methamphetamine,Stimulant,Colourless,Crystal,-1,0,Methamphetamine,,,,,,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59995,2018-12-03,Vancouver,Insite,MDMA,Psychedelic,Black,Granules,0,0,Uncertain match,Phenacetin,,,,,0,0,0,0
59996,2018-12-03,Vancouver,Insite,Fentanyl,Opioid,Purple,Granules,1,0,Caffeine,Inositol,Heroin hcl,Fentanyl,,,1,1,0,0
59997,2018-12-03,Vancouver,Getaway,Unknown,Unknown,Colourless,Crystal,-1,0,Methamphetamine,,,,,,0,0,0,0
59998,2018-12-03,Vancouver,Getaway,Fentanyl,Opioid,Green,Pebble,1,0,Fentanyl,Caffeine,Mannitol,Mannitol,Caffeine,Fentanyl,1,1,0,0


In [154]:
# incorrect_strip = df[(df['contains_opioids'] == 1) & ((df['fentanyl strip'] == -1) | (df['fentanyl strip'] == 0))]
# incorrect_ftir = df[(df['fentanyl strip'] == 1) & (df['contains_opioids'] == 0)]

# strip_diff = df[df['fentanyl strip'] == 1].index.difference(df[df['contains_opioids'] == 1].index)
# strip_diff = df.iloc[strip_diff]

(2691, 5489)