<a href="https://www.kaggle.com/code/fabriciositto/accidental-drug-analysis-visualization?scriptVersionId=144508677" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

**Accidental Drug - Analysis + Visualization**


Data cleaning and visualization of some features

## First Steps

In [None]:
#Libraries
import numpy as np 
import pandas as pd 
import geopandas as gpd
import re

import folium
from folium import Choropleth, Circle, Marker
from folium.plugins import HeatMap, MarkerCluster
import plotly.express as px
import plotly.graph_objects as go

In [None]:
# Load the raw dataset from the Kaggle input directory and preview the first rows.
csv_path = '/kaggle/input/accidental-drug-related-deaths-20122018/Accidental_Drug_Related_Deaths_2012-2018.csv'
df = pd.read_csv(csv_path)
df.head()

In [None]:
# Summarize the column dtypes and non-null counts of the raw data.
df.info()

In [None]:
# Work only with the columns describing the person, the place of death,
# the cause of death (COD), the drugs involved and the city coordinates.
demographic_cols = ['ID', 'Date', 'Age', 'Sex', 'Race', 'DeathCity']
location_cols = ['Location', 'LocationifOther', 'COD']
substance_cols = [
    'Heroin', 'Cocaine', 'Fentanyl', 'FentanylAnalogue', 'Oxycodone',
    'Oxymorphone', 'Ethanol', 'Hydrocodone', 'Benzodiazepine', 'Methadone',
    'Amphet', 'Tramad', 'Morphine_NotHeroin', 'Hydromorphone', 'Other',
    'OpiateNOS', 'AnyOpioid',
]

main_col = demographic_cols + location_cols + substance_cols + ['DeathCityGeo']

In [None]:
# Take an explicit copy of the selected columns: df_reduced is modified in
# place by the cleaning steps that follow, and working on an independent
# DataFrame (rather than a selection of `df`) avoids pandas'
# SettingWithCopyWarning and leaves the raw data untouched.
df_reduced = df[main_col].copy()

In [None]:
# Preview the first five rows of the reduced frame (five is head()'s default).
df_reduced.head()

In [None]:
# Summarize the column dtypes and non-null counts of the reduced frame.
df_reduced.info()

## Duplicates

In [None]:
# Count the rows whose ID repeats an earlier row's ID.
df_reduced.duplicated(subset='ID').sum()

## Handling null values

In [None]:
# Number of missing values in each column.
df_reduced.isna().sum()

In [None]:
# Show the rows whose Age is missing.
df_reduced.loc[df_reduced['Age'].isna()]

In [None]:
# For every row whose Age is missing, print how many of its columns are null.
# The filter is evaluated once (the original re-evaluated it twice per loop
# iteration) and the per-row counts come from a single row-wise sum.
rows_missing_age = df_reduced[df_reduced['Age'].isnull()]
for null_count in rows_missing_age.isnull().sum(axis=1):
    print(null_count)

In [None]:
# Most columns of these rows are null as well, so simply drop the rows.
df_reduced.dropna(subset=['Age'], inplace=True)

In [None]:
# Re-check the missing values per column after dropping rows without Age.
df_reduced.isna().sum()

In [None]:
# For every row whose Date is missing, print how many of its columns are null.
# The filter is evaluated once instead of twice per loop iteration.
rows_missing_date = df_reduced[df_reduced['Date'].isnull()]
for null_count in rows_missing_date.isnull().sum(axis=1):
    print(null_count)

In [None]:
# Drop the rows that have no Date.
df_reduced.dropna(subset=['Date'], inplace=True)

In [None]:
# Re-check the missing values per column after dropping rows without Date.
df_reduced.isna().sum()

In [None]:
# For the rows missing Sex, and then for the rows missing Race, print how many
# columns of each row are null. Each filter is evaluated once instead of twice
# per loop iteration.
for column in ('Sex', 'Race'):
    rows_missing = df_reduced[df_reduced[column].isnull()]
    for null_count in rows_missing.isnull().sum(axis=1):
        print(null_count)

In [None]:
# Replace the missing Sex and Race values with the placeholder 'Other'.
df_reduced.fillna({'Sex': 'Other', 'Race': 'Other'}, inplace=True)

In [None]:
# Re-check the missing values per column after filling Sex and Race.
df_reduced.isna().sum()

In [None]:
# DeathCity is important for the analysis, so rows without it are dropped.
df_reduced.dropna(subset=['DeathCity'], inplace=True)

In [None]:
# Re-check the missing values per column after dropping rows without DeathCity.
df_reduced.isna().sum()

## Location and LocationifOther
These columns are a special case. We will use the information in LocationifOther to fill in Location, but first it is necessary to categorize all the free-text entries so that we end up with a manageable number of categories.

In [None]:
# Show both location columns for the rows whose Location is missing or 'Other'.
needs_detail = df_reduced['Location'].isna() | df_reduced['Location'].eq('Other')
df_reduced.loc[needs_detail, ['Location', 'LocationifOther']]

In [None]:
# Distinct free-text values found in LocationifOther.
pd.unique(df_reduced['LocationifOther'])

In [None]:
# Distinct values found in Location.
pd.unique(df_reduced['Location'])

In [None]:
# How often each Location value occurs (missing values are not counted).
location_column = df_reduced['Location']
location_column.value_counts()

In [None]:
# Helpers that sort a list of free-text strings into a fixed set of categories.
def are_in(keywords, sentence):
    """Return True if any keyword occurs in ``sentence``, ignoring its case.

    Parameters
    ----------
    keywords : iterable of str
        Substrings to look for. They are compared against the lower-cased
        sentence, so a keyword containing upper-case letters never matches.
    sentence : str
        Text to search. Non-string values (for example NaN) never match
        instead of raising ``AttributeError``.
    """
    if not isinstance(sentence, str):
        return False
    lowered = sentence.lower()  # lower-case once, not once per keyword
    return any(key in lowered for key in keywords)


def categorizador(lista, categorias):
    """Distribute the sentences of ``lista`` among ``categorias``.

    Categories are processed in order and a sentence is assigned to the
    first category that has a matching keyword (see ``are_in``); it is then
    no longer available to the categories that follow.

    Neither argument is modified: the function works on copies, so it can be
    called repeatedly with the same inputs. (Mutating the inputs made a
    second call with the same ``categorias`` fail in ``list.remove``.)

    Parameters
    ----------
    lista : list
        Sentences to categorize.
    categorias : list of [name, keywords, assigned]
        One entry per category: its name, the keywords that select it and a
        list of sentences already assigned to it (normally empty).

    Returns
    -------
    list
        ``[[cats], remaining]`` where ``cats`` is a copy of ``categorias``
        whose third element has the matched sentences appended, and
        ``remaining`` holds the sentences that matched no category.
    """
    remaining = list(lista)
    cats = []
    for cat in categorias:
        keywords = cat[1]
        matched = []
        unmatched = []
        # Single pass: split the still-unassigned sentences by this category.
        for sentence in remaining:
            if are_in(keywords, sentence):
                matched.append(sentence)
            else:
                unmatched.append(sentence)
        new_cat = list(cat)
        new_cat[2] = list(cat[2]) + matched
        cats.append(new_cat)
        remaining = unmatched

    return [[cats], remaining]

In [None]:
# Rows whose Location is missing or just 'Other' take the free-text value of
# LocationifOther instead. The assignment goes through .loc so that it writes
# to df_reduced itself; the chained form `df_reduced.Location[mask] = ...`
# may write to a temporary object and leave df_reduced unchanged.
other_and_null_values = (df_reduced['Location'].isnull() | (df_reduced['Location'] == 'Other')).to_numpy()
df_reduced.loc[other_and_null_values, 'Location'] = df_reduced.loc[other_and_null_values, 'LocationifOther']

In [None]:
# Collect every distinct location text from both columns. Missing values are
# discarded with dropna() up front; removing np.nan from the list by hand
# raised ValueError unless exactly two NaN entries happened to be present.
lista = (list(df_reduced['LocationifOther'].dropna().unique())
         + list(df_reduced['Location'].dropna().unique()))

# One entry per category: [category name, lower-case keywords, sentences assigned].
# Order matters: a sentence goes to the first category with a matching keyword.
categorias=[['care_center',['ymca','rehab','church','center','shelter','hospice','convalescent','nursing'],[]],
            ['other_residence',['other_residence','hous','home','apartment','apt.','friend','father'],[]],
            ['hotel_motel',['hotel','motel','room','inn','honey','lodge','residential','suit'],[]],
            ['own_residence',['residence','behind','basement','backyard','back','porch','stair','front','hallway'],[]],
            ['hospital',['hospital'],[]],
            ['public_buildings',['cemetery','public','train','university','uconn','post'],[]],
            ['car_parking',['park','parking','car','vehicle','driving','trailer','minivan','truck','camper'],[]],
            ['shop',['shop','work','inc','super','gas','donuts','commercial','restaurant','tavern','cube smart', 'gravel pit', 'baldwin pond', 'kik builders', 'price rite', 'choice pet suppy rear', 'taco bell', 'econologe',],[]],
            ['outdoor',['rail','alleyway','boat','sidewalk','area','state','farm','stream','underpass','driveway','field','outdoor','street','outside','wooded','beach','pier','walkway','woods','roadway','lake','abandoned','overpass','yard'],[]]
]

# results[0] wraps the filled-in categories in a one-element list;
# results[1] holds the sentences that matched no category.
results=categorizador(lista,categorias)
new_cats=results[0]
new_list=results[1]

In [None]:
# Sentences that have to be replaced: one list per category.
to_replace = [cat[2] for cat in new_cats[0]]

# Replacement values (the new category names), in the same order.
value = [cat[0] for cat in new_cats[0]]

In [None]:
# Replace every categorized sentence in Location with its category name.
# The result is assigned back to the column: calling replace(inplace=True) on
# df_reduced['Location'] acts on an intermediate Series and is not guaranteed
# to change df_reduced.
for sentences, category in zip(to_replace, value):
    df_reduced['Location'] = df_reduced['Location'].replace(to_replace=sentences, value=category)

In [None]:
# Locations that are still missing become 'Other'. The result is assigned back
# because an in-place fillna on df_reduced['Location'] acts on an intermediate
# Series and is not guaranteed to change df_reduced.
df_reduced['Location'] = df_reduced['Location'].fillna('Other')

In [None]:
# LocationifOther has been folded into Location, so the column can go.
df_reduced.drop(columns=['LocationifOther'], inplace=True)

In [None]:
# Re-check the missing values per column after cleaning Location.
df_reduced.isna().sum()

## Drug columns

In [None]:
# Columns that flag which substances were involved in each death.
drugs_col = [
    'Heroin', 'Cocaine', 'Fentanyl', 'FentanylAnalogue', 'Oxycodone',
    'Oxymorphone', 'Ethanol', 'Hydrocodone', 'Benzodiazepine', 'Methadone',
    'Amphet', 'Tramad', 'Morphine_NotHeroin', 'Hydromorphone', 'OpiateNOS',
    'AnyOpioid', 'Other',
]
# Explicit copy: df_drugs is cleaned separately and written back afterwards,
# so it must not be a selection tied to df_reduced (SettingWithCopyWarning).
df_drugs = df_reduced[drugs_col].copy()

In [None]:
# Turn the drug columns into boolean flags: a missing value means the drug was
# not recorded (False) and any recorded value counts as present (True).
# notna() does this in one step and yields real boolean columns, replacing the
# in-place fillna followed by a regex replace on object data.
# NOTE(review): every non-null entry becomes True, which would include values
# such as 'N' if the raw data contains them - confirm against the dataset.
df_drugs = df_drugs.notna()

In [None]:
# Confirm that the drug flag columns contain no missing values.
df_drugs.isna().sum()

In [None]:
# Write the cleaned drug flags back into the main frame, column by name.
drug_flag_columns = list(df_drugs.columns)
df_reduced[drug_flag_columns] = df_drugs

In [None]:
# Final check of the missing values per column once cleaning is done.
df_reduced.isna().sum()

## Formats

In [None]:
# Strip the constant ' 12:00:00 AM' suffix so that only the date part remains.
# The result is assigned back to the column: an in-place replace on
# df_reduced.Date acts on an intermediate Series and is not guaranteed to
# change df_reduced.
df_reduced['Date'] = df_reduced['Date'].replace(r' 12:00:00 AM', '', regex=True)

In [None]:
# Parse the month/day/year strings into datetime values.
df_reduced['Date'] = pd.to_datetime(df_reduced['Date'], format='%m/%d/%Y')

In [None]:
# Check the resulting dtypes and non-null counts after the format conversions.
df_reduced.info()

In [None]:
# Renumber the rows from 0 after the drops above; because drop=True is not
# passed, the previous index is kept as a new 'index' column.
df_reduced.reset_index(inplace=True)

# Visualization

## Time visualization

In [None]:
# Number of deaths for each day of the month, in calendar order.
deaths_by_day = df_reduced['Date'].dt.day.value_counts().sort_index()
fig = px.line(deaths_by_day, title='Deaths per day of month')
fig.show()

In [None]:
# Number of deaths for each month of the year, in calendar order.
deaths_by_month = df_reduced['Date'].dt.month.value_counts().sort_index()
fig = px.line(deaths_by_month, title='Deaths per month of year')
fig.show()

In [None]:
# Number of deaths for each year, in chronological order.
deaths_by_year = df_reduced['Date'].dt.year.value_counts().sort_index()
fig = px.line(deaths_by_year, title='Deaths per year')
fig.show()

## Geospatial visualization

In [None]:
# Extract the first two decimal numbers of each DeathCityGeo string as
# latitude and longitude (kept as strings; they are converted to float later).
#
# Compared with a row-by-row loop that ran the regex twice per row:
#   * the decimal point is escaped, so only real decimals such as
#     '41.765775' or '-72.673356' match;
#   * rows with a missing DeathCityGeo, or with fewer than two numbers, get
#     NaN instead of raising;
#   * the results keep df_reduced's index, so the assignment lines up with
#     the right rows whatever the index looks like.
coordinate_pattern = r'-?\d{2}\.\d*'
coordinates = df_reduced['DeathCityGeo'].str.findall(coordinate_pattern)
lat = coordinates.str[0]
lon = coordinates.str[1]

df_reduced['Latitude'] = lat
df_reduced['Longitude'] = lon

In [None]:
# Build one point per row (x = Longitude, y = Latitude) and attach the points
# as the geometry of a GeoDataFrame.
point_geometry = gpd.points_from_xy(df_reduced.Longitude, df_reduced.Latitude)
df_reduced = gpd.GeoDataFrame(df_reduced, geometry=point_geometry)

In [None]:
# Store the coordinates as 64-bit floats.
coordinate_cols = ['Latitude', 'Longitude']
df_reduced[coordinate_cols] = df_reduced[coordinate_cols].astype('float64')

In [None]:
# Compare the extracted coordinates with the original DeathCityGeo text.
df_reduced[['Latitude','Longitude','DeathCityGeo']]

In [None]:
# Center the maps on the median [latitude, longitude] of all records.
median_coordinates = df_reduced[['Latitude', 'Longitude']].median()
map_center = median_coordinates.tolist()

In [None]:
# Map with one marker per record, grouped into clusters.
m = folium.Map(location=map_center, zoom_start=10)

mc = MarkerCluster()
for latitude, longitude in zip(df_reduced['Latitude'], df_reduced['Longitude']):
    mc.add_child(Marker([latitude, longitude]))

m.add_child(mc)

m

In [None]:
# Heat map of the record coordinates on a fresh base map.
m = folium.Map(location=map_center, zoom_start=10)

heat_layer = HeatMap(data=df_reduced[['Latitude','Longitude']], radius=25)
heat_layer.add_to(m)

m

## Categorical visualization

In [None]:
# Sunburst chart: Sex on the inner ring, death Location on the outer ring.
px.sunburst(
    df_reduced,
    path=['Sex', 'Location'],
    title='Death location by sex',
)

In [None]:
# Sunburst chart: DeathCity on the inner ring, death Location on the outer ring.
px.sunburst(
    df_reduced,
    path=['DeathCity', 'Location'],
)