# Analysis of Reported Adverse Food Events

## Library Load

In [1]:
%matplotlib inline

In [1]:
import sys
sys.path.insert(0, '../src')

In [2]:
import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt
import seaborn as sns
from nltk.corpus import stopwords
import matplotlib.style as style 
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go
from plotly.offline import plot
import numpy as np
import re
import string
from visualization.visualize import brands_vs_outcomes_plot,plot_bar_histogram


## Load Data

In [3]:
aggReports=pd.read_csv("../data/processed/clean_data.csv",index_col=0)
expl_aggReports = pd.read_csv("../data/processed/exploded_out.csv",index_col=0)
aggReports_brand = pd.read_csv("../data/processed/clean_brand_data.csv",index_col=0)



Columns (1,6) have mixed types.Specify dtype option on import or set low_memory=False.



In [4]:
# relevant serious outcomes 
relv_outcomes = ["Death","Life Threatening","Hospitalization","Disability","Patient Visited ER"]

## Which are the major categories causing serious outcomes?

In [5]:
top_cat_df = aggReports.groupby(["category"])['report_id'].count().sort_values(ascending=False).reset_index()[:5].rename(columns={'report_id':'#events'})

fig_pie = px.pie(top_cat_df, values='#events', names='category',width=1200,height=800)
fig_pie.update_layout(uniformtext_minsize=24, uniformtext_mode='hide')
fig_pie.update_layout(legend = dict(font = dict(family = "Arial", size = 24, color = "black")),
                  legend_title = dict(font = dict(family = "Arial", size = 24, color = "blue")))                  
fig_pie.show()

## Which serious outcomes are caused by major categories?

In [6]:
# Plot category wise outcomes

df=expl_aggReports.dropna()

relv_categories = list(df.groupby("category")['report_id'].count().sort_values(ascending=False).reset_index()['category'].values[:5])

df = df[df['outcomes'].isin(relv_outcomes)]
df = df[df['category'].isin(relv_categories)]

df.category.unique()
cat_labels={'Nuts/Edible Seed': "Nuts",'Vit/Min/Prot/Unconv Diet(Human/Animal)':"Vitamins",'Vegetables/Vegetable Products':"Vegetables","Soft Drink/Water":"Soft drinks",'Cosmetics':"Cosmetics"}
df['category'] = pd.Categorical(df.category).rename_categories(cat_labels)

plot_bar_histogram(df,title="Category-wise outcomes distribution",x="category",color="outcomes",barmode="stack",logscale=True)

# Can Cosmetics brand *really* kill you? (not just your purse :P)

It is quite interesting to note that **Wen-L'Oreal-Brazilian Blowout** equally hold the responsilbity for causing the majority deaths cause due to cosmetics. Therefore among top brands, there's 1 out of 3 chances that you can die from *Wen/L'Oreal/Brazilian out*

In [7]:
# Cosmetics Brands vs Outcomes
brands_vs_outcomes_plot(expl_aggReports,"Cosmetics","Cosmetics | Brand-wise Reported Adverse Events Count",relv_outcomes=relv_outcomes)

In [9]:
# Vitamins Brand Clustering
brands_vs_outcomes_plot(expl_aggReports,"Vit/Min/Prot/Unconv Diet(Human/Animal)",title="Vitamins/Minerals | Brand-wise Reported Adverse Events Count",relv_outcomes=relv_outcomes)

In [13]:
# Vegetable Brand Clustering
brands_vs_outcomes_plot(expl_aggReports,"Vegetables/Vegetable Products",title = "Vegetables/Veg-Based | Brand-wise Reported Adverse Events Count",relv_outcomes=relv_outcomes)

## Quorn Outcomes with Time

In [None]:
relv_df= expl_aggReports[expl_aggReports['brand']=="QUORN"]

fig=px.histogram(relv_df,x="caers_created_date",color="outcomes")
fig.show()

In [None]:


top_cat_df = aggReports.groupby(["category"])['report_id'].count().sort_values(ascending=False).reset_index()[:5].rename(columns={'report_id':'#events'})
fig_pie = px.pie(top_cat_df, values='#events', names='category',width=1200,height=800)

fig_pie.update_layout(uniformtext_minsize=24, uniformtext_mode='hide')
fig_pie.update_layout(legend = dict(font = dict(family = "Arial", size = 24, color = "black")),
                  legend_title = dict(font = dict(family = "Arial", size = 24, color = "blue")))                  
fig_pie.show()