# Analysis of Reported Adverse Food Events

## Library Load

In [1]:
import pandas as pd
import plotly.express as px
import visualize  as vis 

## Load Data

In [4]:
# Load the processed report tables. Every CSV uses its first column as the
# index; the two "*_time" tables additionally parse 'time_stamp' as datetimes.
aggReports = pd.read_csv(
    "../../data/processed/processed_data.csv",
    index_col=0,
)
aggReports_time = pd.read_csv(
    "../../data/processed/clean_data_time.csv",
    index_col=0,
    parse_dates=["time_stamp"],
)
expl_aggReports = pd.read_csv(
    "../../data/processed/exploded_data.csv",
    index_col=0,
)
expl_aggReports_time = pd.read_csv(
    "../../data/processed/exploded_data_time.csv",
    index_col=0,
    parse_dates=["time_stamp"],
)

# Brand-level view: keep only the columns used by the brand plots and drop
# every row that has a missing value in any of them.
expl_aggReports_brands = expl_aggReports[
    [
        "caers_created_date",
        "report_id",
        "product",
        "category",
        "outcomes",
        "brand",
    ]
].dropna()

  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)


In [5]:
# Outcome labels treated as "serious" by the analyses in this notebook.
relv_outcomes = [
    "Death",
    "Life Threatening",
    "Hospitalization",
    "Disability",
    "Patient Visited ER",
]

## How is the number of reported events changing over time?

In [6]:
# Trend of all time-stamped reports.
vis.plot_time_trend(
    aggReports_time,
    "number of reports over time",
)

## Which are the major categories causing serious outcomes?

In [7]:
# Count reports per category and keep the five categories with the most
# reports; the count column is renamed to '#events' for plotting.
top_cat_df = (
    aggReports.groupby(["category"])["report_id"]
    .count()
    .sort_values(ascending=False)
    .reset_index()
    .head(5)
    .rename(columns={"report_id": "#events"})
)

# Pie chart of the five categories, with enlarged legend fonts.
fig_pie = px.pie(
    top_cat_df,
    values="#events",
    names="category",
    width=1200,
    height=800,
)
fig_pie.update_layout(
    uniformtext_minsize=24,
    uniformtext_mode="hide",
    legend={"font": {"family": "Arial", "size": 24, "color": "black"}},
    legend_title={"font": {"family": "Arial", "size": 24, "color": "blue"}},
)
fig_pie.show()

### Categories across time

In [8]:
# Yearly groups of the time-stamped reports; also fed to the plot below.
group = aggReports_time.groupby("year")

# Per-category report counts, sorted from least to most frequent.
category = pd.DataFrame(
    aggReports_time["category"].value_counts().items(),
    columns=["category", "counts"],
).sort_values(by="counts")

vis.plot_pie_subplots_yearly(
    group,
    "yearly category",
    "category",
    dropping=True,
    d_threshold=1 / 50,
)

## Outcomes distribution across major categories

In [9]:
# Plot category-wise outcomes.
# The five categories with the most rows in the brand-level table.
relv_categories = (
    expl_aggReports_brands.groupby("category")["report_id"]
    .count()
    .sort_values(ascending=False)
    .index[:5]
    .tolist()
)

# Keep rows that have a serious outcome AND belong to one of those categories.
# The explicit .copy() matters: the 'category' column is reassigned below, and
# assigning into a filtered slice of expl_aggReports_brands would raise
# pandas' SettingWithCopyWarning (and may not modify what was intended).
df = expl_aggReports_brands[
    expl_aggReports_brands["outcomes"].isin(relv_outcomes)
    & expl_aggReports_brands["category"].isin(relv_categories)
].copy()

# Short display labels for the plot axis. With a dict, rename_categories
# leaves any category that is not a key of the mapping unchanged.
cat_labels = {
    "Nuts/Edible Seed": "Nuts",
    "Vit/Min/Prot/Unconv Diet(Human/Animal)": "Vitamins",
    "Vegetables/Vegetable Products": "Vegetables",
    "Soft Drink/Water": "Soft drinks",
    "Cosmetics": "Cosmetics",
}
df["category"] = pd.Categorical(df["category"]).rename_categories(cat_labels)

vis.plot_bar_histogram(
    df,
    title="Category-wise outcomes distribution",
    x="category",
    color="outcomes",
    barmode="stack",
    logscale=True,
)

### Are adverse outcomes increasing with time?

In [10]:
# Group the exploded, time-stamped reports by outcome and list the outcome
# labels ordered from most to least frequent.
outcome_group = expl_aggReports_time.groupby("outcomes")
outcomes = (
    outcome_group["outcomes"]
    .agg("count")
    .sort_values(ascending=False)
    .index.tolist()
)
# The most frequent label is overwritten with "Not Specified".
# NOTE(review): presumably the top label is a placeholder for a missing
# outcome - confirm against the preprocessing step.
outcomes[0] = "Not Specified"

# Outcomes excluded from the scatter plots.
not_interested = [
    "Medically Important",
    "Other Outcome",
    "Patient Visited Healthcare Provider",
    "Other Seriousness",
    "Required Intervention",
    "Congenital Anomaly",
    "Not Specified",
]
vis.plot_scatters(
    outcome_group,
    outcomes,
    "outcome over time",
    fil=True,
    filter_list=not_interested,
    plot_now=True,
)

# Can cosmetics brands *really* kill you? (not just your purse :P)

In [11]:
# Cosmetics: brands vs. serious outcomes.
vis.brands_vs_outcomes_plot(
    expl_aggReports_brands,
    "Cosmetics",
    "Cosmetics | Brand-wise Reported Adverse Events Count",
    relv_outcomes=relv_outcomes,
)

### Cosmetics reported events over time

In [12]:
# Select the 'Cosmetics' rows of the exploded, time-stamped reports and plot
# their trend over time.
category_group = expl_aggReports_time.groupby("category")
cosmetics = category_group.get_group("Cosmetics")
vis.plot_time_trend(
    cosmetics,
    "Cosmetics over time",
    x_col="date",
    y_col="counts",
)

### But what could be the adverse symptoms of cosmetics, you ask?

In [13]:
# Count symptoms, then plot the most frequent ones.
# NOTE(review): the selector 1 presumably picks the Cosmetics subset - confirm
# against visualize.symptom_counter.
dic = vis.symptom_counter(aggReports, 1)
top5 = vis.top_symptoms(dic, "Symptoms for Cosmetics")

### Which age group do cosmetics affect the most?

In [14]:
# Age distribution for Cosmetics, using only rows with a positive patient age.
vis.age_dist_plot(
    expl_aggReports.loc[expl_aggReports["patient_age"] > 0],
    "Cosmetics",
)

# Are all Vitamins good for you?? 

### Vitamin brands vs Outcomes

In [15]:
# Vitamins/minerals: brands vs. serious outcomes.
vis.brands_vs_outcomes_plot(
    expl_aggReports_brands,
    "Vit/Min/Prot/Unconv Diet(Human/Animal)",
    title="Vitamins/Minerals | Brand-wise Reported Adverse Events Count",
    relv_outcomes=relv_outcomes,
)

### Are Vitamins adverse events increasing with time? 

In [16]:
# Select the vitamins/minerals rows of the exploded, time-stamped reports and
# plot their trend over time.
category_group = expl_aggReports_time.groupby("category")
vitamins = category_group.get_group("Vit/Min/Prot/Unconv Diet(Human/Animal)")
vis.plot_time_trend(
    vitamins,
    "Vitamins over time",
    x_col="date",
    y_col="counts",
)

### Common symptoms caused by popular vitamins

In [17]:
# Symptom distribution for the top vitamins, computed from the aggregated reports.
vis.top_vitamins_symptom_distribution(
    aggReports,
)

### Do vitamins affect all age-groups equally?

In [18]:
# Age distribution for vitamins/minerals, using only rows with a positive
# patient age.
vis.age_dist_plot(
    expl_aggReports.loc[expl_aggReports["patient_age"] > 0],
    "Vit/Min/Prot/Unconv Diet(Human/Animal)",
)

## Veggies are good for health, right? (P.S. Picky eaters say YAY!)

In [19]:
# Vegetables / vegetable products: brands vs. serious outcomes.
vis.brands_vs_outcomes_plot(
    expl_aggReports_brands,
    "Vegetables/Vegetable Products",
    title="Vegetables/Veg-Based | Brand-wise Reported Adverse Events Count",
    relv_outcomes=relv_outcomes,
)

## Quorn Case-study (Infamous meat-substitute brand?)

### How badly can Quorn affect you?

In [20]:
# Count symptoms, then plot the most frequent ones.
# NOTE(review): the selector 2 presumably picks the Quorn subset - confirm
# against visualize.symptom_counter.
dic = vis.symptom_counter(aggReports, 2)
top5 = vis.top_symptoms(dic, "Symptoms for Quorn")

### It's only getting worse with time

In [21]:
# Brand-level rows for QUORN, with any row containing a missing value removed.
relv_df = expl_aggReports_brands.loc[
    expl_aggReports_brands["brand"] == "QUORN"
].dropna()

# Histogram of QUORN reports by creation date, coloured by outcome.
fig = px.histogram(
    relv_df,
    x="caers_created_date",
    color="outcomes",
)
fig.show()