#### Importing Libraries

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import statsmodels.api as sm

# Silence every warning for the rest of the session.
# NOTE(review): this blanket filter also hides pandas warnings (for example
# about assigning into a selection of another DataFrame) — consider narrowing it.
import warnings
warnings.filterwarnings("ignore")

# Base directories used to build input/output file paths; both are relative
# paths and already end with a slash so file names can be appended directly.
root_csv = "../csv files/"
root_pickle = "../pickle files/"

#### Reading Files

In [None]:
# Load the county-level obesity workbook into a DataFrame.
# NOTE(review): the .xlsx is read from the pickle directory (root_pickle) —
# confirm that is where the workbook actually lives.
obese_workbook_path = root_pickle + 'obese_County_Data_New.xlsx'
df_obese = pd.read_excel(obese_workbook_path)

In [None]:
# Keep only the columns used by the rest of the notebook, in this order.
required_columns = [
    'State', 'County', 'CountyFIPS', 'Year',
    'Tax_Type', 'Rate',
    'Total Absolute', 'Total Percentage',
    'Gender', 'Gender Absolute', 'Gender Percentage',
    'Age', 'Age Obesity Absolute', 'Age Obesity Percentage',
    'Notes_On_Tax_Rate',
]
df_obese = df_obese[required_columns]

In [None]:
# Display the dtype of every column to see which ones still need numeric
# conversion (as the cell's last expression, the result is shown by the notebook).
df_obese.dtypes

In [None]:
# Convert the count/percentage columns to numeric dtypes.
# errors='coerce' turns entries that cannot be parsed into NaN, and
# downcast='float' asks pandas for the smallest float dtype that fits the values.
columns_to_convert = [
    'Total Absolute', 'Total Percentage',
    'Gender Absolute', 'Gender Percentage',
    'Age Obesity Absolute', 'Age Obesity Percentage',
]
df_obese = df_obese.assign(**{
    name: pd.to_numeric(df_obese[name], errors='coerce', downcast='float')
    for name in columns_to_convert
})

In [None]:
# Preview the first five rows (head() returns 5 rows by default).
df_obese.head()

In [None]:
# Count the missing (NaN) entries in each column.
missing_mask = df_obese.isna()
missing_mask.sum()

In [None]:
# Drop rows that lack either overall obesity figure. `subset` is passed as a
# list (the documented form: a label or sequence of labels) and the result is
# rebound rather than mutating the frame in place.
df_obese = df_obese.dropna(subset=['Total Percentage', 'Total Absolute'])

In [None]:
# Spread Tax_Type into one column per tax type, each holding the mean Rate for
# a given combination of the grouping columns listed below.
# NOTE(review): pivot_table groups on these keys, and pandas' default NA
# handling excludes rows whose key columns contain NaN — confirm that is intended.
pivot_index_columns = [
    'State', 'County', 'CountyFIPS', 'Year',
    'Total Absolute', 'Total Percentage',
    'Gender', 'Gender Absolute', 'Gender Percentage',
    'Age', 'Age Obesity Absolute', 'Age Obesity Percentage',
]
df_obese_pivot = df_obese.pivot_table(
    index=pivot_index_columns,
    columns='Tax_Type',
    values='Rate',
    aggfunc='mean',
).reset_index()

# Clear the column-axis name so the leftover 'Tax_Type' label is not displayed.
df_obese_pivot.columns.name = None

# Preview the first five rows (head() returns 5 rows by default).
df_obese_pivot.head()

In [None]:
# Count the missing (NaN) entries per column of the pivoted frame; the tax
# columns are NaN wherever a group had no Rate for that Tax_Type.
pivot_missing_mask = df_obese_pivot.isna()
pivot_missing_mask.sum()

#### Saving pickle files for unified Obesity dataset pivot

In [None]:
# Persist the unified pivoted dataset so later steps can reload it directly.
unified_pickle_path = root_pickle + 'Obesity_Unified_Pivot.pkl'
df_obese_pivot.to_pickle(unified_pickle_path)

In [None]:
# Reload the unified pivot from disk and preview the first five rows
# (head() returns 5 rows by default).
unified_pickle_path = root_pickle + 'Obesity_Unified_Pivot.pkl'
df_obese_pivot = pd.read_pickle(unified_pickle_path)
df_obese_pivot.head()

#### Saving pickle file for total absolute and percentage

In [None]:
# Frame with the overall obesity figures plus the two tax-rate columns.
# Presumably the gender/age breakdown repeats these values on several rows,
# hence the de-duplication.
# The frame is built in one non-mutating expression (no inplace=True on a
# selection of df_obese_pivot), so columns added to it later do not depend on
# pandas' chained-assignment warnings being silenced.
# NOTE(review): assumes Tax_Type yielded 'Grocery Tax' and 'Restaurant Tax' columns — confirm.
total_columns = [
    'State', 'County', 'CountyFIPS', 'Year',
    'Total Absolute', 'Total Percentage',
    'Grocery Tax', 'Restaurant Tax',
]
df_obese_total_pivot = df_obese_pivot[total_columns].drop_duplicates()

In [None]:
# tax_delta = restaurant tax rate minus grocery tax rate, row by row
# (NaN wherever either rate is missing).
restaurant_rate = df_obese_total_pivot['Restaurant Tax']
grocery_rate = df_obese_total_pivot['Grocery Tax']
df_obese_total_pivot['tax_delta'] = restaurant_rate - grocery_rate

In [None]:
# Back out the population implied by the figures: absolute * 100 / percentage.
# Assumes 'Total Percentage' is on a 0-100 scale — TODO confirm.
# NOTE(review): a percentage of 0 produces inf (or NaN for 0/0) here.
total_absolute = df_obese_total_pivot['Total Absolute']
total_percentage = df_obese_total_pivot['Total Percentage']
df_obese_total_pivot['Population'] = 100 * total_absolute / total_percentage

In [None]:
# Persist the totals frame (including tax_delta and Population) for later use.
total_pickle_path = root_pickle + 'Obesity_Total_Pivot.pkl'
df_obese_total_pivot.to_pickle(total_pickle_path)

In [None]:
# Reload the totals frame from disk and preview the first five rows
# (head() returns 5 rows by default).
total_pickle_path = root_pickle + 'Obesity_Total_Pivot.pkl'
df_obese_total_pivot = pd.read_pickle(total_pickle_path)
df_obese_total_pivot.head()

#### Saving pickle file for gender absolute and percentage

In [None]:
# Frame with the per-gender obesity figures plus the two tax-rate columns.
# Presumably the age breakdown repeats these values on several rows, hence the
# de-duplication.
# The frame is built in one non-mutating expression (no inplace=True on a
# selection of df_obese_pivot), so columns added to it later do not depend on
# pandas' chained-assignment warnings being silenced.
# NOTE(review): assumes Tax_Type yielded 'Grocery Tax' and 'Restaurant Tax' columns — confirm.
gender_columns = [
    'State', 'County', 'CountyFIPS', 'Year',
    'Gender Absolute', 'Gender Percentage', 'Gender',
    'Grocery Tax', 'Restaurant Tax',
]
df_obese_gender_pivot = df_obese_pivot[gender_columns].drop_duplicates()

In [None]:
# tax_delta = restaurant tax rate minus grocery tax rate, row by row
# (NaN wherever either rate is missing).
restaurant_rate = df_obese_gender_pivot['Restaurant Tax']
grocery_rate = df_obese_gender_pivot['Grocery Tax']
df_obese_gender_pivot['tax_delta'] = restaurant_rate - grocery_rate

In [None]:
# Back out the population implied by the figures: absolute * 100 / percentage.
# Assumes 'Gender Percentage' is on a 0-100 scale — TODO confirm.
# NOTE(review): a percentage of 0 produces inf (or NaN for 0/0) here.
gender_absolute = df_obese_gender_pivot['Gender Absolute']
gender_percentage = df_obese_gender_pivot['Gender Percentage']
df_obese_gender_pivot['Population'] = 100 * gender_absolute / gender_percentage

In [None]:
# Persist the gender frame (including tax_delta and Population) for later use.
gender_pickle_path = root_pickle + 'Obesity_Gender_Pivot.pkl'
df_obese_gender_pivot.to_pickle(gender_pickle_path)

In [None]:
# Reload the gender frame from disk and preview the first five rows
# (head() returns 5 rows by default).
gender_pickle_path = root_pickle + 'Obesity_Gender_Pivot.pkl'
df_obese_gender_pivot = pd.read_pickle(gender_pickle_path)
df_obese_gender_pivot.head()

#### Saving pickle file for age absolute and percentage

In [None]:
# Frame with the per-age-group obesity figures plus the two tax-rate columns.
# Presumably the gender breakdown repeats these values on several rows, hence
# the de-duplication.
# The frame is built in one non-mutating expression (no inplace=True on a
# selection of df_obese_pivot), so columns added to it later do not depend on
# pandas' chained-assignment warnings being silenced.
# NOTE(review): assumes Tax_Type yielded 'Grocery Tax' and 'Restaurant Tax' columns — confirm.
age_columns = [
    'State', 'County', 'CountyFIPS', 'Year',
    'Age Obesity Absolute', 'Age Obesity Percentage', 'Age',
    'Grocery Tax', 'Restaurant Tax',
]
df_obese_age_pivot = df_obese_pivot[age_columns].drop_duplicates()

In [None]:
# tax_delta = restaurant tax rate minus grocery tax rate, row by row
# (NaN wherever either rate is missing).
restaurant_rate = df_obese_age_pivot['Restaurant Tax']
grocery_rate = df_obese_age_pivot['Grocery Tax']
df_obese_age_pivot['tax_delta'] = restaurant_rate - grocery_rate

In [None]:
# Back out the population implied by the figures: absolute * 100 / percentage.
# Assumes 'Age Obesity Percentage' is on a 0-100 scale — TODO confirm.
# NOTE(review): a percentage of 0 produces inf (or NaN for 0/0) here.
age_absolute = df_obese_age_pivot['Age Obesity Absolute']
age_percentage = df_obese_age_pivot['Age Obesity Percentage']
df_obese_age_pivot['Population'] = 100 * age_absolute / age_percentage

In [None]:
# Persist the age frame (including tax_delta and Population) for later use.
age_pickle_path = root_pickle + 'Obesity_Age_Pivot.pkl'
df_obese_age_pivot.to_pickle(age_pickle_path)

In [None]:
# Reload the age frame from disk and preview the first five rows
# (head() returns 5 rows by default).
age_pickle_path = root_pickle + 'Obesity_Age_Pivot.pkl'
df_obese_age_pivot = pd.read_pickle(age_pickle_path)
df_obese_age_pivot.head()