<a href="https://colab.research.google.com/github/ishaliu/storytelling-with-data/blob/master/storytelling-with-data/data-stories/flexitarian/Flexitarian.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Setup

In [None]:
# Import statements for relevant libraries
import numpy as np
import pandas as pd
import plotnine as pn
import csv
import plotly
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
import plotly.figure_factory as ff
import seaborn as sns
import bokeh as bk
from matplotlib import pyplot as plt

Upload Files:
*   projected_emissions.csv
*   MeatSurvey.csv
*   2019USCitiesIndexResults.csv
*   Food_Production.csv
*   google.csv
*   milk.csv


In [None]:
# Upload the CSV files listed above through the Colab `files` helper.
# NOTE(review): `google.colab` is presumably only importable inside a Google
# Colab runtime; this cell will fail elsewhere — confirm before running locally.
from google.colab import files
files.upload()

# Keara's Code


## Data

### Projected Emissions

In [None]:
# Source: https://www.statista.com/statistics/263980/forecast-of-global-carbon-dioxide-emissions/
# Read the uploaded file by relative path, the same way the Food_Production,
# google and milk loaders in this notebook do, instead of hard-coding the
# Colab-specific '/content/' directory.
projected = pd.read_csv('projected_emissions.csv')
projected

In [None]:
# Remove the rows labelled 0, 1 and 2 (presumably preamble rows from the CSV
# export — verify against the raw file) and give both columns readable names.
projected = (
    projected
    .drop([0, 1, 2])
    .rename(columns={
        'Global CO2 emissions 2018-2050': 'Year',
        'Unnamed: 1': 'Emissions',
    })
)
# Overwrite the year labels with plain integers: 2020, 2025, ..., 2050.
# This assignment requires exactly seven rows to remain after the drop.
projected['Year'] = list(range(2020, 2051, 5))

In [None]:
# Cast the projection columns to numeric dtypes first: the scenario
# arithmetic and the line chart later in the notebook operate on them.
projected['Year'] = projected['Year'].astype(int)
projected['Emissions'] = projected['Emissions'].astype(float)

# NOTE: the original author flagged this merge as something that "should
# probably not" be used because combining the datasets made things conflict.
# `emissions2` is not defined in any visible cell, so the cell used to raise
# NameError on a fresh kernel before the Emissions cast above could run.
# The merge is now attempted only when that frame actually exists.
if 'emissions2' in globals():
    emissions2['Year'] = emissions2['Year'].astype(int)
    emissions2['Emissions'] = emissions2['Emissions'].astype(float)
    merged = projected.merge(emissions2, how='outer')
else:
    # Nothing to combine with: fall back to the projection on its own.
    merged = projected.copy()
merged = merged.sort_values(by='Year')
merged

### Vegan/Vegetarian Emissions

Multiple vegetarian news sources cite that 2.7% of the world is vegan and 11.3% of the world is vegetarian.

https://wtvox.com/lifestyle/2019-the-world-of-vegan-but-how-many-vegans-are-in-the-world/
https://www.futurekind.com/blogs/vegan/vegan-statistics

We proceed on the assumption that these figures are accurate, despite the lack of formal literature supporting them.

A peer-reviewed study by Scarborough et al. in *Climatic Change* found that medium meat-eaters produce 5.63 kg of dietary CO2-equivalent emissions per day, whereas vegetarians produce 3.81 kg and vegans produce 2.89 kg.
https://link.springer.com/article/10.1007/s10584-014-1169-1

In [None]:
# World population figure used for every scenario below.
world_pop = 7_900_000_000

# Population shares by diet: 2.7% vegan and 11.3% vegetarian (the figures
# cited in the text above), with the remaining 86% treated as meat eaters.
vegan_share = .027
vegetarian_share = .113
meat_eater_share = .86

vegans = world_pop * vegan_share
vegetarians = world_pop * vegetarian_share
meat_eaters = world_pop * meat_eater_share

In [None]:
# Daily kg CO2e saved per person when a medium meat eater (5.63 kg/day) eats
# vegetarian (3.81 kg/day) or vegan (2.89 kg/day) instead.
KG_SAVED_VEGETARIAN_DAY = 1.82  # 5.63 - 3.81
KG_SAVED_VEGAN_DAY = 2.74       # 5.63 - 2.89
MONDAYS_PER_YEAR = 52

# kg -> metric tons. The previous factor (0.00110231) converts kg to US short
# tons, which overstated the savings by about 10% relative to the emissions
# series, which the chart labels in billion metric tons.
KG_TO_METRIC_TONS = 0.001
BILLION = 1_000_000_000

# Annual savings, in billion metric tons, if every meat eater skipped meat
# (or all animal products) one day a week.
veggie_monday = meat_eaters * KG_SAVED_VEGETARIAN_DAY * MONDAYS_PER_YEAR * KG_TO_METRIC_TONS / BILLION
vegan_monday = meat_eaters * KG_SAVED_VEGAN_DAY * MONDAYS_PER_YEAR * KG_TO_METRIC_TONS / BILLION

In [None]:
# Daily kg CO2e saved per person when a medium meat eater (5.63 kg/day) eats
# vegetarian (3.81 kg/day) or vegan (2.89 kg/day) instead.
KG_SAVED_VEGETARIAN_DAY = 1.82  # 5.63 - 3.81
KG_SAVED_VEGAN_DAY = 2.74       # 5.63 - 2.89
DAYS_PER_YEAR = 365

# kg -> metric tons. The previous factor (0.00110231) converts kg to US short
# tons, which overstated the savings by about 10% relative to the emissions
# series, which the chart labels in billion metric tons.
KG_TO_METRIC_TONS = 0.001
BILLION = 1_000_000_000

# Annual savings, in billion metric tons, if every meat eater switched diet
# for the whole year.
fully_veggie = meat_eaters * KG_SAVED_VEGETARIAN_DAY * DAYS_PER_YEAR * KG_TO_METRIC_TONS / BILLION
fully_vegan = meat_eaters * KG_SAVED_VEGAN_DAY * DAYS_PER_YEAR * KG_TO_METRIC_TONS / BILLION

In [None]:
# Each scenario column is the baseline projection minus that scenario's
# constant annual saving (the same amount is subtracted from every year).
scenario_savings = {
    'Meatless Monday': veggie_monday,
    'Vegan Monday': vegan_monday,
    'Fully Meatless': fully_veggie,
    'Fully Vegan': fully_vegan,
}
for scenario_name, annual_saving in scenario_savings.items():
    projected[scenario_name] = projected['Emissions'] - annual_saving

In [None]:
# Display the current state of the projection table.
projected

In [None]:
# Daily dietary emissions per person (kg CO2 equivalent) for each diet type.
diet_names = ['High Meat', 'Medium Meat', 'Low Meat', 'Vegetarian', 'Vegan']
diet_daily_kg = [7.19, 5.63, 4.67, 3.81, 2.89]

# One [diet, emissions] row per diet, then wrap the rows in a two-column frame.
data = [[diet, kg] for diet, kg in zip(diet_names, diet_daily_kg)]
food_emissions = pd.DataFrame(data, columns=['Diet', 'Emissions'])

## Visualizations

In [None]:
# Bar chart of per-person daily dietary emissions, one bar per diet.
fig, ax1 = plt.subplots(figsize=(10, 8))

# Draw on the axes created above by passing them explicitly; barplot returns
# the same axes object, so it is bound back to ax1 as before.
ax1 = sns.barplot(
    data=food_emissions,
    x='Diet',
    y='Emissions',
    palette='Set2',
    ax=ax1,
)
ax1.set_xlabel('Diet', fontsize=14)
ax1.set_ylabel('CO2 Equivalent (Kg)', fontsize=14)
ax1.set_title('Individual Daily Dietary Emissions', fontsize=16)

plt.show()

In [None]:
# Line chart comparing the baseline emissions projection with each diet scenario.
fig, ax1 = plt.subplots(figsize=(10, 8))

# Column in `projected` -> legend entry. Each line carries its own label, so
# the legend no longer depends on artist draw order. The previous
# plt.legend(labels=[...]) call paired labels with artists positionally, which
# can mislabel lines when seaborn adds extra artists (e.g. error bands).
scenario_labels = {
    'Emissions': 'No Change',
    'Meatless Monday': 'Meatless Monday',
    'Vegan Monday': 'Vegan Monday',
    'Fully Meatless': 'Fully Meatless',
    'Fully Vegan': 'Fully Vegan',
}
for column, legend_label in scenario_labels.items():
    sns.lineplot(x='Year', y=column, data=projected, label=legend_label, ax=ax1)

ax1.set_ylabel('CO2 Equivalent (Billion Metric Tons)', fontsize=14)
ax1.set_xlabel('Year', fontsize=14)
ax1.set_title('Worldwide Projected Total Emissions', fontsize=16)
ax1.legend(fontsize=12);

# Isha's Code

## Data

### Sustainable Cities

In [None]:
# Source: 2019 Sustainable Development report - https://www.sustainabledevelopment.report/reports/2019-us-cities-sustainable-development-report/

# Read the uploaded file by relative path, the same way the Food_Production,
# google and milk loaders in this notebook do, instead of hard-coding the
# Colab-specific '/content/' directory. Only these four columns are kept.
SDG_COLUMNS = ["Region", "maincity", "score_sdgi", "rank_sdgIndex"]
sustainablecities = pd.read_csv('2019USCitiesIndexResults.csv')[SDG_COLUMNS]
sustainablecities.head()

In [None]:
# Average the numeric columns per region. numeric_only=True is needed on
# pandas >= 2.0, where GroupBy.mean() raises TypeError on non-numeric columns
# (here presumably "maincity", the city names) instead of dropping them.
cities_region = sustainablecities.groupby('Region').mean(numeric_only=True)

# NOTE(review): these names are assigned positionally to the groups in sorted
# Region order. That presumes the Region values sort as Northeast, Midwest,
# South, West (e.g. numeric codes 1-4) — confirm against the CSV.
cities_region["Region"] = ["Northeast", "Midwest", "South", "West"]
cities_region.head()

### Diet Survey
1,500 participants from the US were asked:

Over the next 12 months how, if at all, are you planning to change your diet?

In [None]:
# Source: https://d25d2506sfb94s.cloudfront.net/cumulus_uploads/document/ueji3eha6i/econTabReport.pdf
# Read the uploaded file by relative path, the same way the Food_Production,
# google and milk loaders in this notebook do, instead of hard-coding the
# Colab-specific '/content/' directory.
meatsurvey = pd.read_csv('MeatSurvey.csv')
# Constant-valued column: the survey chart maps it to x so that every
# response stacks into a single bar.
meatsurvey.insert(2, "Stacked", "Yes")

meatsurvey.head()

## Visualizations

In [None]:
# Import the plotnine names explicitly rather than with `import *`, which
# dumped the whole plotnine namespace into the notebook. The list covers every
# plotnine name used by this cell and by the city and region chart cells.
from plotnine import (
    aes,
    coord_flip,
    geom_col,
    ggplot,
    labs,
    scale_fill_gradient,
    scale_fill_manual,
    scale_x_discrete,
    theme_minimal,
    theme_void,
)

# Single horizontal stacked bar: every row shares the same "Stacked" value,
# so the responses pile up in one bar, coloured by response.
(ggplot(meatsurvey)
  + aes(x = "Stacked", y = "Percent", fill = "Response")
  + geom_col(width = 0.5)
  + coord_flip()
  + scale_fill_manual(values = ["#6b705c", "#cb997e", "#ddbea9", "#ffe8d6"])
  + theme_void()
  + labs(title = "Over the next 12 months how, if at all, are you planning to change your diet?")
)

In [None]:
# Keep the cities on the x axis in the order they appear in the frame.
# NOTE(review): the title says "ranked", which presumes the CSV rows are
# already ordered by score — confirm.
city_order = sustainablecities["maincity"].tolist()

city_score_plot = (
    pn.ggplot(
        sustainablecities,
        pn.aes(x="maincity", y="score_sdgi", fill="score_sdgi"),
    )
    + pn.geom_col(width=0.75)
    + pn.scale_x_discrete(limits=city_order)
    + pn.scale_fill_gradient(low="#EDDF99", high="#2B9700")
    + pn.theme_void()
    + pn.labs(
        title="US Cities Ranked by Sustainability Index Score",
        fill="Sustainability Index Score",
    )
)
city_score_plot

In [None]:
# Bar order on the x axis and the fill colours for the regional chart.
region_order = ["West", "Northeast", "Midwest", "South"]
region_fills = ["#b5c99a", "#97a97c", "#e9f5db", "#718355"]

region_score_plot = (
    pn.ggplot(
        cities_region,
        pn.aes(x="Region", y="score_sdgi", fill="Region"),
    )
    + pn.geom_col(width=0.75, show_legend=False)
    + pn.scale_x_discrete(limits=region_order)
    + pn.scale_fill_manual(values=region_fills)
    + pn.theme_minimal()
    + pn.labs(
        title="US Regions Ranked By Average Sustainability Index Score",
        y="Sustainability Index Score",
    )
)
region_score_plot

# Amelia's Code

## Food Production

In [None]:
# Dataset taken from Kaggle: https://www.kaggle.com/selfvivek/environment-impact-of-food-production
food_csv = "Food_Production.csv"
df = pd.read_csv(food_csv)

# Preview the first 25 rows.
df.head(25)

In [None]:
# Order the foods from lowest to highest total emissions (ascending is the
# default sort direction).
df2 = df.sort_values(by="Total_emissions")
df2

In [None]:
# Vertical bars of total emissions per food product, coloured by the same
# value on a blue-to-red scale.
total_bar_options = dict(
    x="Food product",
    y="Total_emissions",
    color="Total_emissions",
    color_continuous_scale="Bluered",
)
fig = px.bar(df2, **total_bar_options)
fig.show()

In [None]:
# Hand-assigned category for each row of df2, in df2's current row order.
# NOTE(review): the labels are matched to rows purely by position, so they
# are only correct for the exact sort order df2 had when the list was
# written — confirm if the data or the sort changes.
food_list = [
    "nuts", "fruit", "vegetables", "vegetables", "vegetables", "fruit",
    "vegetables", "vegetables", "fruit", "vegetables", "fruit", "vegetables",
    "Soymilk", "fruit", "grain", "grain", "sugar", "wine", "fruit",
    "grain", "grain", "grain", "nuts", "sugar", "milk", "tofu",
    "oil", "oil", "grain", "eggs", "fish", "oil", "oil",
    "poultry meat", "pig meat", "oil", "shrimp", "coffee", "dark chocolate",
    "beef (dairy herd)", "cheese", "lamb & mutton", "beef (beef herd)",
]
df2["type"] = food_list
df2.head()

In [None]:
# Sum total emissions within each food category; the result is a one-column
# frame indexed by category ("type").
df_cat = df2.groupby("type")["Total_emissions"].sum().to_frame()
df_cat.head(25)

In [None]:
# Expose the category labels as a regular column for plotting. They are taken
# from df_cat's index (the groupby keys) rather than from a hand-typed list,
# which duplicated the index and would silently mislabel rows if the set of
# categories ever changed.
list2 = df_cat.index.tolist()
df_cat["type2"] = list2
df_cat

In [None]:
# Categories ordered from lowest to highest summed emissions; show the first five.
df_cat2 = df_cat.sort_values(by="Total_emissions")
df_cat2.head()

In [None]:
# Horizontal bars of summed emissions per food category.
category_axis_labels = {
    "type2": "Food category",
    "Total_emissions": "Total Emissions",
}
fig = px.bar(
    df_cat2,
    x="Total_emissions",
    y="type2",
    orientation='h',
    color="Total_emissions",
    color_continuous_scale="Bluered",
    title="Total Emissions of Popular Foods",
    labels=category_axis_labels,
)
fig.show()

In [None]:
# Horizontal bars of total emissions per individual food product (untitled).
product_axis_labels = {
    "type2": "Food category",
    "Total_emissions": "Total Emissions",
}
fig = px.bar(
    df2,
    x="Total_emissions",
    y="Food product",
    orientation='h',
    color="Total_emissions",
    color_continuous_scale="Bluered",
    labels=product_axis_labels,
)

# Transparent plot background, fixed canvas size and a larger font.
fig.update_layout(
    plot_bgcolor='rgba(0,0,0,0)',
    height=500,
    width=1000,
    font=dict(size=20),
)

fig.show()

In [None]:
# Bar chart of greenhouse gas emissions per 100g protein for each food product.
ghg_protein_col = "Greenhouse gas emissions per 100g protein (kgCO₂eq per 100g protein)"
fig = px.bar(data_frame=df2, x="Food product", y=ghg_protein_col)
fig.show()

In [None]:
# Scatter of eutrophying emissions per 100g protein for each food product.
eutrophying_protein_col = "Eutrophying emissions per 100g protein (gPO₄eq per 100 grams protein)"
fig = px.scatter(data_frame=df2, x="Food product", y=eutrophying_protein_col)
fig.show()

In [None]:
# Scatter of greenhouse gas emissions per 1000kcal for each food product.
ghg_kcal_col = "Greenhouse gas emissions per 1000kcal (kgCO₂eq per 1000kcal)"
fig = px.scatter(data_frame=df2, x="Food product", y=ghg_kcal_col)
fig.show()

In [None]:
# Scatter of scarcity-weighted water use per 100g protein for each food product.
scarcity_protein_col = "Scarcity-weighted water use per 100g protein (liters per 100g protein)"
fig = px.scatter(data_frame=df2, x="Food product", y=scarcity_protein_col)
fig.show()

In [None]:
# Bar chart of scarcity-weighted water use per kilogram for each food product.
scarcity_kg_col = "Scarcity-weighted water use per kilogram (liters per kilogram)"
fig = px.bar(data_frame=df2, x="Food product", y=scarcity_kg_col)
fig.show()

In [None]:
# Bar chart of freshwater withdrawals per 100g protein for each food product.
freshwater_protein_col = "Freshwater withdrawals per 100g protein (liters per 100g protein)"
fig = px.bar(data_frame=df2, x="Food product", y=freshwater_protein_col)
fig.show()

## Google Vegan/Vegetarian Trends

In [None]:
# Data taken from Google Trends: https://trends.google.com/trends/explore?date=all&geo=US&q=vegan
trends_csv = "google.csv"
goog = pd.read_csv(trends_csv)
goog.head()

In [None]:
# Reshape wide -> long: every column other than Month becomes a (Type, Score)
# pair, giving one row per month and series.
goog2 = goog.melt(
    id_vars=['Month'],
    var_name='Type',
    value_name='Score',
)
goog2.head()

In [None]:
# One line per search term over time; the Month axis is relabelled "Year".
fig = px.line(
    goog2,
    x="Month",
    y="Score",
    color="Type",
    labels={"Month": "Year"},
)

# Transparent plot and paper backgrounds, with the legend anchored just
# inside the top-left corner of the plot area.
fig.update_layout(
    plot_bgcolor='rgba(0,0,0,0)',
    paper_bgcolor='rgba(0,0,0,0)',
    legend=dict(
        yanchor="top",
        y=0.95,
        xanchor="left",
        x=0.05,
    ),
)

fig.show()

## Milk Trends

In [None]:
# Load the uploaded milk trends file and preview the first rows.
milk_csv = "milk.csv"
milk = pd.read_csv(milk_csv)
milk.head()

In [None]:
# Reshape wide -> long: every column other than Month becomes a (Type, Score)
# pair, giving one row per month and series.
milk2 = milk.melt(
    id_vars=['Month'],
    var_name='Type',
    value_name='Score',
)
milk2.head()

In [None]:
# One line per milk type, plotting Score against Month.
milk_line_options = dict(x="Month", y="Score", color="Type")
fig = px.line(milk2, **milk_line_options)
fig.show()