# Exploring geographic data from Brooklyn Food Waste

Some exploratory questions:

1. Do foods labeled as organic get trashed differently than foods not labeled organic?
2. What is the relationship between the date the food was collected and the date on its label?

I will also plot the location data using Folium.

See Dashboard on Tableau Public [Here](https://public.tableau.com/app/profile/brandon.scott3793/viz/BrooklynFoodWaste_16234221843090/BrooklynFoodWasteDashboard)

In [2]:
# Import of libraries
import pandas as pd 
import numpy as np 
import folium
import plotly.express as px

In [3]:
# Load the raw data. The path is relative to the notebook's working directory
# so the notebook runs on any machine; point DATA_PATH at your copy of
# brooklyn.csv if it lives somewhere else.
DATA_PATH = 'brooklyn.csv'
df = pd.read_csv(DATA_PATH)
df.head()

Unnamed: 0,id,date_collected,retailer_type,retailer_detail,food_type,food_detail,label_type,label_language,label_date,approximate_dollar_value,image_id,collection_lat,collection_long,label_explanation
0,5e31d5503b85a2e63d634187,2020-01-27,counter service,ready-to-eat and prepackaged health food,ready-to-eat,organic oatmeal,sticker,use by,2020-01-28,5.25,IMG_1872,40.69453,-73.99447,
1,5e31d5643b85a2e63d634188,2020-01-27,counter service,ready-to-eat and prepackaged health food,ready-to-eat,organic oatmeal,sticker,use by,2020-01-28,5.25,IMG_1873,40.69453,-73.99447,
2,5e31d8903b85a2e63d634189,2020-01-27,counter service,ready-to-eat and prepackaged health food,ready-to-eat,organic middle eastern salad,sticker,use by,2020-01-28,10.0,IMG_1874,40.69453,-73.99447,
3,5e31dcee3b85a2e63d63418a,2020-01-27,counter service,ready-to-eat and prepackaged health food,ready-to-eat,organic oatmeal,sticker,use by,2020-01-28,6.25,IMG_1875,40.69453,-73.99447,
4,5e31dd273b85a2e63d63418b,2020-01-27,counter service,ready-to-eat and prepackaged health food,ready-to-eat,organic mexican salad,sticker,use by,2020-01-28,10.0,IMG_1876,40.69453,-73.99447,


In [4]:
# (rows, columns) of the loaded data
df.shape

(171, 14)

In [5]:
# Show any rows that are exact duplicates of an earlier row
df[df.duplicated()]

Unnamed: 0,id,date_collected,retailer_type,retailer_detail,food_type,food_detail,label_type,label_language,label_date,approximate_dollar_value,image_id,collection_lat,collection_long,label_explanation


In [6]:
# Count of missing values in each column
df.isna().sum()

id                            0
date_collected                0
retailer_type                 0
retailer_detail               0
food_type                     0
food_detail                   0
label_type                    0
label_language               51
label_date                    1
approximate_dollar_value      0
image_id                      0
collection_lat                0
collection_long               0
label_explanation           170
dtype: int64

In [7]:
# Inspect the dtype pandas inferred for each column
df.dtypes

id                           object
date_collected               object
retailer_type                object
retailer_detail              object
food_type                    object
food_detail                  object
label_type                   object
label_language               object
label_date                   object
approximate_dollar_value    float64
image_id                     object
collection_lat              float64
collection_long             float64
label_explanation            object
dtype: object

In [8]:
# Turn date_collected and label_date into datetimes so they can be subtracted later.
# date_collected has no missing values, so an unparseable entry is a data error
# and should raise; errors='ignore' would silently leave the column as strings
# (and is deprecated in recent pandas).
df['date_collected'] = pd.to_datetime(df['date_collected'])
# label_date can be missing or unparseable; those entries become NaT.
df['label_date'] = pd.to_datetime(df['label_date'], errors='coerce')

In [9]:
# Distinct retailer types present in the data
df['retailer_type'].unique()

array(['counter service', 'health food grocer', 'drugstore', 'coffeeshop',
       'bakery/deli', 'chain grocer'], dtype=object)

In [10]:
# Re-check the dtypes after the datetime conversion
df.dtypes

id                                  object
date_collected              datetime64[ns]
retailer_type                       object
retailer_detail                     object
food_type                           object
food_detail                         object
label_type                          object
label_language                      object
label_date                  datetime64[ns]
approximate_dollar_value           float64
image_id                            object
collection_lat                     float64
collection_long                    float64
label_explanation                   object
dtype: object

In [11]:
# Drop columns that are not needed for the analysis. Reassigning (instead of
# inplace=True) together with errors='ignore' keeps this cell safe to re-run:
# a second execution does not raise KeyError for columns that are already gone.
df = df.drop(columns=['image_id', 'label_explanation'], errors='ignore')
df.head()

Unnamed: 0,id,date_collected,retailer_type,retailer_detail,food_type,food_detail,label_type,label_language,label_date,approximate_dollar_value,collection_lat,collection_long
0,5e31d5503b85a2e63d634187,2020-01-27,counter service,ready-to-eat and prepackaged health food,ready-to-eat,organic oatmeal,sticker,use by,2020-01-28,5.25,40.69453,-73.99447
1,5e31d5643b85a2e63d634188,2020-01-27,counter service,ready-to-eat and prepackaged health food,ready-to-eat,organic oatmeal,sticker,use by,2020-01-28,5.25,40.69453,-73.99447
2,5e31d8903b85a2e63d634189,2020-01-27,counter service,ready-to-eat and prepackaged health food,ready-to-eat,organic middle eastern salad,sticker,use by,2020-01-28,10.0,40.69453,-73.99447
3,5e31dcee3b85a2e63d63418a,2020-01-27,counter service,ready-to-eat and prepackaged health food,ready-to-eat,organic oatmeal,sticker,use by,2020-01-28,6.25,40.69453,-73.99447
4,5e31dd273b85a2e63d63418b,2020-01-27,counter service,ready-to-eat and prepackaged health food,ready-to-eat,organic mexican salad,sticker,use by,2020-01-28,10.0,40.69453,-73.99447


## Let's get a general understanding of the food collected

How many stores were visited?<br>
How much food was recovered from each store type?

In [12]:
# Each (latitude, longitude) pair stands in for one store visited;
# count the food items recorded at each pair, alongside its retailer type.
(
    df
    .groupby(['collection_lat', 'collection_long', 'retailer_type'])[['id']]
    .count()
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,id
collection_lat,collection_long,retailer_type,Unnamed: 3_level_1
40.6912,-73.98716,drugstore,14
40.69335,-73.9915,health food grocer,39
40.69415,-73.99233,drugstore,38
40.69453,-73.99447,counter service,9
40.6946,-73.99395,coffeeshop,2
40.69485,-73.9949,bakery/deli,35
40.69789,-73.99267,chain grocer,34


## The table above shows each location, its retailer type, and the number of foods collected there

First, how much food was collected from each type of retailer?

In [13]:
# Number of foods recovered per retailer type, as a tidy two-column frame.
# The column names are set explicitly because the names produced by
# value_counts().reset_index() differ between pandas 1.x ('index', 'retailer_type')
# and pandas 2.x ('retailer_type', 'count').
retailer_counts = (
    df['retailer_type']
    .value_counts()
    .rename_axis('retailer_type')
    .reset_index(name='foods_recovered')
)
fig1 = px.bar(
    retailer_counts,
    x='retailer_type',
    y='foods_recovered',
    labels={'retailer_type': 'Retailer Type', 'foods_recovered': 'Number of Foods Recovered'},
    title='Food Recovered by Retailer Type',
)
fig1.show()

## The most food was recovered from drugstores, the least from a coffee shop


Do foods labeled as organic get trashed differently than foods not labeled organic?
We can use the `food_detail` column to determine whether or not the food is labeled organic.

In [14]:
# Preview the first rows to see what food_detail looks like
df.head()

Unnamed: 0,id,date_collected,retailer_type,retailer_detail,food_type,food_detail,label_type,label_language,label_date,approximate_dollar_value,collection_lat,collection_long
0,5e31d5503b85a2e63d634187,2020-01-27,counter service,ready-to-eat and prepackaged health food,ready-to-eat,organic oatmeal,sticker,use by,2020-01-28,5.25,40.69453,-73.99447
1,5e31d5643b85a2e63d634188,2020-01-27,counter service,ready-to-eat and prepackaged health food,ready-to-eat,organic oatmeal,sticker,use by,2020-01-28,5.25,40.69453,-73.99447
2,5e31d8903b85a2e63d634189,2020-01-27,counter service,ready-to-eat and prepackaged health food,ready-to-eat,organic middle eastern salad,sticker,use by,2020-01-28,10.0,40.69453,-73.99447
3,5e31dcee3b85a2e63d63418a,2020-01-27,counter service,ready-to-eat and prepackaged health food,ready-to-eat,organic oatmeal,sticker,use by,2020-01-28,6.25,40.69453,-73.99447
4,5e31dd273b85a2e63d63418b,2020-01-27,counter service,ready-to-eat and prepackaged health food,ready-to-eat,organic mexican salad,sticker,use by,2020-01-28,10.0,40.69453,-73.99447


In [15]:
# Rows whose food_detail text does not contain the word "organic"
df[~df['food_detail'].str.contains('organic')]

Unnamed: 0,id,date_collected,retailer_type,retailer_detail,food_type,food_detail,label_type,label_language,label_date,approximate_dollar_value,collection_lat,collection_long
9,5e442b00c435958777371fc6,2020-02-11,health food grocer,"ready-to-eat, shelf-stable, frozen, and perish...",perishable,yogurt,package printed,,2020-02-11,3.99,40.69335,-73.99150
10,5e442be9c435958777371fc7,2020-02-11,health food grocer,"ready-to-eat, shelf-stable, frozen, and perish...",perishable,yogurt,package printed,,2020-02-11,3.99,40.69335,-73.99150
12,5e4435a1c435958777371fc9,2020-02-11,health food grocer,"ready-to-eat, shelf-stable, frozen, and perish...",perishable,yogurt,package printed,,2020-01-01,1.25,40.69335,-73.99150
18,5e444759c435958777371fcf,2020-02-11,health food grocer,"ready-to-eat, shelf-stable, frozen, and perish...",perishable,brown rice torilla,package printed,t2513 dv,2019-09-11,4.38,40.69335,-73.99150
20,5e444b3bc435958777371fd1,2020-02-11,health food grocer,"ready-to-eat, shelf-stable, frozen, and perish...",perishable,millet and chia sliced bread,sticker,,2020-02-11,6.99,40.69335,-73.99150
...,...,...,...,...,...,...,...,...,...,...,...,...
166,6025c0f0725181c8c293646d,2021-02-05,drugstore,"ready-to-eat, shelf-stable, and frozen food",perishable,mandarin orange fruit cup,package printed,best by,2021-07-20,1.99,40.69120,-73.98716
167,6025c0fb725181c8c293646e,2021-02-05,drugstore,"ready-to-eat, shelf-stable, and frozen food",perishable,mandarin orange fruit cup,package printed,best by,2021-07-20,1.99,40.69120,-73.98716
168,6025c15f725181c8c293646f,2021-02-05,drugstore,"ready-to-eat, shelf-stable, and frozen food",perishable,flatbread pepperoni pizza,package printed,use by,2021-02-05,1.99,40.69120,-73.98716
169,6025c1ed725181c8c2936470,2021-02-05,drugstore,"ready-to-eat, shelf-stable, and frozen food",perishable,mandarin orange fruit cup,package printed,bb,2021-12-12,1.99,40.69120,-73.98716


## This creates a new column to label whether the food is labeled organic or not

1 = labeled organic
0 = not labeled organic

In [16]:
# Flag foods whose description mentions "organic": 1 = labeled organic, 0 = not.
# The boolean match is cast straight to int. regex=False matches the literal
# substring, and na=False maps a missing description to 0 instead of NaN.
df['is_organic'] = (
    df['food_detail']
    .str.contains('organic', regex=False, na=False)
    .astype(int)
)
df.head()

Unnamed: 0,id,date_collected,retailer_type,retailer_detail,food_type,food_detail,label_type,label_language,label_date,approximate_dollar_value,collection_lat,collection_long,is_organic
0,5e31d5503b85a2e63d634187,2020-01-27,counter service,ready-to-eat and prepackaged health food,ready-to-eat,organic oatmeal,sticker,use by,2020-01-28,5.25,40.69453,-73.99447,1
1,5e31d5643b85a2e63d634188,2020-01-27,counter service,ready-to-eat and prepackaged health food,ready-to-eat,organic oatmeal,sticker,use by,2020-01-28,5.25,40.69453,-73.99447,1
2,5e31d8903b85a2e63d634189,2020-01-27,counter service,ready-to-eat and prepackaged health food,ready-to-eat,organic middle eastern salad,sticker,use by,2020-01-28,10.0,40.69453,-73.99447,1
3,5e31dcee3b85a2e63d63418a,2020-01-27,counter service,ready-to-eat and prepackaged health food,ready-to-eat,organic oatmeal,sticker,use by,2020-01-28,6.25,40.69453,-73.99447,1
4,5e31dd273b85a2e63d63418b,2020-01-27,counter service,ready-to-eat and prepackaged health food,ready-to-eat,organic mexican salad,sticker,use by,2020-01-28,10.0,40.69453,-73.99447,1


In [50]:
# Share of collected food that was / was not labeled organic.
# Slice names come from the is_organic value itself rather than from the
# position in value_counts(), so the labels stay correct whichever group is
# larger, and the percentage in the title is computed from the data.
organic_counts = (
    df['is_organic']
    .map({0: 'Not Organic', 1: 'Organic'})
    .value_counts()
)
not_organic_share = organic_counts.get('Not Organic', 0) / organic_counts.sum()
px.pie(
    values=organic_counts.to_numpy(),
    names=organic_counts.index.to_numpy(),
    title=f'{not_organic_share:.0%} of food waste was not labeled Organic',
)

# Time Evaluation
## Time will be evaluated in a "days past label" format
Numbers over 0 mean the food was collected after its label date; numbers below 0 mean it was thrown out before its label date.

We would expect the majority of food to be tossed around day 0.

In [18]:
# Signed gap between collection and label date (positive = collected after the label date)
collected_minus_label = df['date_collected'].sub(df['label_date'])
df['days_past_label'] = collected_minus_label
# Same gap as a plain number of whole days, which is easier to aggregate
df['days_past_label_int'] = collected_minus_label.dt.days

In [19]:
# Preview the frame including the days_past_label columns
df.head()

Unnamed: 0,id,date_collected,retailer_type,retailer_detail,food_type,food_detail,label_type,label_language,label_date,approximate_dollar_value,collection_lat,collection_long,is_organic,days_past_label,days_past_label_int
0,5e31d5503b85a2e63d634187,2020-01-27,counter service,ready-to-eat and prepackaged health food,ready-to-eat,organic oatmeal,sticker,use by,2020-01-28,5.25,40.69453,-73.99447,1,-1 days,-1.0
1,5e31d5643b85a2e63d634188,2020-01-27,counter service,ready-to-eat and prepackaged health food,ready-to-eat,organic oatmeal,sticker,use by,2020-01-28,5.25,40.69453,-73.99447,1,-1 days,-1.0
2,5e31d8903b85a2e63d634189,2020-01-27,counter service,ready-to-eat and prepackaged health food,ready-to-eat,organic middle eastern salad,sticker,use by,2020-01-28,10.0,40.69453,-73.99447,1,-1 days,-1.0
3,5e31dcee3b85a2e63d63418a,2020-01-27,counter service,ready-to-eat and prepackaged health food,ready-to-eat,organic oatmeal,sticker,use by,2020-01-28,6.25,40.69453,-73.99447,1,-1 days,-1.0
4,5e31dd273b85a2e63d63418b,2020-01-27,counter service,ready-to-eat and prepackaged health food,ready-to-eat,organic mexican salad,sticker,use by,2020-01-28,10.0,40.69453,-73.99447,1,-1 days,-1.0


In [20]:
# Average number of whole days between label date and collection (negative = before the label date)
df.days_past_label_int.mean()

-21.63855421686747

In [21]:

# Mean gap between collection date and label date, as a Timedelta
days_mean = df['days_past_label'].mean()

# A negative Timedelta prints like "-22 days +08:42:28", which is easy to
# misread, so report the magnitude in fractional days and state the direction in words.
mean_days = days_mean / pd.Timedelta(days=1)
direction = 'before' if mean_days < 0 else 'after'
print(f'On average, food was thrown out {abs(mean_days):.1f} days {direction} its label date')

The average food was thrown out  -22 days +08:42:28.192771085 past its label date


In [22]:
# Average days past label, split by the organic flag (0 = not organic, 1 = organic)
df['days_past_label_int'].groupby(df['is_organic']).mean()

is_organic
0   -22.805755
1   -15.629630
Name: days_past_label_int, dtype: float64

## On average, food was thrown away 2-3 weeks before its label date.
### Non-organic food was thrown away further ahead of its label date than organic food (about 23 vs. 16 days before, on average)
### However, organic food was thrown away at a higher rate

In [23]:
# Average days past label for each label wording, from earliest to latest
(
    df
    .groupby('label_language')['days_past_label_int']
    .mean()
    .sort_values()
)

label_language
by                 -625.000000
bb                 -229.500000
best used by       -226.000000
best by            -212.666667
p6493               -88.000000
exp                 -53.500000
best if used by     -28.500000
12619                -3.000000
expires              -1.000000
sell by              -0.903226
use by               -0.214286
enjoy by              0.000000
sell thru             0.520000
for                   1.000000
use or freeze by      7.000000
p5b 16:50            25.000000
t2513 dv            153.000000
Name: days_past_label_int, dtype: float64

In [45]:
# Write the cleaned frame to a CSV in the current working directory.
# NOTE(review): to_csv writes the DataFrame index by default, so the file gets an
# unnamed leading column — pass index=False if downstream consumers do not need it.
df.to_csv('brooklyn_clean.csv')

# Mapping Geographic Data

### The map detailed below will give us a visualization of the neighborhood and other geographical features related to the locations detailed in the data

In [35]:
# Output column name -> source column in df it is copied from
location_dic = {
    'latitude': 'collection_lat',
    'longitude': 'collection_long',
    'retailer_type': 'retailer_type',
    'dollar_value_total': 'approximate_dollar_value',
}
location_data = pd.DataFrame(
    {target: df[source] for target, source in location_dic.items()}
)

# One row per (latitude, longitude, retailer_type) with the summed dollar value
location_data = (
    location_data
    .groupby(['latitude', 'longitude', 'retailer_type'])
    .sum()
    .reset_index()
)

In [36]:
location_data

Unnamed: 0,latitude,longitude,retailer_type,dollar_value_total
0,40.6912,-73.98716,drugstore,46.86
1,40.69335,-73.9915,health food grocer,283.8
2,40.69415,-73.99233,drugstore,100.22
3,40.69453,-73.99447,counter service,50.75
4,40.6946,-73.99395,coffeeshop,7.5
5,40.69485,-73.9949,bakery/deli,247.45
6,40.69789,-73.99267,chain grocer,203.6


In [43]:
# Folium map of the collection sites: a circle marker at every location plus a
# clickable pin whose popup shows the retailer type and the total dollar value.
# (folium is already imported in the imports cell at the top of the notebook.)

# Centre the map on a nearby Brooklyn intersection (coordinates looked up on Google).
# NOTE(review): the name `map` shadows Python's built-in map(); it is kept so
# that any other cell referring to `map` keeps working.
map = folium.Map(
    width='50%',
    height='50%',
    location=[40.69531557428469, -73.99306729689098],
    zoom_start=15,
)

# Feature group holding the circle markers so they are added to the map as one layer
dumpsters = folium.FeatureGroup()

# One pass over the locations: circle marker into the group, labeled pin onto the map
for site in location_data.itertuples(index=False):
    coords = [site.latitude, site.longitude]
    dumpsters.add_child(
        folium.CircleMarker(
            coords,
            radius=8,
            color='blue',
            fill=True,
            fill_color='red',
            fill_opacity=0.7,
        )
    )
    # Build the popup as readable text; a list passed as popup would be shown
    # as a stringified Python list.
    folium.Marker(
        coords,
        popup=f'{site.retailer_type}: ${site.dollar_value_total:,.2f}',
    ).add_to(map)

# Add the circle-marker layer to the map
map.add_child(dumpsters)