In [2]:
import pandas as pd
import numpy as np
import plotly.express as px

from google.colab import drive
drive.mount('/content/drive')  # mounts google drive

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [5]:
receipt = pd.read_csv("/content/drive/MyDrive/DISCUS_CF/receipt.csv") #reads the csv receipt
receipt = receipt[receipt.columns[[0,1,2,3,7,8,9]]]
receipt = receipt.dropna()

# remove £ sign
receipt.price = receipt.price.str.split('£').str.get(-1)

receipt.co2_item = pd.to_numeric(receipt.co2_item)


In [None]:
receipt.head()

Unnamed: 0,quantity,weight_unit,total_weight,item,price,co2_kg,co2_item
0,1.0,0.37,0.37,Sainsbury's Broccoli Loose,0.59,0.67,0.25
1,1.0,0.44,0.44,Sainsbury's Onions Loose,0.38,0.24,0.11
2,1.0,0.2,0.8,"Sainsbury's Conference Pears, Ripe & Ready x4",2.0,0.29,0.23
3,1.0,0.24,0.24,Sainsbury's Responsibly Sourced Scottish Salmo...,3.25,3.8,0.91
4,3.0,0.22,0.66,By Sainsbury?s Large Ripe & Ready Avocado,3.0,1.0,0.66


In [7]:
# add a classification variable to visualise high, medium or low impact
low = 3
low_std = 5

medium = 10  # these values are arbitrary, morer research needed 
medium_std = 12             # potentially these could be set by the user orrealtive to average item in the shopping/ or relative to total

receipt.loc[receipt['co2_item'] <= low, 'impact'] = 'low'
receipt.loc[receipt['co2_item'] > medium, 'impact'] = 'high'
receipt.loc[receipt['co2_item'] > low, 'impact'] = 'medium'

receipt.loc[receipt['co2_kg'] <= low, 'impact_std'] = 'low'
receipt.loc[receipt['co2_kg'] > medium, 'impact_std'] = 'high'
receipt.loc[receipt['co2_kg'] > low, 'impact_std'] = 'medium'



In [8]:
#  order items by impact 
  #  primarily impact by item, if items have identical impact, the one that has larger impact per kg/l will be displayed first

receipt = receipt.sort_values(by=['co2_item', 'co2_kg'], ascending=True)
receipt.head()

Unnamed: 0,quantity,weight_unit,total_weight,item,price,co2_kg,co2_item,impact,impact_std
22,1.0,0.1,0.1,Nescafé Azera Americano Instant Coffee,4.6,0.33,0.03,low,low
42,1.0,0.08,0.08,Sainsbury's Fresh Living Large Basil Pot,1.8,0.39,0.03,low,low
12,1.0,0.05,0.05,Sainsbury's Garlic,0.25,0.67,0.03,low,low
31,1.0,0.8,0.8,Heinz Baked Beans Snap Pots 4 Pack,2.5,0.11,0.09,low,low
1,1.0,0.44,0.44,Sainsbury's Onions Loose,0.38,0.24,0.11,low,low


In [9]:
# order items by impact per kg
receipt_std = receipt.sort_values(by=['co2_kg'], ascending=True)

receipt_std.head()

Unnamed: 0,quantity,weight_unit,total_weight,item,price,co2_kg,co2_item,impact,impact_std
31,1.0,0.8,0.8,Heinz Baked Beans Snap Pots 4 Pack,2.5,0.11,0.09,low,low
1,1.0,0.44,0.44,Sainsbury's Onions Loose,0.38,0.24,0.11,low,low
28,1.0,2.0,2.0,"Sainsbury's Vivaldi White Potatoes, Taste the ...",2.6,0.25,0.49,low,low
2,1.0,0.2,0.8,"Sainsbury's Conference Pears, Ripe & Ready x4",2.0,0.29,0.23,low,low
5,1.0,0.19,1.14,Sainsbury's Braeburn Apples x6,1.6,0.31,0.36,low,low


In [10]:

fig = px.bar(receipt, 
             x='co2_item',
             y='item',
             orientation = 'h', 
             color = 'impact', 
             width=1200, height=1000,
             hover_name="item",
             hover_data = {"impact"},
             labels={ 
                  "co2_item": "co\u2082 equivalent (kg)", "item":""},
             color_discrete_map={ 
                "low": "green", "medium": "orange", "high" : "red"},
             template="simple_white")

fig.update_layout( # customize font and legend orientation & position
    showlegend = False,
    title= { "text": "My Carbon Footprint", "x" : 0.5, "xanchor": "center", "yanchor": "middle", "font" : {"size" : 25}  }
     )



fig.update_xaxes(visible = False )
fig.update_yaxes(ticks=""  )

fig.show()

In [24]:
# plot based on standardised impact ( ie. disregards weight of item)


fig_std = px.bar(receipt_std, 
             x='co2_kg',
             y='item',
             orientation = 'h', 
             color = 'impact_std', 
             width=1200, height=1000,
             hover_name="item",
             hover_data = {"impact_std"},
             labels={ 
                  "co2_item": "co\u2082 equivalent (kg)", "item":""},
             color_discrete_map={ 
                "low": "green", "medium": "orange", "high" : "red"},
             template="simple_white")

fig_std.update_layout( # customize font and legend orientation & position
    showlegend = False,
    title= { "text": "My Carbon Footprint", "x" : 0.5, "xanchor": "center", "yanchor": "middle", "font" : {"size" : 25}  }
     )



fig_std.update_xaxes(visible = False )
fig_std.update_yaxes(ticks=""  )

fig_std.show()

In [11]:
#pick top 3 offenders
top_cf = receipt.tail(n=3).reset_index(drop=True)
top_cf




Unnamed: 0,quantity,weight_unit,total_weight,item,price,co2_kg,co2_item,impact,impact_std
0,1.0,0.2,0.2,Sainsbury's Parmigiano Reggiano Cheese,3.3,14.08,2.82,low,medium
1,3.0,0.13,0.39,"Sainsbury's Buffalo Mozzarella Cheese, Taste t...",6.0,8.48,3.31,medium,medium
2,1.0,1.75,1.75,"Sainsbury's Apple & Mango Juice, Not From Conc...",1.95,2.96,5.18,medium,low


In [12]:
# ************suggest nutritionally similar items with lower cf (co2_kg)**********

# !pip install nltk  
import nltk # natural language processing library
nltk.download('punkt')
from nltk.tokenize import word_tokenize

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


In [13]:
# to remove stopwords

from nltk.corpus import stopwords
nltk.download('stopwords')
sr= stopwords.words('english')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


In [14]:
# for word stemming
from nltk.stem import PorterStemmer
from nltk.tokenize import sent_tokenize, word_tokenize

port_stem = PorterStemmer()



In [15]:
top1 = top_cf.item[0]
top2 = top_cf.item[1]
top3 = top_cf.item[2]

In [16]:
# word tokenize

top1= word_tokenize(top1)
top2= word_tokenize(top2)
top3= word_tokenize(top3)



# print(("Word tokenizing the text \n {}, {}, {}").format(top1, top2, top3))

In [17]:
# remove stop words

top1 = [w for w in top1 if not w in sr]
top2 = [w for w in top2 if not w in sr]
top3 = [w for w in top3 if not w in sr]

print(('text after removing stop words \n {},\n {},\n {}').format(top1, top2, top3))

text after removing stop words 
 ['Sainsbury', "'s", 'Parmigiano', 'Reggiano', 'Cheese'],
 ['Sainsbury', "'s", 'Buffalo', 'Mozzarella', 'Cheese', ',', 'Taste', 'Difference'],
 ['Sainsbury', "'s", 'Apple', '&', 'Mango', 'Juice', ',', 'Not', 'From', 'Concentrate']


In [18]:
# word stemming & word lemmatizing
top1_stem = []
top2_stem =[]
top3_stem = []
top_stems = [top1_stem, top2_stem, top3_stem] 
top_items = [top1, top2, top3]

for item in top_items:
   for word in  top_items[top_items.index(item)]:
     top_stems[top_items.index(item)].append(port_stem.stem(word))


# will get stems - useful for database retrieval? or matching?

In [19]:
# go through nutri csv and find an item with as many matching stems as possible
  # change path if outside google colab

nutri = pd.read_csv("/content/drive/MyDrive/DISCUS_CF/nutri_proximates.csv")
#nutri.columns
nutri = nutri [['Food Name','Group', 'Protein (g)', 'Fat (g)', 'Carbohydrate (g)', 'Energy (kcal) (kcal)' ]]


In [20]:
nutri = nutri.dropna()
nutri.head()


Unnamed: 0,Food Name,Group,Protein (g),Fat (g),Carbohydrate (g),Energy (kcal) (kcal)
2,"Ackee, canned, drained",DG,2.9,15.2,0.8,151
3,"Agar, dried",DG,1.3,1.2,Tr,16
4,"Agar, dried, soaked and drained",DG,0.2,0.1,Tr,2
5,"Alfalfa sprouts, raw",DG,4.0,0.7,0.4,24
6,"Allspice, ground",H,6.1,8.7,N,N


In [None]:

# check that string contains substring
  # repeat for each element of the stemmed word
  # if true, add 1 to  a variable
  # in the end choose an item with highest value count

In [None]:
# for each item get protein content (standardised)
# search for item with similar protein content (or as close as possible,higher better than lower)

In [23]:
# dash - choose one of three visualisations
# %pip install dash
import dash
import dash_core_components as dcc
import dash_html_components as html

Collecting dash
[?25l  Downloading https://files.pythonhosted.org/packages/b2/8f/4e5f73f7c1fce28c7cfb5fc7c6c5b2a2e672e5b6ac8c45100bd80a829c11/dash-1.21.0.tar.gz (1.1MB)
[K     |▎                               | 10kB 16.4MB/s eta 0:00:01[K     |▋                               | 20kB 23.4MB/s eta 0:00:01[K     |█                               | 30kB 28.5MB/s eta 0:00:01[K     |█▏                              | 40kB 30.6MB/s eta 0:00:01[K     |█▌                              | 51kB 23.3MB/s eta 0:00:01[K     |█▉                              | 61kB 24.1MB/s eta 0:00:01[K     |██▏                             | 71kB 22.8MB/s eta 0:00:01[K     |██▍                             | 81kB 23.8MB/s eta 0:00:01[K     |██▊                             | 92kB 24.0MB/s eta 0:00:01[K     |███                             | 102kB 24.9MB/s eta 0:00:01[K     |███▎                            | 112kB 24.9MB/s eta 0:00:01[K     |███▋                            | 122kB 24.9MB/s eta 0:00:0

In [None]:
app = dash.Dash(__name__)



app.layout = html.Div(style = {
  'backgroundColor': '#a6bd2e'
}, children = [
    html.H1(
    children = 'My carbon footprint',
    style = {
      'textAlign': 'center',
      'color': '#7FDBFF'
    }
  ),

    html.Div(children = ' interactive data visualization.', style = {
    'textAlign': 'center',
    'color': '#7FDBFF'
  }),

    dcc.Graph(
    id = 'carbon footprint',
    figure = fig
  )
])

if __name__ == '__main__':
  app.run_server(debug = True)