**CalgaryHacks 2021 Project**

**Name:** Isha Afzaal

**Group #:** 74

**Info:** This notebook uses machine learning to predict the weight percentage of each type of waste, based on usage data published by the City of Calgary. The predictions will be used in the project to show users the current conditions of the City and its overall progress towards larger goals such as sustainability. The data comes from the Green_Cart_Waste_Composition_2020 file, obtained at: https://data.calgary.ca/Help-and-Information/Green-Cart-Waste-Composition-2020-By-Material/mmck-7yr8

In [2]:
# Best-effort request for TensorFlow 2.x through an IPython line magic.
# NOTE: the `%...` line is notebook syntax, not plain Python, so this cell
# only runs inside an IPython/Colab kernel.
try:
  # %tensorflow_version only exists in Colab.
  %tensorflow_version 2.x
except Exception:
  # Deliberately ignore any failure so the rest of the notebook still runs
  # with whatever TensorFlow version is already installed.
  pass

Upload training data

In [3]:
# `google.colab` is only importable inside a Colab runtime.
from google.colab import files
# Ask the user to upload the training CSV. The later cell reads the file by
# its name from the working directory; `uploaded` itself is not used in the
# cells shown here (presumably it maps file names to contents - TODO confirm).
uploaded = files.upload()

Saving Green_Cart_Waste_Composition_2020.csv to Green_Cart_Waste_Composition_2020.csv


In [None]:
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.layers.experimental import preprocessing
import pandas as pd
import numpy as np

# --- CSV File Loading and Data Preprocessing code is from the TensorFlow tutorial: https://www.tensorflow.org/tutorials/load_data/csv ---

# Make numpy values easier to read
np.set_printoptions(precision=3, suppress=True)

# Load the Green Cart waste-composition data uploaded in the previous cell
train = pd.read_csv("Green_Cart_Waste_Composition_2020.csv")
# A bare expression is only rendered when it is the last statement of a
# notebook cell, so print the preview explicitly.
print(train.head())

# Target of data: the 'Weight %' column; every remaining column is a feature
train_features = train.copy()
train_labels = train_features.pop('Weight %')

# Pre-process the data: build one symbolic Keras input per feature column.
# Object (text) columns become string inputs, everything else float32.
inputs = {}

for name, column in train_features.items():
  dtype = tf.string if column.dtype == object else tf.float32
  inputs[name] = tf.keras.Input(shape=(1,), name=name, dtype=dtype)

# Numeric inputs are concatenated and passed through one normalization layer
numeric_inputs = {
    name: feature_input
    for name, feature_input in inputs.items()
    if feature_input.dtype == tf.float32
}

# Collect all preprocessing results (numeric block first, then one-hot blocks)
preprocessed_inputs = []

# Skip the numeric branch when the CSV has no numeric feature columns:
# concatenating an empty list and adapting on an empty array would both fail.
if numeric_inputs:
  x = layers.Concatenate()(list(numeric_inputs.values()))
  norm = preprocessing.Normalization()
  # Select the columns with a real list instead of a dict_keys view
  norm.adapt(np.array(train[list(numeric_inputs)]))
  all_numeric_inputs = norm(x)
  preprocessed_inputs.append(all_numeric_inputs)

# One-hot encode every string input, using the distinct values seen in the
# training data as the vocabulary.
# (`feature_input` is used as the loop name to avoid shadowing the builtin `input`.)
for name, feature_input in inputs.items():
  if feature_input.dtype == tf.float32:
    continue
  lookup = preprocessing.StringLookup(vocabulary=np.unique(train_features[name]))
  one_hot = preprocessing.CategoryEncoding(max_tokens=lookup.vocab_size())
  x = lookup(feature_input)
  x = one_hot(x)
  preprocessed_inputs.append(x)

# Concatenate ALL preprocessed inputs into a single feature tensor and wrap
# the whole preprocessing graph in its own model
preprocessed_inputs_cat = layers.Concatenate()(preprocessed_inputs)
train_preprocessing = tf.keras.Model(inputs, preprocessed_inputs_cat)
# Draw the preprocessing graph left-to-right, including tensor shapes
tf.keras.utils.plot_model(model=train_preprocessing, rankdir="LR", dpi=72, show_shapes=True)

# Convert the pandas DataFrame to a dictionary of numpy arrays keyed by column
train_features_dict = {name: np.array(value) for name, value in train_features.items()}

# Run the first training example through the preprocessing model as a smoke
# test; the call raises if the inputs and layers do not line up.
features_dict = {name: values[:1] for name, values in train_features_dict.items()}
train_preprocessing(features_dict)

# --- Make & Train Model ---
print("--Make model--")
def my_model (preprocessing_head, inputs):
  """Build and compile a regression model on top of a preprocessing model.

  Args:
    preprocessing_head: Callable (here the Keras preprocessing model) that
      maps the dictionary of symbolic inputs to one feature tensor.
    inputs: Dictionary of `tf.keras.Input` objects, one per feature column.

  Returns:
    A compiled `tf.keras.Model` mapping `inputs` to a single predicted value.
  """
  body = tf.keras.Sequential([
      tf.keras.layers.Dense(64, activation='relu'),
      tf.keras.layers.Dense(1)
  ])
  preprocessed_inputs = preprocessing_head(inputs)
  result = body(preprocessed_inputs)
  model = tf.keras.Model(inputs, result)
  # 'Weight %' is a continuous target, so this is a regression problem.
  # Binary cross-entropy and 'accuracy' (both meant for 0/1 classification)
  # are replaced with mean squared error and mean absolute error.
  model.compile(
      loss=tf.keras.losses.MeanSquaredError(),
      optimizer='adam',
      metrics=['mae'],
  )
  return model

model = my_model(train_preprocessing, inputs)
# --- End of code inspired by tutorial ---

print("--Fit model--")
model.fit(x=train_features_dict, y=train_labels, epochs=100)

# Evaluate model
# NOTE: evaluation uses the same data the model was trained on, so these
# numbers describe how well the model fits the data, not how it generalizes.
print("--Evaluate model--")
model_loss, model_mae = model.evaluate(x=train_features_dict, y=train_labels, verbose=2)
print(f"Model Loss (MSE): {model_loss:.2f}")
print(f"Model MAE:        {model_mae:.2f}")