## **Welcome to Week Two of BrainTank Deep Learning:**


Let's get started:

---

The first thing we are going to do is run this piece of code, which will download important files for this week's challenge. Take a look at:

1.   DiamondList.csv

In [None]:
!git clone https://github.com/BrainTankDeepLearning/Week2.git

Cloning into 'Week2'...
remote: Enumerating objects: 33, done.[K
remote: Counting objects: 100% (33/33), done.[K
remote: Compressing objects: 100% (33/33), done.[K
remote: Total 33 (delta 13), reused 0 (delta 0), pack-reused 0[K
Unpacking objects: 100% (33/33), done.


Helper Functions:
---
Here I have provided three helper functions that will help us use and view some of the data we are working with. You DO NOT need to edit any of these, but feel free to play around with them.

In [None]:
import torch
from torch.nn import Module
import numpy as np
import csv
import matplotlib.pyplot as plt
import os
import pandas as pd
import math

def process_data():
  """Add one-hot indicator columns to the raw diamond table.

  Loads "Week2/Diamonds.csv" and, for every colour grade, clarity grade and
  shape listed below, appends a 0/1 column named after that category value.
  The widened table is written to "DiamondList.csv" and also returned as a
  pandas DataFrame.
  """
  frame = pd.read_csv("Week2/Diamonds.csv")

  # (source column, category values), in the order the indicator columns
  # are appended to the table.
  encodings = (
    ("Colour", ["D", "E", "F", "G", "H", "I", "J"]),
    ("Clarity", ["FL", "IF", "VVS1", "VVS2", "VS1", "VS2", "SI1", "SI2"]),
    ("Shape", ["Cushion", "Emerald", "Heart", "Oval", "Pear", "Princess", "Radiant", "Round"]),
  )
  for source_column, categories in encodings:
    for category in categories:
      matches = frame[source_column] == category
      frame[category] = matches.astype(int)

  frame.to_csv("DiamondList.csv")

  return frame

#Reads data from csv file
def read_data():
  """Load the diamond table and return it as a shuffled float tensor.

  Pipeline:
    1. read "Week2/DiamondList.csv";
    2. drop the index / identifier / raw categorical columns listed below;
    3. replace "Price" with its base-10 logarithm;
    4. shuffle the rows with a fixed seed so the order is reproducible;
    5. min-max scale "Carat" and "Price" into the range [0, 1];
    6. write the result to "DiamondList.csv" and
       "TrimmedMysteryDiamonds.csv" in the working directory;
    7. convert the remaining columns to a float32 torch tensor.

  Returns:
    A 2-D float tensor with one row per diamond.
  """
  unused_columns = [
    "Unnamed: 0", "Co_Ref_No", "Shape", "Price_CA$",
    "Cut", "Colour", "Clarity", "Shape_No",
  ]
  table = pd.read_csv("Week2/DiamondList.csv").drop(columns = unused_columns)

  table["Price"] = np.log10(table["Price"])
  table = table.sample(frac = 1, random_state = 0)

  # Min-max scaling: (x - min) / (max - min), column by column.
  scaled_columns = ["Carat", "Price"]
  values = table[scaled_columns]
  lowest = values.min()
  highest = values.max()
  table[scaled_columns] = (values - lowest) / (highest - lowest)

  for output_path in ("DiamondList.csv", "TrimmedMysteryDiamonds.csv"):
    table.to_csv(output_path)

  return torch.from_numpy(table.values).float()

#this function transforms a normalized cost into
# a dollar value  
def normalized_cost_to_dollars(normalized, minimum_price=1628.67, maximum_price=393183.50):
  """Convert a min-max normalised log10 price back into a dollar value.

  This inverts the two steps applied to the price column: min-max scaling
  of log10(price), then the logarithm itself.

  Args:
    normalized: Normalised price; 0 maps to `minimum_price` and 1 maps to
      `maximum_price`. Works for plain numbers and for torch tensors.
    minimum_price: Dollar price corresponding to a normalised value of 0.
      The default is presumably the cheapest diamond in the course
      dataset -- verify against the data if it changes.
    maximum_price: Dollar price corresponding to a normalised value of 1.
      The default is presumably the most expensive diamond in the course
      dataset -- verify against the data if it changes.

  Returns:
    The price in dollars, of the same kind (number / tensor) as the input.

  Raises:
    ValueError: If either price bound is not strictly positive (raised by
      `math.log10`).
  """
  minimum_value = math.log10(minimum_price)
  maximum_value = math.log10(maximum_price)

  # Undo the min-max scaling to recover log10(price), then undo the log.
  log_price = normalized * (maximum_value - minimum_value) + minimum_value

  return 10 ** log_price

# Our Tasks:

---

**1.   Normalize all the necessary columns of data to be between 0 and 1**

Helpful Formula for normalizing a vector (list) of values:

*   x' = (x - min(x)) / (max(x) - min(x))

**2.   Create a neural network that we can use to predict the value of a diamond given its Cut (Shape), Colour, Clarity and Carat**

*   Our model will have 24 inputs and go through a neural network with three hidden layers: 40 neurons, 20 neurons, and then 10 neurons. Those feed into 1 output neuron that will predict the price of the diamond.


**3.   Devise a strategy to test our network and see if it is doing a good or bad job predicting the cost of diamonds**

In [None]:
class DiamondPredictor(torch.nn.Module):
    """Fully connected regressor mapping 24 diamond features to one value.

    Architecture: 24 -> 1000 -> 500 -> 200 -> 200 -> 1, with a sigmoid after
    every hidden layer and a linear (unbounded) output.
    """

    def __init__(self):
        super().__init__()

        self.layer1 = torch.nn.Linear(24, 1000)
        self.layer2 = torch.nn.Linear(1000, 500)
        self.layer3 = torch.nn.Linear(500, 200)
        # layer4 consumes layer3's 200 outputs. It was previously declared
        # with 500 inputs, which made forward() fail with a shape mismatch.
        self.layer4 = torch.nn.Linear(200, 200)
        self.layer5 = torch.nn.Linear(200, 1)

    def forward(self, data):
        """Run the network on a batch of feature rows.

        Args:
            data: Float tensor whose last dimension has size 24.

        Returns:
            Tensor with the same leading dimensions as `data` and a last
            dimension of size 1.
        """
        out = torch.sigmoid(self.layer1(data))
        out = torch.sigmoid(self.layer2(out))
        out = torch.sigmoid(self.layer3(out))
        out = torch.sigmoid(self.layer4(out))
        # No activation on the output layer: this is a regression output.
        out = self.layer5(out)

        return out

def train(model, training_dataset, optimizer, loss_function, epochs=100):
  """Train `model` one row at a time and print the mean loss of each epoch.

  Each row of `training_dataset` is expected to hold the target in column 0
  and the 24 input features in columns 1..24.

  Args:
    model: The torch module to optimise; it is switched to training mode.
    training_dataset: Iterable of 1-D tensors (for example a 2-D tensor).
    optimizer: Optimizer already bound to `model`'s parameters.
    loss_function: Callable taking (prediction, target) and returning a
      scalar loss tensor.
    epochs: Number of full passes over `training_dataset`.

  Returns:
    None. The mean per-row loss of every epoch is printed.
  """
  model.train()
  for _ in range(epochs):
    total_epoch_loss = 0.0
    rows_seen = 0
    for row in training_dataset:
      optimizer.zero_grad()

      target = row[0].unsqueeze(0)
      features = row[1:25].unsqueeze(0)

      prediction = model(features).squeeze(0)

      # PyTorch losses take (input, target) in that order.
      loss = loss_function(prediction, target)

      loss.backward()
      optimizer.step()

      total_epoch_loss += loss.item()
      rows_seen += 1

    # Average over the rows actually processed rather than a fixed dataset
    # size, so the reported value is correct for any dataset length.
    mean_epoch_loss = total_epoch_loss / rows_seen if rows_seen else 0.0
    print(mean_epoch_loss)


def test(model, test_dataset, loss_fn):
  """Evaluate `model` row by row and print how often it is "close enough".

  A prediction counts as correct when it lies within +/-20% of the target.
  Both values are compared in the normalised space the model was trained
  in; the dollar values are computed only for the printed report.

  Args:
    model: The trained torch module; it is switched to evaluation mode.
    test_dataset: Iterable of 1-D tensors with the target in column 0 and
      the 24 input features in columns 1..24 (same layout as in training).
    loss_fn: Unused; kept so existing callers do not need to change.

  Returns:
    None. One line per row is printed, followed by the overall fraction of
    correct predictions.
  """
  correct = 0
  rows_seen = 0
  model.eval()
  # Gradients are not needed for evaluation; skipping them saves memory
  # and time.
  with torch.no_grad():
    for row in test_dataset:
      features = row[1:25].unsqueeze(0)
      target = row[0]

      # Collapse the (1, 1) model output to a scalar tensor.
      prediction = model(features).squeeze()

      target_plus_20_percent = target + 0.2 * target
      target_minus_20_percent = target - 0.2 * target

      unnormalized_prediction = normalized_cost_to_dollars(prediction)
      unnormalized_target = normalized_cost_to_dollars(target)

      if target_minus_20_percent <= prediction <= target_plus_20_percent:
        correct += 1
        is_correct = "Correct"
      else:
        is_correct = "Not Correct"

      rows_seen += 1
      print(prediction.item(), target.item(), unnormalized_prediction.item(), unnormalized_target.item(), is_correct)

  # Divide by the number of rows actually evaluated rather than a fixed
  # dataset size; an empty dataset reports 0.0.
  accuracy = correct / rows_seen if rows_seen else 0.0
  print("Our model predicted diamonds with a correcntess of ", accuracy)


# Load the preprocessed diamond tensor and split it into the
# training rows and the held-out test rows.
dataset = read_data()
training_set = dataset[:7000]
testing_set = dataset[7000:7845]

# Network, loss (mean squared error) and optimizer (Adam).
diamond_model = DiamondPredictor()
loss_function = torch.nn.MSELoss()
optimizer = torch.optim.Adam(diamond_model.parameters(), lr=1e-4)

# Fit the network on the training rows...
train(diamond_model, training_set, optimizer, loss_function)

# ...then report how it does on the held-out rows.
test(diamond_model, testing_set, loss_function)


0.02466496985154277
0.023797298274874513
0.02310932364387809
0.0216178164941272
0.018847825328558002
0.014591815114162604
0.008292041467736735
0.0023463309323910075
0.0010951487364880255
0.0009329524145238421
0.00084938082635661
0.0008051348975247032
0.0007823044684930332
0.0007706151824420897
0.0007644515830906237
0.0007609690317463457
0.0007587976171330213
0.0007572827064237533
0.0007561019149002909
0.0007550911599796006
0.0007541626316849558
0.0007532681327511035
0.0007523800501160258
0.0007514819252914806
0.0007505630164089299
0.0007496161123073064
0.0007486357329332413
0.0007476171495704451
0.0007465559342495287
0.00074544774852912
0.0007442880505300803
0.0007430723116027185
0.0007417954383400597
0.0007404516972667938
0.0007390352731801299
0.0007375394679647726
0.0007359571723050344
0.0007342803505404228
0.00073250022964173
0.0007306071967347958
0.0007285903807352538
0.0007264377862852891
0.0007241361387008609
0.0007216711045646691
0.0007190255886407838
0.0007161822769209886
0.000