Everything below is only a **sample** solution and is **not** the only model or the best model.

In [None]:
from keras.datasets import fashion_mnist
from keras.datasets import imdb
from tensorflow.keras.layers import (Input, Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization)
from tensorflow.keras.models import Sequential
import numpy as np
import matplotlib.pyplot as plt

### Exercise 1

You want to buy a ring for yourself. You enter a store and they explain how the ring is priced: it costs 500 dollars to make the ring, plus an extra 100 dollars for each diamond added to it. So a ring with 1 diamond costs 600 dollars and a ring with 6 diamonds costs 1100 dollars. Create a model which takes the number of diamonds as input and outputs the cost.

Hint:<br/>
1) Use small-scale values (express your output in thousands of dollars).<br/>
2) The bigger the dataset, the better the prediction.<br/>
3) In this case, increasing the number of epochs will improve your prediction, but it can lead to overfitting (we will discuss this later in the workshop).

In [None]:
# Training data: number of diamonds -> ring price in thousands of dollars
# (0.5 for the ring itself plus 0.1 per diamond).
# The samples are stored as NumPy arrays of shape (n_samples, 1) rather than
# plain Python lists, because recent Keras versions reject bare lists of
# scalars as model input.
xs = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype="float32").reshape(-1, 1)
ys = np.array([0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 1.2, 1.3, 1.4],
              dtype="float32").reshape(-1, 1)

# A single Dense unit learns y = w * x + b. The input shape is declared with
# an explicit Input layer instead of the deprecated `input_shape=` argument.
model = Sequential([Input(shape=(1,)), Dense(units=1)])
model.compile(optimizer='sgd', loss='mean_squared_error')
model.fit(xs, ys, epochs=400)

In [None]:
# Query the fitted model for 0, 5 and 10 diamonds.
# Each query is wrapped in a (1, 1) NumPy array (one sample, one feature),
# since recent Keras versions do not accept a bare Python list here.
for n_diamonds in (0, 5, 10):
    print(model.predict(np.array([[n_diamonds]], dtype="float32")))

### Exercise 2  


Create a convolutional neural network to classify images of the Fashion MNIST dataset.

Fashion-MNIST is a dataset of Zalando's article images consisting of a training set of 60,000 examples and a test set of 10,000 examples.

Each example is a 28x28 grayscale image, associated with a label from 10 classes.

**Load dataset**

In [None]:
# Fetch the Fashion-MNIST train/test splits (images and integer labels).
(train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data()

# Map each integer label (0-9) to a readable class name.
class_names = dict(enumerate([
    'top', 'trouser', 'pullover', 'dress', 'coat',
    'sandal', 'shirt', 'sneaker', 'bag', 'boot',
]))

# Preview the first nine training images in a 3x3 grid, labelled by class.
plt.figure(figsize=(10, 10))
for position, (image, label) in enumerate(zip(train_images[:9], train_labels[:9]), start=1):
    plt.subplot(3, 3, position)
    plt.xticks([])
    plt.yticks([])
    plt.grid(False)
    plt.imshow(image, cmap='binary')
    plt.xlabel(class_names[label])
plt.show()

**Pre-process data** 

In [None]:
# Scale pixel values by 1/255, then append a trailing channel axis so each
# image has shape (28, 28, 1).
train_norm = (train_images / 255.0).reshape((train_images.shape[0], 28, 28, 1))
test_norm = (test_images / 255.0).reshape((test_images.shape[0], 28, 28, 1))

**Build model**

In [None]:
# Baseline CNN: one conv + max-pool stage, then a small dense classifier
# ending in a 10-way softmax (one output per clothing class).
model = Sequential()
model.add(Conv2D(32, kernel_size=3, activation='relu'))
model.add(MaxPooling2D(pool_size=2))
model.add(Flatten())
model.add(Dense(32, activation='relu'))
model.add(Dense(10, activation='softmax'))

**Train model**

In [None]:
# Labels are plain integers, hence the *sparse* categorical cross-entropy.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
model.fit(train_norm, train_labels, epochs=5)

**Evaluate performance**

In [None]:
# Report loss and accuracy on the training split first, then on the test split.
for report, images, labels in (
    ("Training : loss={:.3f} - acc={:.3f}", train_norm, train_labels),
    ("Test : loss={:.3f} - acc={:.3f}", test_norm, test_labels),
):
    loss, acc = model.evaluate(images, labels)
    print(report.format(loss, acc))

In [None]:
# Predicted class = index of the largest softmax output for each test image.
predictions = model.predict(test_norm).argmax(axis=1)

# Show the first 50 test images in a 5x10 grid. The caption is the TRUE class;
# it is green when the model predicted it correctly and red otherwise.
plt.figure(figsize=(10, 10))
for i in range(50):
    plt.subplot(5, 10, i + 1)
    plt.xticks([])
    plt.yticks([])
    plt.grid(False)
    plt.imshow(test_images[i], cmap='binary')
    actual_class = class_names[test_labels[i]]
    caption_color = 'green' if predictions[i] == test_labels[i] else 'red'
    plt.xlabel(actual_class, color=caption_color)
plt.show()


**Improve model**

Added a convolutional layer and dropout layers 

In [None]:
# Deeper CNN: a second convolutional layer, with dropout after the pooling
# stage and after the second convolution.
model_improved = Sequential()
model_improved.add(Conv2D(32, kernel_size=3, activation='relu'))
model_improved.add(MaxPooling2D(pool_size=2))
model_improved.add(Dropout(0.25))
model_improved.add(Conv2D(64, kernel_size=3, activation='relu'))
model_improved.add(Dropout(0.25))
model_improved.add(Flatten())
model_improved.add(Dense(32, activation='relu'))
model_improved.add(Dense(10, activation='softmax'))

In [None]:
# Same optimizer, loss and epoch count as the baseline model, so the two
# results can be compared directly.
model_improved.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
model_improved.fit(train_norm, train_labels, epochs=5)

**Re-evaluate performance**

In [None]:
# Report loss and accuracy of the improved model on the training split first,
# then on the test split.
for report, images, labels in (
    ("Training : loss={:.3f} - acc={:.3f}", train_norm, train_labels),
    ("Test : loss={:.3f} - acc={:.3f}", test_norm, test_labels),
):
    loss, acc = model_improved.evaluate(images, labels)
    print(report.format(loss, acc))

In [None]:
# Predicted class = index of the largest softmax output for each test image.
predictions = model_improved.predict(test_norm).argmax(axis=1)

# Show the first 50 test images in a 5x10 grid. The caption is the TRUE class;
# it is green when the improved model predicted it correctly and red otherwise.
plt.figure(figsize=(10, 10))
for i in range(50):
    plt.subplot(5, 10, i + 1)
    plt.xticks([])
    plt.yticks([])
    plt.grid(False)
    plt.imshow(test_images[i], cmap='binary')
    actual_class = class_names[test_labels[i]]
    caption_color = 'green' if predictions[i] == test_labels[i] else 'red'
    plt.xlabel(actual_class, color=caption_color)
plt.show()


### Exercise 3 


Create a model to categorise movie reviews of the [IMDB dataset](https://ai.stanford.edu/~amaas/data/sentiment/).

This is a dataset of movie reviews from IMDB, labeled by sentiment (1 for positive and 0 for negative), consisting of 25000 training examples and 25000 testing examples.

Reviews have been preprocessed, and each review is encoded as a list of word indices (integers). Words are indexed by overall frequency in the dataset; for instance, the integer "3" encodes the 3rd most frequently occurring word.


**Load dataset**

In [None]:
# Keep only the 10000 most frequent words; any rarer word is replaced by the
# out-of-vocabulary index (2 by default) in the encoded sequence.
(training_data, training_targets), (testing_data, testing_targets) = imdb.load_data(num_words=10000)

# Pool the official 50/50 train/test halves into one set so that it can be
# re-split 80/20 afterwards.
data = np.concatenate([training_data, testing_data])
targets = np.concatenate([training_targets, testing_targets])

**Pre-process data**

In [None]:
# Hold on to the first 10000 reviews and labels in their original
# (non-vectorized) form so individual predictions can be inspected later.
n_check = 10000
check_x, check_y = data[:n_check], targets[:n_check]

def vectorize(sequences, dimension=10000, dtype="float32"):
    """Multi-hot encode sequences of integer indices.

    Args:
        sequences: iterable of index sequences (e.g. lists of word indices);
            every index must be smaller than ``dimension``.
        dimension: width of each output row, i.e. the vocabulary size.
        dtype: dtype of the returned matrix. It defaults to float32 rather
            than NumPy's float64 default, which halves the memory of the
            (len(sequences), dimension) matrix while still holding the
            0/1 values exactly.

    Returns:
        A 2-D array of shape ``(len(sequences), dimension)`` where entry
        ``[i, j]`` is 1 if index ``j`` occurs in ``sequences[i]`` and 0
        otherwise (repeated indices still yield a single 1).
    """
    results = np.zeros((len(sequences), dimension), dtype=dtype)
    for i, sequence in enumerate(sequences):
        # Fancy indexing sets every listed column of row i in one step.
        results[i, sequence] = 1
    return results

# Multi-hot encode every review and cast the labels to float32.
data = vectorize(data)
targets = np.array(targets).astype("float32")

# 80/20 split: the first 10K reviews are held out for testing,
# the remaining 40K are used for training.
test_x, train_x = data[:10000], data[10000:]
test_y, train_y = targets[:10000], targets[10000:]

**Build model**

Sample model below is not a CNN, but you can make one.

In [None]:
# Fully connected sentiment classifier over the 10000-dimensional multi-hot
# review vectors. The input shape is declared with an explicit Input layer
# (the `input_shape=` argument on a layer is deprecated in recent Keras).
# The single sigmoid unit outputs a value between 0 and 1 for the positive class.
model = Sequential([
    Input(shape=(10000,)),
    Dense(50, activation="relu"),
    Dropout(0.25),
    Dense(50, activation="relu"),
    Dropout(0.25),
    Dense(50, activation="relu"),
    Dense(1, activation="sigmoid"),
])

model.summary()

**Train model and evaluate performance**

In [None]:
model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

# batch_size is the number of training examples used in one iteration.
# The held-out test split is passed as validation data, so Keras records its
# loss/accuracy after every epoch under the "val_" prefix.
results = model.fit(
    train_x,
    train_y,
    epochs=2,
    batch_size=500,
    validation_data=(test_x, test_y),
)

# history["accuracy"] is the TRAINING accuracy; the accuracy on
# (test_x, test_y) is stored under "val_accuracy". Report the value after the
# final epoch instead of averaging over epochs.
print("Test-Accuracy:", results.history["val_accuracy"][-1])

In [None]:
# get_word_index() returns a dictionary mapping words to their index in the
# dataset; invert it so indices can be turned back into words.
index = imdb.get_word_index()
reverse_index = {word_id: word for word, word_id in index.items()}

# Rebuild the text of the first kept review.
# Encoded indices are offset by 3 because 0, 1 and 2 are reserved for
# "padding", "start of sequence" and "unknown"; anything that cannot be
# mapped back to a word is shown as "#".
decoded_words = [reverse_index.get(token - 3, "#") for token in check_x[0]]
decoded = " ".join(decoded_words)
print(decoded)

**Predict**

In [None]:
# Human-readable names for the two sentiment labels.
sentiment = {0: 'negative', 1: 'positive'}

# The network outputs one value between 0 and 1 per review: the likelihood of
# the review being positive.
probabilities = model.predict(test_x)

# Round each value to the nearest class label (0 or 1).
predictions = np.round(probabilities).astype('int64')

# Compare prediction and ground truth for the review decoded above.
first_prediction = predictions[0][0]
print("predicted: ", sentiment[first_prediction])
print("actual: ", sentiment[check_y[0]])