# COMP 1801 IT Lab: Cost function 
02 Oct 2020 
## Today's objective
- To learn how the parameter determines a hypothesis
- To learn the meaning of the cost function

# Import libraries and define functions
- Please press SHIFT + ENTER on the following cell to execute it.

In [None]:
# Import the libraries and define the helper functions used below. Press Shift+Enter on this cell to execute it.

#!pip install -U plotly
#!pip install kaleido
import plotly
import plotly.graph_objs as go
import plotly.io as pio
pio.renderers.default = 'colab'
import numpy as np
import sklearn
from sklearn.datasets import load_boston, load_breast_cancer, fetch_california_housing
import pandas as pd
from IPython.display import display, Math

float_space = (lambda pos: np.concatenate([-pos[::-1], [0.], pos]))(np.array([(1 + d) * ((2 ** p)/(2 ** 8)) for p in np.arange(0, 16) for d in np.arange(0, 2 ** 6) / (2 ** 6)]))

insert_one = lambda x: np.pad(x, ((0, 0), (1, 0)), constant_values=1)

def prepare_data(x_series, y_series, train_slice, test_slice, hypothesis=None, create_feature=None, regularization_weight=0.0):
  x_train = x_series[train_slice]
  x_test = x_series[test_slice]
  y_train = y_series[train_slice]
  y_test = y_series[test_slice]
  if hypothesis is None:
    if create_feature is None:
      create_feature=lambda x: x[:, np.newaxis] ** np.arange(2)[np.newaxis, :]
    normalize = lambda raw_feature, reference: (raw_feature - np.mean(reference, axis=0, keepdims=True)) / np.std(reference, axis=0, keepdims=True) 
    normalize_except_zeroth_column = lambda raw_feature, reference: np.concatenate([raw_feature[:, :1], normalize(raw_feature[:, 1:], reference[:, 1:])], axis=1)
    linear_regression = lambda x, y: np.linalg.pinv(x.T @ x + (1/len(x_train)) * regularization_weight * np.diag(np.arange(x.shape[1])>0)) @ x.T @ y[:, np.newaxis]
    normalize_for_regression = lambda x: normalize_except_zeroth_column(create_feature(x), create_feature(x_train))
    coeff = linear_regression(normalize_for_regression(x_train), y_train)
    hypothesis = lambda x: np.reshape(normalize_for_regression(x) @ coeff, [-1])
  hypothesis_on_train = hypothesis(x_train)
  hypothesis_on_test = hypothesis(x_test)
  square_x = lambda x, y, hypothesis: np.stack(np.broadcast_arrays(x, x - (y - hypothesis(x))/2, x, x + (y - hypothesis(x))/2, x, np.nan), axis=1)
  square_y = lambda x, y, hypothesis: np.stack(np.broadcast_arrays(y, (y + hypothesis(x))/2, hypothesis(x), (y + hypothesis(x))/2, y, np.nan), axis=1)
  x = {
    'train': x_train,
    'new': [e for x in x_test for e in [x, x, None]],
    'hypothesis': float_space,
    'predict': x_test,
    'test': x_test,
    'train_error': [e for x in x_train for e in [x, x, None]],
    'test_error': [e for x in x_test for e in [x, x, None]],
    'train_squared_error': np.reshape(square_x(x_train, y_train, hypothesis), [-1]),
    'test_squared_error': np.reshape(square_x(x_test, y_test, hypothesis), [-1]),
  }
  y = {
    'train': y_train,
    'new': [e for _ in x_test for e in [np.finfo(float).min, np.finfo(float).max, None]],
    'hypothesis': hypothesis(float_space),
    'predict': hypothesis(x_test),
    'test': y_test,
    'train_error': [e for hypothesis, y in zip(hypothesis_on_train, y_train) for e in [hypothesis, y, None]], 
    'test_error': [e for hypothesis, y in zip(hypothesis_on_test, y_test) for e in [hypothesis, y, None]], 
    'train_squared_error': np.reshape(square_y(x_train, y_train, hypothesis), [-1]),
    'test_squared_error': np.reshape(square_y(x_test, y_test, hypothesis), [-1]),
  }
  return x, y

def arrange_args(args, traces):
  """Regroup per-argument settings into per-trace keyword dictionaries.

  ``args`` maps an argument name to a dict of ``{trace_name: value}`` that may
  also hold a ``'default'`` entry.  For every trace the result holds each
  argument that has either a trace-specific value or a default; the
  trace-specific value wins.  Arguments with neither are left out.
  """
  per_trace = {}
  for trace in traces:
    kwargs = {}
    for arg, par in args.items():
      if trace in par:
        kwargs[arg] = par[trace]
      elif 'default' in par:
        kwargs[arg] = par['default']
    per_trace[trace] = kwargs
  return per_trace

def get_marker(traces):
  """Return the marker settings of each trace in ``traces``.

  Size and opacity are shared by all traces; symbol, outline colour and
  outline width are defined only for 'train', 'predict' and 'test'.
  """
  marker_spec = {
    'size': {'default': 16.0},
    'symbol': {'train': 'x-thin', 'predict': 'x-thin', 'test': 'x-thin'},
    'line_color': {'train': 'black', 'predict': 'blue', 'test': 'red'},
    'line_width': {'train': 1, 'predict': 2, 'test': 2},
    'opacity': {'default': 1.0},
  }
  return arrange_args(marker_spec, traces)

def get_line(traces):
  """Return the line settings (dash, colour, width) of each trace in ``traces``.

  Only the 'new' trace is dashed; no argument has a default, so a trace that
  is not listed below gets an empty settings dict.
  """
  line_spec = {
    'dash': {'new': 'dot'},
    'color': {
      'new': 'red',
      'hypothesis': 'blue',
      'train_error': 'black',
      'test_error': 'red',
      'train_squared_error': 'black',
      'test_squared_error': 'red',
    },
    'width': {
      'new': 1,
      'hypothesis': 4,
      'train_error': 2,
      'test_error': 2,
      'train_squared_error': 1,
      'test_squared_error': 1,
    },
  }
  return arrange_args(line_spec, traces)

def get_axes(xrange=None, yrange=None, xtitle=None, ytitle=None, title_font_size=24):
  """Return the ``(xaxis, yaxis)`` layout dictionaries for a figure.

  Args:
    xrange, yrange: ``[low, high]`` axis ranges; default to ``[-1, 16]`` and
      ``[-1, 10]`` respectively.
    xtitle, ytitle: axis titles; ``None`` means an empty title.
    title_font_size: font size applied to both axis titles.

  Returns:
    Two plain dicts suitable for ``layout.xaxis`` / ``layout.yaxis``.  The y
    axis is anchored to the x axis with ratio 1 so both share one scale.
  """
  def make_axis(axis_range, title_text, **extra):
    # The title font is set through the nested ``title.font`` attribute rather
    # than the deprecated ``titlefont`` shortcut.
    return dict(
      range=axis_range,
      title=dict(
        text='' if title_text is None else title_text,
        font=dict(size=title_font_size),
      ),
      zeroline=True,
      zerolinewidth=2,
      zerolinecolor='LightPink',
      **extra,
    )

  xaxis = make_axis([-1, 16] if xrange is None else xrange, xtitle)
  yaxis = make_axis([-1, 10] if yrange is None else yrange, ytitle, scaleanchor='x', scaleratio=1)
  return xaxis, yaxis

def get_fig(x_series, y_series, train_slice, test_slice, hypothesis=None, create_feature=None, visible=None, title='', xrange=None, yrange=None, xtitle=None, ytitle=None, regularization_weight=0.0):
  """Build the plotly figure showing examples, hypothesis and errors.

  Args:
    x_series, y_series, train_slice, test_slice, hypothesis, create_feature,
      regularization_weight: forwarded unchanged to ``prepare_data``.
    visible: dict mapping a trace name to its ``visible`` setting; a
      ``'default'`` entry applies to traces not listed.  ``None`` (the
      default) means ``{'default': True}``, i.e. every trace is shown.
    title: figure title.
    xrange, yrange, xtitle, ytitle: forwarded to ``get_axes``.

  Returns:
    A ``go.Figure`` holding one ``go.Scatter`` per trace name in ``mode``.
  """
  # Avoid a mutable default argument; build the default mapping per call.
  if visible is None:
    visible = {'default': True}
  mode = {
    'train': 'markers',
    'new': 'lines',
    'hypothesis': 'lines',
    'predict': 'markers',
    'train_error': 'lines',
    'train_squared_error': 'lines',
    'test': 'markers',
    'test_error': 'lines',
    'test_squared_error': 'lines',
  }
  xaxis, yaxis = get_axes(xrange, yrange, xtitle, ytitle)
  marker = get_marker([trace for trace in mode if mode[trace] in ['markers', 'lines+markers']])
  line = get_line([trace for trace in mode if mode[trace] in ['lines', 'lines+markers']])
  x, y = prepare_data(x_series, y_series, train_slice, test_slice, hypothesis=hypothesis, create_feature=create_feature, regularization_weight=regularization_weight)
  args = dict(
    x=x,
    y=y,
    mode=mode,
    marker=marker,
    line=line,
    name={
      'train': 'Training examples',
      'new': 'New examples',
      'hypothesis': 'Hypothesis',
      'predict': 'Prediction',
      'train_error': 'Error on training examples',
      'train_squared_error': 'Squared Error on training examples',
      'test': 'True value',
      'test_error': 'Error on test examples',
      'test_squared_error': 'Squared Error on test examples',
    },
    fill={
      'train_squared_error': 'toself',
      'test_squared_error': 'toself',
    },
    fillcolor={
      'train_squared_error': 'rgba(0,0,0,0.4)',
      'test_squared_error': 'rgba(255,0,0,0.4)',
    },
    visible=visible,
  )
  # Regroup the settings once, instead of once per trace.
  trace_kwargs = arrange_args(args, mode)
  data = [go.Scatter(**trace_kwargs[trace]) for trace in mode]
  fig = go.Figure(
    data,
    layout=dict(
      width=1600,
      height=800,
      font=dict(size=32),
      title=title,
      xaxis=xaxis,
      yaxis=yaxis,
      showlegend=True,
    ),
  )
  return fig

# Toy training set used throughout the lab: features and their targets.
x_series = np.array([-2.53, -1.10, -0.88, -0.42, 0.83, 1.21, 2.19, 3.42])
y_series = np.array([-0.91, -1.27, 0.35, 0.22, -0.10, 0.51, 1.82, 1.73])


def cost_func(x_series, y_series, slopes):
  """Return half the mean squared error of ``y = slope * x`` for each slope.

  The result is a 1-D array with one cost per entry of ``slopes``.
  """
  targets = np.array(y_series)[:, np.newaxis]
  features = np.array(x_series)[:, np.newaxis]
  candidate_slopes = np.array(slopes)[np.newaxis, :]
  # Rows index the examples, columns index the candidate slopes.
  residuals = targets - features * candidate_slopes
  return (1/2) * np.mean(residuals ** 2, axis=0)

def xy_loss_figures(x_series, y_series, slopes = np.arange(0.0, 1.5, 0.25), is_example_display=False):
  """Show one data figure per candidate slope, followed by the cost figure.

  Args:
    x_series, y_series: training features and targets; all of them are used
      as training examples and the test set is empty.
    slopes: candidate values of theta_1 for the hypothesis ``y = theta_1 * x``.
      Ignored when ``is_example_display`` is true.
    is_example_display: when true, draw a single figure in which only the
      training examples are visible, and skip the cost figure.

  Side effects:
    Calls ``fig.show()`` for every figure and, unless ``is_example_display``
    is true or ``slopes`` is empty, displays the minimum cost and the slope
    achieving it.
  """
  trace_label_list = ['train', 'new', 'hypothesis', 'predict', 'train_error', 'train_squared_error', 'test', 'test_error', 'test_squared_error']
  if is_example_display:
    shown_list = [True, False, False, False, False, False, False, False, False]
    slopes = np.array([0.0])
  else:
    shown_list = [True, False, True, False, True, True, False, False, False]
    slopes = np.array(slopes)
  # Hidden traces stay in the legend so they can be switched on by clicking.
  visible_dict = {label: True if shown else 'legendonly' for label, shown in zip(trace_label_list, shown_list)}

  def cost_func_on_data(slopes):
    return cost_func(x_series, y_series, slopes)

  costs = cost_func_on_data(slopes)
  xtitle = r'${\Huge \text{Feature } x}$'
  ytitle = r'${\Huge \text{Target } y}$'
  ttitle = r'${\Huge \text{Parameter } \theta_{1}}$'
  jtitle = r'${\Huge \text{Cost function } J}$'
  # Every example is a training example; the test slice is empty.
  train_slice = slice(0, len(x_series))
  test_slice = slice(len(x_series), len(x_series))
  for slope in slopes:
    if is_example_display:
      title = 'Training Examples'
    else:
      slope_str = '{:+.4f}'.format(slope)
      title = r'${\Huge \text{Hypothesis } \quad y = ' + slope_str + r'x \quad \text{ (} \theta_{1} = ' + slope_str + r' \text{) }}$'
    # Bind the current slope explicitly so the closure cannot pick up a later loop value.
    hypothesis = lambda x, slope=slope: slope * x
    fig = get_fig(x_series, y_series, train_slice, test_slice, hypothesis=hypothesis, title=title, xrange=[-3.0, 3.0], yrange=[-3.0, 3.0], xtitle=xtitle, ytitle=ytitle, visible=dict(visible_dict))
    fig.show()
  if not is_example_display:
    # Each tried slope is drawn as a bar of width 2 * half_width reaching up to its cost.
    half_width = 0.0625
    cost_bar_x = [e for slope in slopes for e in [slope-half_width, slope-half_width, slope+half_width, slope+half_width, slope-half_width, None]]
    cost_bar_y = [e for cost in costs for e in [cost, 0, 0, cost, cost, None]]
    scatter_arg_dict_list = [
      dict(
        x=slopes,
        y=costs,
        mode='markers',
        name='hypothesis',
        marker=dict(symbol='line-ew', line_color='blue', line_width=4, size=16.0),
      ),
      dict(
        x=cost_bar_x,
        y=cost_bar_y,
        mode='lines',
        name='Cost function',
        line=dict(color='black', width=1),
        fill='toself',
        fillcolor='rgba(0,0,0,0.4)',
      ),
      dict(
        x=float_space,
        y=cost_func_on_data(float_space),
        mode='lines',
        name='Cost function',
        line=dict(color='black', width=2),
      ),
    ]
    xaxis, yaxis = get_axes(xrange=[-0.5, 1.5], yrange=[-0.125, 1.0], xtitle=ttitle, ytitle=jtitle)
    # Only the markers are shown initially; bars and the full curve are legend-only.
    visible_list = [True if shown else 'legendonly' for shown in [True, False, False]]
    data = [go.Scatter(**arg_dict, visible=visible) for arg_dict, visible in zip(scatter_arg_dict_list, visible_list)]
    fig = go.Figure(
      data,
      layout=dict(
        width=1600,
        height=800,
        font=dict(size=32),
        xaxis=xaxis,
        yaxis=yaxis,
        showlegend=True,
      ),
    )
    fig.show()
    # np.min / np.argmin raise on an empty array, so skip the summary when no slope was tried.
    if costs.size > 0:
      display(Math('{{\\Huge \\text{{The minimum cost is {:+.4f} achieved by }} \\theta_{{1}} = {:+.4f}.}}'.format(np.min(costs), slopes[np.argmin(costs)])))




# Show examples
- Press SHIFT + ENTER on the following cell to see examples.

In [None]:
# Draw a single figure in which only the training examples are visible; the cost figure is skipped.
xy_loss_figures(x_series=x_series, y_series=y_series, is_example_display=True)


# Find the hypotheses by changing the parameter
- Your hypothesis is $$\large y=\theta_1 x$$
- By changing $\theta_{1}$, you can change the slope of the hypothesis.

In [None]:
# Change the slope of the hypothesis. After modifying the code, execute the cell by pressing Shift + Enter.

# Please modify the code: start ----------------------------------------

# Please DO NOT TRY TOO MANY PARAMETERS (>=10) AT ONCE: one figure is drawn per slope.
slopes = [-0.2, 0.4, 1.0]

# Please modify the code: end ----------------------------------------

# Show the hypothesis for each slope, then the cost of every slope tried.
xy_loss_figures(x_series=x_series, y_series=y_series, slopes=slopes)


In [None]:
# --- Edit the following information ---
your_email_address = 'ab0000x@gre.ac.uk'
your_student_id = '000000000'
your_first_name = 'your_first_name'
your_last_name = 'your_last_name'
# --- Edit the above information ---

# After filling in the information above, execute this cell by pressing Shift + Enter.

# --- Do not modify anything below this line ---
# The code below submits the information above together with your answer.
!pip install pycryptodome
import urllib.request
from bs4 import BeautifulSoup
import requests, warnings
import json
from Crypto.PublicKey import RSA
from Crypto.Cipher import PKCS1_OAEP

# Assemble the submission: who is submitting, and the slopes tried with the lowest cost reached.
submission_id = 'comp-1801_20201002-00'
student_information = {
    'email': your_email_address,
    'id': your_student_id,
    'first_name': your_first_name,
    'last_name': your_last_name,
}
answer = {
    'slopes': slopes,
    'lowest_cost': cost_func(x_series, y_series, slopes).min(),
}
submission_dict = {
    'submission_id': submission_id,
    'student_information': student_information,
    'answer': answer,
}
submission_json = json.dumps(submission_dict)

def get_questions(in_url):
    """Fetch a form page and map each question label to its field name.

    Downloads ``in_url``, parses it as HTML and collects every element inside
    the first ``<form>`` whose ``name`` attribute starts with ``'entry.'``.

    Returns:
        A dict ``{label: field_name}``.  The label is the value of the first
        attribute whose name contains ``'label'``, or ``'unknown'`` when the
        element has no such attribute.
    """
    # Close the HTTP response as soon as the body has been read.
    with urllib.request.urlopen(in_url) as res:
        soup = BeautifulSoup(res.read(), 'html.parser')

    def get_name(field):
        labels = [value for key, value in field.attrs.items() if 'label' in key]
        return labels[0] if labels else 'unknown'

    all_questions = soup.form.find_all(attrs={'name': lambda x: x and x.startswith('entry.')})
    return {get_name(q): q['name'] for q in all_questions}

def submit_response(form_url, cur_questions, verbose=False, **answers):
    """POST ``answers`` to the response endpoint of the form at ``form_url``.

    Args:
        form_url: URL of the form; its ``/viewform`` part is replaced with
            ``/formResponse`` to obtain the submission URL.
        cur_questions: dict mapping a question label to its form field name.
        verbose: when true, print the form data before sending it.
        **answers: answer values keyed by question label.  A label missing
            from ``cur_questions`` triggers a ``RuntimeWarning`` and is skipped.

    Returns:
        The object returned by ``requests.post``.
    """
    # Every known field starts out empty and is then filled from the answers.
    payload = {'draftResponse': [], 'pageHistory': 0}
    payload.update(dict.fromkeys(cur_questions.values(), ''))
    for question, value in answers.items():
        if question not in cur_questions:
            warnings.warn('Unknown Question: {}'.format(question), RuntimeWarning)
            continue
        payload[cur_questions[question]] = value
    if verbose:
        print(payload)
    request_headers = {
        'Referer': form_url,
        'User-Agent': "Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.52 Safari/537.36",
    }
    target_url = form_url.replace('/viewform', '/formResponse')
    return requests.post(target_url, data=payload, headers=request_headers)


FORM_URL = "https://docs.google.com/forms/d/e/1FAIpQLScNs3Cf6DnNCBCUyPGfp22mI3FYVBfTNbGi0TxZ0_SKo9fgCw/viewform"
anno_questions = get_questions(FORM_URL)
# Public RSA key used to encrypt the submission before it is sent.
public_key_utf = '''-----BEGIN PUBLIC KEY-----
MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAp/lFj1VO6DQ+Ot27oamm
KImRJiXz7IlhdlfkAmytONDQAtvgp9/AqfHyIA0+YnaTDGditMK4t1u6s2YvYlW8
5tKbAkbziDAOyaxkepwEw47ldco3hh8p+N42nymWZJp7GKwaHUJ/k1S5sTzFso9o
8/szKGlHUq3lpQdQeWScAirCvCewqJFrJWiLymoS0IbeeCzxCJxqmLwx4kXjCTeU
c9yUqCi+dZ41Cebd8z5y4Ekf58JP+jh/B0VPHV5cR2D/S3zrhWjPnSU4nCKef5pE
b863LlyJ1/sKheanBTq7+9rxMf2rNrsH8Nea4UW2gwtPgOogWFdiWgKYl7B1ks7E
OQIDAQAB
-----END PUBLIC KEY-----'''


def split_n(string, length):
    """Yield consecutive pieces of ``string``, each at most ``length`` characters long.

    The last piece may be shorter, so no trailing characters are dropped.
    """
    return (string[0+i:length+i] for i in range(0, len(string), length))


# The JSON is encrypted in 32-character pieces, one hex-encoded ciphertext per line.
answer_json_list = split_n(submission_json, 32)
cipher_rsa = PKCS1_OAEP.new(RSA.import_key(public_key_utf.encode('utf-8')))
encrypted_answer_json = '\n'.join([cipher_rsa.encrypt(line.encode()).hex() for line in answer_json_list])
response = submit_response(FORM_URL, {'answer': 'entry.1985698402'}, **{'answer': encrypted_answer_json})
# Report success only when the server actually accepted the request.
if response.ok:
    print('Successfully submitted!!')
    print('Check your information: ', ', '.join([': '.join([k, str(v)]) for k, v in student_information.items()]), '\n', 'Your answer: ', ', '.join([': '.join([k, str(v)]) for k, v in answer.items()]))
else:
    print('Submission FAILED (HTTP status {}). Please check your connection and execute this cell again.'.format(response.status_code))
