In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Load the training samples from the comma-separated file. Later cells read the
# "height", "wave_length" and "speed" columns of this frame.
data = pd.read_csv(
    "water_waves_training_data.csv",
    sep=",",
)
# Preview the first rows of the loaded frame.
data.head()

In [2]:
def linear_network(height, wave_length, W, eps=1e-16):
    """Evaluate the linear model ``f = W[0]*height + W[1]*wave_length + eps``.

    Parameters
    ----------
    height, wave_length : scalar or array-like
        Model inputs; they are combined element-wise.
    W : sequence
        The two weights, ``W[0]`` for ``height`` and ``W[1]`` for ``wave_length``.
    eps : float, optional
        Small offset added to ``f`` so the divisions below do not hit an exact zero.

    Returns
    -------
    tuple
        ``(height / f, wave_length / f, f)``.

    Note: a later cell of this notebook defines ``linear_network`` again and
    therefore shadows this version once it has been executed.
    """
    w_height, w_length = W[0], W[1]
    f = w_height*height + w_length*wave_length + eps
    scaled_height = height/f
    scaled_length = wave_length/f
    return scaled_height, scaled_length, f
# 
# # Create a general figure showing the data in 3d
# plt.figure(figsize = [10, 10])
# ax = plt.axes(projection='3d')
# ax.scatter3D(data["height"], data["wave_length"], data["speed"], c=data["speed"], cmap='viridis')


In [32]:
# Create a dataset where "height" is always bigger than (or equal to) "wave_length": wherever
# that is not the case the two values are swapped. The column names are kept as "height" and
# "wave_length" even though their initial meaning is lost by the swap. "speed" is kept as is
# (it is carried over unchanged by `assign`, which works on a copy of `data`).
data_swapped = data.assign(
    height=np.maximum(data["height"], data["wave_length"]),
    wave_length=np.minimum(data["height"], data["wave_length"]),
)

# Scaled inputs. The bare attribute `data_swapped.div` only yields the bound method, not a frame,
# so the division is spelled out here.
# NOTE(review): dividing by "speed" is an assumption, chosen because the network returns
# height/f and wave_length/f with f being compared against speed -- confirm the intended divisor.
data_swapped_scaled = data_swapped.assign(
    height=data_swapped["height"] / data_swapped["speed"],
    wave_length=data_swapped["wave_length"] / data_swapped["speed"],
)

# Split the original (unswapped) data by which input is larger.
# Rows with height == wave_length satisfy both conditions and therefore appear in both subsets.
data_heightmorewavelength = data[data["height"] >= data["wave_length"]].reset_index(drop=True)
data_wavelengthmoreheight = data[data["height"] <= data["wave_length"]].reset_index(drop=True)

# Both subsets keep the column names "height", "wave_length" and "speed" that the functions
# used later in the notebook index by, so no renaming is needed.

# Only the last expression of a cell is rendered, i.e. the second preview.
data_heightmorewavelength.head()
data_wavelengthmoreheight.head()

In [27]:

# 3D scatter of the swapped dataset: the two inputs on the horizontal axes, speed on the
# vertical axis and, redundantly, as the marker colour.
plt.figure(figsize = [10, 10])
ax = plt.axes(projection='3d')
ax.scatter3D(data_swapped["height"], data_swapped["wave_length"], data_swapped["speed"], c=data_swapped["speed"], cmap='viridis')
# Label the axes so the figure can be read on its own.
ax.set_xlabel("height")
ax.set_ylabel("wave_length")
ax.set_zlabel("speed");  # trailing ';' keeps the Text repr out of the cell output


In [4]:
# Fit a line to the data
def linear_network(height, wave_length, W, speed=None, eps=1e-16):
    """Evaluate the linear model ``f = W[0]*height + W[1]*wave_length + eps``.

    Parameters
    ----------
    height, wave_length : scalar or array-like
        Model inputs; they are combined element-wise.
    W : sequence
        The two weights, ``W[0]`` for ``height`` and ``W[1]`` for ``wave_length``.
    speed : array-like, optional
        Reference values for ``f``. When omitted (``None``) or empty, no error
        is computed.
    eps : float, optional
        Small offset added to ``f`` so the divisions below do not hit an exact zero.

    Returns
    -------
    tuple
        ``(height / f, wave_length / f, f)`` when no reference speed is given,
        otherwise ``(height / f, wave_length / f, f, mape)`` where ``mape`` is
        the mean absolute percentage error ``100 * mean(|f / speed - 1|)``.
    """
    f = W[0]*height + W[1]*wave_length + eps
    # The default used to be the function object `np.empty`, which has no `.size`
    # attribute, so every call without `speed` raised AttributeError.
    if speed is None or np.size(speed) == 0:
        return height/f, wave_length/f, f
    mape = 100 * np.mean(np.abs(f/speed - 1))
    return height/f, wave_length/f, f, mape
    

def fit_and_validate(weights, data, data_scaled, network):
    """Evaluate ``network`` on ``data``, plot the fit, and return its MAPE.

    Parameters
    ----------
    weights : sequence
        Weights handed to ``network``.
    data : DataFrame-like
        Input data; its ``height``, ``wave_length`` and ``speed`` columns are read.
    data_scaled : DataFrame-like
        Rescaled input data; its ``height`` and ``wave_length`` columns are
        drawn as the reference points of the left panel.
    network : callable
        A function such as ``linear_network`` that, called as
        ``network(height, wave_length, weights, speed)``, returns four values:
        the two scaled inputs, the prediction and the MAPE.

    Returns
    -------
    The MAPE value reported by ``network`` for the given input.

    Side effect: creates a two-panel matplotlib figure.

    This function has been reused from the previously done assignments.
    """
    # Run the network once; it also reports the error against data["speed"].
    scaled_height, scaled_length, prediction, mape = network(
        data["height"], data["wave_length"], weights, data["speed"]
    )

    fig, (awave_length, aspeed) = plt.subplots(1, 2, figsize=(12, 4))

    # Left panel: rescaled reference data (red) against the network's scaled outputs (crosses).
    awave_length.scatter(
        data_scaled.height, data_scaled.wave_length, color='r', label='Data'
    )
    awave_length.scatter(scaled_height, scaled_length, marker="x", label="Prediction")
    awave_length.legend()

    # Right panel: predicted against reference speed on log-log axes.
    aspeed.loglog(data.speed, prediction, marker="x", ls='', label="Prediction")
    # The red diagonal marks a perfect prediction over the observed speed range.
    speed_range = [data.speed.min(), data.speed.max()]
    aspeed.plot(speed_range, speed_range, color='r', label = "f(x,y)=d")
    # The error is reported in the panel title.
    aspeed.set_title(f"MAPE = {mape:.2f} %" )
    aspeed.set_xlabel("Reference")
    aspeed.set_ylabel("Prediction")
    aspeed.legend()

    return mape

%matplotlib inline
import ipywidgets as ip

# History of the MAPE obtained for every weight pair tried so far; it grows by one
# entry on each call of interactive_fit.
mape_list = []

def interactive_fit(w0, w1):
    """Plot the linear fit on ``data_swapped`` for the weights ``(w0, w1)``.

    The left panel shows predicted against reference speed on log-log axes with
    the MAPE in its title; the right panel shows every MAPE recorded in the
    module-level ``mape_list`` so far. The function appends the current MAPE to
    ``mape_list`` and returns nothing.
    """
    fig, (ax_fit, ax_history) = plt.subplots(nrows=1, ncols=2, figsize=(12, 5))

    scaled_height, scaled_length, prediction, mape = linear_network(
        data_swapped["height"],
        data_swapped["wave_length"],
        [w0, w1],
        data_swapped["speed"],
    )
    mape_list.append(mape)

    # Left panel: prediction against reference, plus the diagonal of a perfect fit.
    ax_fit.loglog(data_swapped.speed, prediction, marker="x", ls='', label="Prediction")
    speed_range = [data_swapped.speed.min(), data_swapped.speed.max()]
    ax_fit.plot(speed_range, speed_range, '-r')
    ax_fit.set_title(f"MAPE = {mape:.2f} %" )
    ax_fit.set_xlabel('Reference')
    ax_fit.set_ylabel('Prediction')

    # Right panel: error history, one point per call made so far.
    attempts = np.arange(len(mape_list))
    ax_history.semilogy(attempts, mape_list, 'o--')
    ax_history.set_xlabel('Epochs')
    ax_history.set_ylabel('MAPE')
    



# Hand both weights to ipywidgets as (min, max, step) ranges: -0.10 to 0.10 in steps of 0.001.
# The trailing ';' suppresses the repr of the returned object in the cell output.
ip.interact(
    interactive_fit,
    w0=(-0.10, 0.10, 0.001),
    w1=(-0.10, 0.10, 0.001),
);
# coef = np.polyfit(data_swapped["height"], data_swapped["wave_length"], 1)
# poly1d_fn = np.poly1d(coef)
# 
# plt.plot(data_swapped["height"], data_swapped["wave_length"], '.', data_swapped["height"], poly1d_fn(data_swapped["height"]), '-')

In [5]:
# Exhaustive grid search on the swapped data: entry [i, j] holds the MAPE for the weights
# w0 = i/1000 - 0.1 and w1 = j/1000 - 0.1 on a 200 x 200 grid.
mape_matrix = np.zeros((200, 200))
for i, j in np.ndindex(200, 200):
    mape_matrix[i, j] = linear_network(
        data_swapped["height"],
        data_swapped["wave_length"],
        [i/1000 - 0.1, j/1000 - 0.1],
        data_swapped["speed"],
    )[3]

# Grid indices of every entry equal to the smallest MAPE; the first hit is the one reported.
min_mape = np.where(mape_matrix == mape_matrix.min())
print(f"Minimum MAPE value is {np.min(mape_matrix)} at w0 = {min_mape[0][0]/1000 - 0.1} and w1 = {min_mape[1][0]/1000 - 0.1}")

# Plot the fit for the best weights; the returned MAPE is the cell's displayed value.
fit_and_validate(
    [min_mape[0][0]/1000 - 0.1, min_mape[1][0]/1000 - 0.1],
    data_swapped,
    data_swapped,
    linear_network,
)

In [34]:
# Same 200 x 200 grid search (w0 = i/1000 - 0.1, w1 = j/1000 - 0.1), restricted to the rows
# of data_heightmorewavelength.
mape_matrix_heightmorewavelength = np.zeros((200, 200))
for i, j in np.ndindex(200, 200):
    mape_matrix_heightmorewavelength[i, j] = linear_network(
        data_heightmorewavelength["height"],
        data_heightmorewavelength["wave_length"],
        [i/1000 - 0.1, j/1000 - 0.1],
        data_heightmorewavelength["speed"],
    )[3]

# Grid indices of every entry equal to the smallest MAPE; the first hit is the one reported.
min_mape_heightmorewavelength = np.where(
    mape_matrix_heightmorewavelength == mape_matrix_heightmorewavelength.min()
)
print(f"Minimum MAPE value is {np.min(mape_matrix_heightmorewavelength)} at w0 = {min_mape_heightmorewavelength[0][0]/1000 - 0.1} and w1 = {min_mape_heightmorewavelength[1][0]/1000 - 0.1}")
# Plot the fit for the best weights; the returned MAPE is the cell's displayed value.
fit_and_validate(
    [min_mape_heightmorewavelength[0][0]/1000 - 0.1, min_mape_heightmorewavelength[1][0]/1000 - 0.1],
    data_heightmorewavelength,
    data_heightmorewavelength,
    linear_network,
)

In [35]:
# Same 200 x 200 grid search (w0 = i/1000 - 0.1, w1 = j/1000 - 0.1), restricted to the rows
# of data_wavelengthmoreheight.
mape_matrix_wavelengthmoreheight = np.zeros((200, 200))
for i, j in np.ndindex(200, 200):
    mape_matrix_wavelengthmoreheight[i, j] = linear_network(
        data_wavelengthmoreheight["height"],
        data_wavelengthmoreheight["wave_length"],
        [i/1000 - 0.1, j/1000 - 0.1],
        data_wavelengthmoreheight["speed"],
    )[3]

# Grid indices of every entry equal to the smallest MAPE; the first hit is the one reported.
min_mape_wavelengthmoreheight = np.where(
    mape_matrix_wavelengthmoreheight == mape_matrix_wavelengthmoreheight.min()
)
print(f"Minimum MAPE value is {np.min(mape_matrix_wavelengthmoreheight)} at w0 = {min_mape_wavelengthmoreheight[0][0]/1000 - 0.1} and w1 = {min_mape_wavelengthmoreheight[1][0]/1000 - 0.1}")
# Plot the fit for the best weights; the returned MAPE is the cell's displayed value.
fit_and_validate(
    [min_mape_wavelengthmoreheight[0][0]/1000 - 0.1, min_mape_wavelengthmoreheight[1][0]/1000 - 0.1],
    data_wavelengthmoreheight,
    data_wavelengthmoreheight,
    linear_network,
)


In [20]:
# One entry per grid point of the 50 x 50 search that fills this matrix in a later cell.
# A larger zero-initialised matrix would keep untouched zeros, and since a MAPE is never
# negative those zeros would be picked up as the "minimum" by the subsequent np.min / np.where.
mape_matrix_unswapped = np.zeros((50, 50))

# Independent copy of the swapped data under its own name. The columns are already called
# "height", "wave_length" and "speed", so no renaming is required.
# NOTE(review): the "unswapped" naming suggests the original `data` may have been intended
# here -- confirm.
data_renamed = data_swapped.copy()
data_renamed.head()

In [26]:

# Coarser grid search: entry [i, j] for i, j in 0..49 receives the MAPE for the weights
# w0 = i/100 - 0.2 and w1 = j/100 - 0.2. Entries outside that 50 x 50 range are not written.
for i, j in np.ndindex(50, 50):
    mape_matrix_unswapped[i, j] = linear_network(
        data_renamed["height"],
        data_renamed["wave_length"],
        [i/100 - 0.2, j/100 - 0.2],
        data_renamed["speed"],
    )[3]

# display the data
print(mape_matrix_unswapped)

In [24]:

# Get the coordinates of the minimum value of mape.
# NOTE(review): this presumes every entry of mape_matrix_unswapped has been filled by the
# grid search; entries still holding their initial zero would be reported as the minimum.
min_mape_unswapped = np.where(mape_matrix_unswapped == np.min(mape_matrix_unswapped))
# Report the weights from this search's own indices (min_mape_unswapped). `min_mape` belongs to
# a different search that uses a different grid (i/1000 - 0.1), so it must not be used here.
print(f"Minimum MAPE value is {np.min(mape_matrix_unswapped)} at w0 = {min_mape_unswapped[0][0]/100 - 0.2} and w1 = {min_mape_unswapped[1][0]/100 - 0.2}")

# Plot the fit for the best weights; the returned MAPE is the cell's displayed value.
fit_and_validate([min_mape_unswapped[0][0]/100 - 0.2, min_mape_unswapped[1][0]/100 - 0.2], data_renamed, data_renamed, linear_network)

In [6]:
# Perhaps, the data is better represented in a log-log scale.
# 3 x 2 grid of panels: left column = original data, right column = swapped data;
# one row per choice of which axis is log-transformed (natural logarithm).
plt.figure(figsize = [10, 10])

# Row 1: both axes log-transformed
plt.subplot(3, 2, 1)
plt.plot(np.log(data["height"]), np.log(data["wave_length"]), '.', ls="")
plt.title("data")
plt.xlabel("log(height)")
plt.ylabel("log(wave_length)")
plt.subplot(3, 2, 2)
plt.plot(np.log(data_swapped["height"]), np.log(data_swapped["wave_length"]), '.', ls="")
plt.title("data_swapped")
plt.xlabel("log(height)")
plt.ylabel("log(wave_length)")

# Row 2: natural height against log-transformed wave length
plt.subplot(3, 2, 3)
plt.plot(data["height"], np.log(data["wave_length"]), '.', ls="")
plt.xlabel("height")
plt.ylabel("log(wave_length)")
plt.subplot(3, 2, 4)
plt.plot(data_swapped["height"], np.log(data_swapped["wave_length"]), '.', ls="")
plt.xlabel("height")
plt.ylabel("log(wave_length)")

# Row 3: log-transformed height against natural wave length
plt.subplot(3, 2, 5)
plt.plot(np.log(data["height"]), data["wave_length"], '.', ls="")
plt.xlabel("log(height)")
plt.ylabel("wave_length")
plt.subplot(3, 2, 6)
plt.plot(np.log(data_swapped["height"]), data_swapped["wave_length"], '.', ls="")
plt.xlabel("log(height)")
plt.ylabel("wave_length")

# Keep the added labels and titles from overlapping neighbouring panels.
plt.tight_layout()
