In [30]:
# Import packages
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split as tts
from sklearn.metrics import mean_squared_error as mse
from sklearn import linear_model
from sklearn.neural_network import MLPRegressor

In [2]:
# Read in buoy data and relevant variables
buoy_file = "C:/Users/rache/Documents/University of Southern California/2023-2024 Year 4/GEOL 599 Environmental data science/data/HW8_buoy_data.xlsx" #note on path
buoy_all = pd.read_excel(buoy_file)
buoy = buoy_all.loc[:,['WVHT','DPD','APD','MWD','WTMP']]
buoy = buoy.rename(columns={'WVHT':'height','DPD':'dom pd','APD':'avg pd','MWD':'dir','WTMP':'temp'})
buoy[(buoy['height']==99)|(buoy['dom pd']==99)|(buoy['avg pd']==99)|(buoy['dir']==99)|(buoy['temp']==99)] #See rows with no data
buoy= buoy.drop(buoy[buoy['height']==99].index)

In [3]:
# Convert to numpy
X = buoy.loc[:,['dom pd','avg pd','dir','temp']].to_numpy()
X
y = buoy.loc[:,'height'].to_numpy()
y

array([1.12, 1.16, 1.2 , ..., 1.79, 1.66, 1.7 ])

In [6]:
# Split data 75:10:15 into training:validation:test
Xtr, X1 , ytr, y1 = tts(X, y, test_size= 0.25) 
Xva, Xte, yva, yte = tts(X1, y1, test_size= 0.6)

In [7]:
# Train a linear model to buoy data
lin = linear_model.LinearRegression()
lin.fit(Xtr, ytr)
print(lin.score(Xtr,ytr))
print(lin.score(Xva,yva))
print(lin.score(Xte,yte))

0.29082948871760794
0.28775033457809107
0.31603409902413726


In [8]:
# Train a neural regression to buoy data
MLP = MLPRegressor(hidden_layer_sizes= (16,16,16))
MLP.fit(Xtr, ytr)
print(MLP.score(Xtr,ytr))
print(MLP.score(Xva,yva))
print(MLP.score(Xte,yte))

0.32152396917858794
0.3155758243383662
0.34834472330291866


In [None]:
### Answers to problem set.
# 1.) Linear model vs. neural network: Based on their scores, neural network does slightly better than the linear model, but it is only marginally better.
# 2.) The cost function of the linear model is ordinary least squares. The one for the neutral network is a stochastic gradient-based optimizer.

In [32]:
### 3.) Fill the table.
layer_ls = [[1,],[16,],[1,16],[16,16],[16,4,16],[16,16,16],[16,16,16,16],[16,32,64,128],[1,1,1,1,1],[100,100,100,100,100]]
tr_error = []
te_error = []
for i in layer_ls:
    MLP = MLPRegressor(hidden_layer_sizes= i)
    MLP.fit(Xtr,ytr)
    tr_error.append(mse(ytr, MLP.predict(Xtr)).round(3))
    te_error.append(mse(yte, MLP.predict(Xte)).round(3))

In [33]:
# Look at your list of train and test errors
print(tr_error)
print(te_error)

[0.278, 0.205, 0.179, 0.199, 0.199, 0.214, 0.193, 0.261, 0.278, 0.175]
[0.285, 0.204, 0.173, 0.198, 0.197, 0.214, 0.184, 0.249, 0.285, 0.166]


In [34]:
# Make the two lists into a dataframe
layer_num = [1,1,2,2,3,3,4,4,5,5]
error_df = pd.DataFrame(data= {'Number of layers':layer_num, 'Nodes in layers': layer_ls, 'Train error': tr_error, 'Test error': te_error })
error_df

Unnamed: 0,Number of layers,Nodes in layers,Train error,Test error
0,1,[1],0.278,0.285
1,1,[16],0.205,0.204
2,2,"[1, 16]",0.179,0.173
3,2,"[16, 16]",0.199,0.198
4,3,"[16, 4, 16]",0.199,0.197
5,3,"[16, 16, 16]",0.214,0.214
6,4,"[16, 16, 16, 16]",0.193,0.184
7,4,"[16, 32, 64, 128]",0.261,0.249
8,5,"[1, 1, 1, 1, 1]",0.278,0.285
9,5,"[100, 100, 100, 100, 100]",0.175,0.166


In [None]:
### 4.) Generally, more layers reduces the error and having more nodes reduces the error. However, errors are reduced when each layers node is about the same and when layers have more than one node.

In [35]:
activ = ['identity', 'logistic', 'tanh', 'relu']
act_error = []
for i in activ:
    MLP2 = MLPRegressor(hidden_layer_sizes= [16,16,16,16,16], activation= i)
    MLP2.fit(Xtr,ytr)
    act_error.append(mse(yte, MLP2.predict(Xte)).round(3))

In [36]:
print(act_error)
### 5.) It seems changing the activation function doesn't affect the error except with "identity", which had the highest error.

[0.245, 0.162, 0.167, 0.181]
