# PACKAGES

In [1]:
#!pip install -r requirements.txt

import sqlite3
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt

# import torch
# import torch.nn as nn
# import pyro
# from pyro.nn import PyroModule, PyroSample
# import pyro.distributions as dist

# from pyro.infer import MCMC, NUTS
# from pyro.infer import Predictive
# from pyro.infer import SVI, Trace_ELBO
# from pyro.infer.autoguide import AutoDiagonalNormal
# from tqdm.auto import trange

# # Set random seed for reproducibility
# np.random.seed(42)
# # Set Pyro random seed
# pyro.set_rng_seed(42)

# READ-IN DATA

In [2]:
def db_to_features(db, sensor_list, feature_list, drop_simulation_ids=(5333,)):
    '''
    Read the simulation-statistics SQLite database and build the tables used in the analysis.

    Inputs:
    db: path to the SQLite database
    sensor_list: list of sensor names to be used
    feature_list: list of simulation-attribute names to be used as features
    drop_simulation_ids: simulation ids to remove from the caselist; ids that are not
        present are ignored. Defaults to (5333,), a simulation that has no results.

    Outputs (in this order):
    caselist: one row per simulation_id with the selected attributes plus GroupID
    sim_results: one row per simulation_id with the 'max' statistic of every selected
        sensor (columns are sensor ids) plus GroupID
    cl_filtered: empty table with the columns of caselist, to be populated with run simulations
    sr_filtered: empty table with the columns of sim_results, to be populated with run simulations
    sensors: rows of the sensors table whose name is in sensor_list
    '''

    # connect to database and read tables; always release the connection, even if a query fails
    con = sqlite3.connect(db)
    try:
        df_sensors = pd.read_sql_query('SELECT * FROM sensors', con)
        df_stats = pd.read_sql_query('SELECT * FROM standardstatistics', con)
        df_simAttr = pd.read_sql_query('SELECT * FROM simulationattributes', con)
    finally:
        con.close()

    # groups of simulations (different seeds): one GroupID per simulation_id
    groups = df_simAttr.loc[df_simAttr['name'] == 'GroupID']
    groups = groups.rename(columns={'value': 'GroupID'})[['simulation_id', 'GroupID']].astype(int)

    # filter simulation results down to the requested sensors
    sensors = df_sensors[df_sensors['name'].isin(sensor_list)]
    sim_results = df_stats[df_stats['sensor_id'].isin(sensors['id'].unique())]
    # one row per simulation, one column per sensor id, holding the 'max' statistic
    sim_results = sim_results.pivot(index='simulation_id', columns='sensor_id', values='max').sort_index()
    sim_results = sim_results.merge(groups, on='simulation_id').set_index('simulation_id')

    # filter simulation attributes down to the requested features
    df_simAttr_filter = df_simAttr[df_simAttr['name'].isin(feature_list)]
    caselist = df_simAttr_filter.pivot(index='simulation_id', columns='name', values='value')
    caselist = caselist.merge(groups, on='simulation_id').set_index('simulation_id')
    # errors='ignore' keeps the function usable on databases that do not contain these ids
    caselist = caselist.drop(list(drop_simulation_ids), errors='ignore')

    # empty frames that keep the column layout of caselist / sim_results
    cl_filtered = caselist.iloc[0:0].copy()
    sr_filtered = sim_results.iloc[0:0].copy()

    return caselist, sim_results, cl_filtered, sr_filtered, sensors

# SQLite database with the simulation statistics (path relative to the notebook)
simstats = 'U62_PULSE_simulationstats.db'

# Sensor names whose results are extracted from the database
sensor_list = [
    'foundation_origin xy FloaterOffset [m]',
    'foundation_origin Rxy FloaterTilt [deg]',
    'foundation_origin Rz FloaterYaw [deg]',
    'foundation_origin z FloaterHeave [m]',
    'foundation_origin Mooring GXY Resultant force [kN]',
    'MooringLine1 Effective tension Fairlead [kN]',
    'MooringLine2 Effective tension Fairlead [kN]',
    'MooringLine3 Effective tension Fairlead [kN]',
    'MooringLine4 Effective tension Fairlead [kN]',
    'MooringLine5 Effective tension Fairlead [kN]',
    'GE14-220 GXY acceleration [m/s^2]',
    'CEN_E3 Resultant bending moment ArcLength=2.72 [kN.m]',
]

# Simulation attributes used as input features
feature_list = [
    'WindGeographic',
    'Uhub',
    'WaveGeographic',
    'Hs',
    'YawError',
    'Tp',
    'CurrentGeographic',
    'CurrentSpeed',
]

(caselist, sim_results, cl_filtered, sr_filtered, sensors) = db_to_features(
    db=simstats,
    sensor_list=sensor_list,
    feature_list=feature_list,
)

In [16]:
# Number of simulations per GroupID, keeping only the groups that do not have exactly 3
sim_results.value_counts('GroupID').loc[lambda counts: counts != 3]


GroupID
1090    2
1063    2
Name: count, dtype: int64

In [18]:
# Export the simulation attributes (indexed by simulation_id) to a CSV file next to the notebook
caselist.to_csv('caselist.csv')

In [4]:
# Inspect the rows of the sensors table that matched sensor_list
sensors

Unnamed: 0,id,name
48,49,foundation_origin z FloaterHeave [m]
51,52,foundation_origin Rz FloaterYaw [deg]
58,59,foundation_origin xy FloaterOffset [m]
59,60,foundation_origin Rxy FloaterTilt [deg]
163,164,CEN_E3 Resultant bending moment ArcLength=2.72...
1476,1477,MooringLine1 Effective tension Fairlead [kN]
1492,1493,MooringLine2 Effective tension Fairlead [kN]
1508,1509,MooringLine3 Effective tension Fairlead [kN]
1524,1525,MooringLine4 Effective tension Fairlead [kN]
1540,1541,MooringLine5 Effective tension Fairlead [kN]


In [3]:
# Inspect the per-simulation results: one column per sensor id plus GroupID
sim_results

Unnamed: 0_level_0,49,52,59,60,164,1477,1493,1509,1525,1541,1563,2348,GroupID
simulation_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
1,1.691887,-2.120961,18.965508,4.103344,572147.297447,642.009155,628.924316,3369.619629,8743.897461,693.981445,6854.937477,1.889086,312
2,1.250475,-1.011889,17.376253,3.133629,437043.607936,723.585144,698.690552,3120.203369,8305.519531,792.834595,6434.088664,1.905250,315
3,1.363898,-0.269001,18.012307,3.873457,553222.715490,634.899475,627.509644,3228.936035,8404.508789,687.081482,6432.264117,2.129340,311
4,1.021289,-0.522990,17.559575,2.982816,522729.357912,749.992554,728.521240,3025.437744,8423.188477,810.803101,6550.237208,2.583491,313
5,2.149652,5.092750,22.914839,3.082897,411877.701497,1386.479248,1302.627930,2150.176758,9823.062500,1885.004883,8190.089347,2.711316,317
...,...,...,...,...,...,...,...,...,...,...,...,...,...
5665,0.683119,0.764273,19.953560,3.989195,530876.222038,645.002991,635.562012,4980.209473,9143.626953,726.015259,7329.141337,2.064749,409
5666,5.127405,8.587289,29.656173,6.097249,567520.837672,3867.829346,4879.607422,5006.321289,1941.879517,2957.632568,10010.185484,2.623968,1205
5667,1.423191,2.257077,23.684501,3.510680,441296.787171,1854.119995,1613.260010,1754.702881,8440.926758,2246.303223,6971.787400,2.656162,120
5668,1.226076,2.450841,25.256802,3.337757,477359.503286,1640.242065,1395.244995,1781.616089,10422.503906,1913.273438,8801.555657,3.486386,121


In [6]:
# Lookup from sensor id to sensor name, used to give the result columns readable labels
column_mapping = sensors.set_index('id')['name'].to_dict()
column_mapping

{49: 'foundation_origin z FloaterHeave [m]',
 52: 'foundation_origin Rz FloaterYaw [deg]',
 59: 'foundation_origin xy FloaterOffset [m]',
 60: 'foundation_origin Rxy FloaterTilt [deg]',
 164: 'CEN_E3 Resultant bending moment ArcLength=2.72 [kN.m]',
 1477: 'MooringLine1 Effective tension Fairlead [kN]',
 1493: 'MooringLine2 Effective tension Fairlead [kN]',
 1509: 'MooringLine3 Effective tension Fairlead [kN]',
 1525: 'MooringLine4 Effective tension Fairlead [kN]',
 1541: 'MooringLine5 Effective tension Fairlead [kN]',
 1563: 'foundation_origin Mooring GXY Resultant force [kN]',
 2348: 'GE14-220 GXY acceleration [m/s^2]'}

In [7]:
# Replace the sensor-id column labels by sensor names; labels without a mapping (GroupID) are kept
sim_results_renamed = sim_results.rename(column_mapping, axis='columns')
sim_results_renamed

Unnamed: 0_level_0,foundation_origin z FloaterHeave [m],foundation_origin Rz FloaterYaw [deg],foundation_origin xy FloaterOffset [m],foundation_origin Rxy FloaterTilt [deg],CEN_E3 Resultant bending moment ArcLength=2.72 [kN.m],MooringLine1 Effective tension Fairlead [kN],MooringLine2 Effective tension Fairlead [kN],MooringLine3 Effective tension Fairlead [kN],MooringLine4 Effective tension Fairlead [kN],MooringLine5 Effective tension Fairlead [kN],foundation_origin Mooring GXY Resultant force [kN],GE14-220 GXY acceleration [m/s^2],GroupID
simulation_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
1,1.691887,-2.120961,18.965508,4.103344,572147.297447,642.009155,628.924316,3369.619629,8743.897461,693.981445,6854.937477,1.889086,312
2,1.250475,-1.011889,17.376253,3.133629,437043.607936,723.585144,698.690552,3120.203369,8305.519531,792.834595,6434.088664,1.905250,315
3,1.363898,-0.269001,18.012307,3.873457,553222.715490,634.899475,627.509644,3228.936035,8404.508789,687.081482,6432.264117,2.129340,311
4,1.021289,-0.522990,17.559575,2.982816,522729.357912,749.992554,728.521240,3025.437744,8423.188477,810.803101,6550.237208,2.583491,313
5,2.149652,5.092750,22.914839,3.082897,411877.701497,1386.479248,1302.627930,2150.176758,9823.062500,1885.004883,8190.089347,2.711316,317
...,...,...,...,...,...,...,...,...,...,...,...,...,...
5665,0.683119,0.764273,19.953560,3.989195,530876.222038,645.002991,635.562012,4980.209473,9143.626953,726.015259,7329.141337,2.064749,409
5666,5.127405,8.587289,29.656173,6.097249,567520.837672,3867.829346,4879.607422,5006.321289,1941.879517,2957.632568,10010.185484,2.623968,1205
5667,1.423191,2.257077,23.684501,3.510680,441296.787171,1854.119995,1613.260010,1754.702881,8440.926758,2246.303223,6971.787400,2.656162,120
5668,1.226076,2.450841,25.256802,3.337757,477359.503286,1640.242065,1395.244995,1781.616089,10422.503906,1913.273438,8801.555657,3.486386,121


In [9]:
# Export the results with sensor-name columns (indexed by simulation_id) to a CSV file
sim_results_renamed.to_csv('sim_results.csv')

# PREPARE DATA

In [None]:
def prepare_data(input_df, output_df, output_col=59, test_size=0.2, random_state=42):
    '''
    Build min-max scaled, PCA-reduced train/test sets for a single output column.

    Rows of input_df and output_df are paired by position (not by index label), so both
    frames must list the same simulations in the same order.

    The scalers and the PCA are fitted on the training rows only and then applied to the
    test rows, so no information from the test set leaks into the preprocessing. Scaled
    test values can therefore fall slightly outside [0, 1].

    Inputs:
    input_df: table of input features, one row per simulation
    output_df: table of simulation results, one row per simulation
    output_col: label of the column of output_df used as target
    test_size: share (or number) of rows placed in the test set, passed to train_test_split
    random_state: seed for the train/test split

    Outputs:
    X_train, X_test: DataFrames with the single column 'PC1'
    Y_train, Y_test: DataFrames with a single column named after output_col
    All four are indexed by the row position in the input tables.

    Raises:
    ValueError: if input_df and output_df do not have the same number of rows
    '''
    X = input_df.copy()
    Y = output_df[output_col].copy()

    if len(X) != len(Y):
        raise ValueError(
            f'input_df has {len(X)} rows but output_df has {len(Y)} rows; '
            'they must describe the same simulations'
        )

    # Split row positions first; the partition depends only on the number of rows and
    # random_state, so it is the same partition a split after preprocessing would give.
    positions = np.arange(len(X))
    train_pos, test_pos = train_test_split(positions, test_size=test_size, random_state=random_state)

    X_train_raw, X_test_raw = X.iloc[train_pos], X.iloc[test_pos]
    Y_train_raw = Y.iloc[train_pos].values.reshape(-1, 1)
    Y_test_raw = Y.iloc[test_pos].values.reshape(-1, 1)

    # Normalize the data: separate scalers for inputs and target, fitted on the training rows only
    x_scaler = MinMaxScaler()
    y_scaler = MinMaxScaler()
    X_train_scaled = x_scaler.fit_transform(X_train_raw)
    X_test_scaled = x_scaler.transform(X_test_raw)
    Y_train_scaled = y_scaler.fit_transform(Y_train_raw)
    Y_test_scaled = y_scaler.transform(Y_test_raw)

    # Apply PCA to the normalized input data (fitted on the training rows only)
    pca = PCA(n_components=1)
    X_train_pca = pca.fit_transform(X_train_scaled)
    X_test_pca = pca.transform(X_test_scaled)
    # print how much variance is explained by the first principal component
    print('Variance explained by the first principal component:', pca.explained_variance_ratio_[0])

    X_train = pd.DataFrame(X_train_pca, columns=['PC1'], index=train_pos)
    X_test = pd.DataFrame(X_test_pca, columns=['PC1'], index=test_pos)
    Y_train = pd.DataFrame(Y_train_scaled, columns=[Y.name], index=train_pos)
    Y_test = pd.DataFrame(Y_test_scaled, columns=[Y.name], index=test_pos)

    return X_train, X_test, Y_train, Y_test

# Features come from the caselist, the target from the simulation results (default output column)
(x_train, x_test, y_train, y_test) = prepare_data(input_df=caselist, output_df=sim_results)

In [None]:
# `import torch` is commented out in the imports cell, so import it here to keep this
# cell runnable after Restart Kernel -> Run All.
import torch

# Convert the single-column train/test frames to 1-D float32 tensors.
# squeeze(-1) drops only the trailing column axis, so a one-row set stays 1-D
# instead of collapsing to a 0-d tensor.
x_train_torch = torch.tensor(x_train.values, dtype=torch.float32).squeeze(-1)
y_train_torch = torch.tensor(y_train.values, dtype=torch.float32).squeeze(-1)
x_test_torch = torch.tensor(x_test.values, dtype=torch.float32).squeeze(-1)
y_test_torch = torch.tensor(y_test.values, dtype=torch.float32).squeeze(-1)

print(x_train_torch.shape, y_train_torch.shape, x_test_torch.shape)

# RUN MODEL

# EVALUATE OUTCOME

In [None]:
def rmse(y_true, y_pred):
    '''
    Root-mean-square error between two array-likes.

    Inputs:
    y_true: reference values
    y_pred: predicted values (must broadcast against y_true)

    Outputs:
    square root of the mean squared element-wise difference
    '''
    residuals = np.array(y_true) - np.array(y_pred)
    mean_squared_error = np.mean(residuals ** 2)
    return np.sqrt(mean_squared_error)
# Pass the reference values first and the predictions second, matching rmse(y_true, y_pred).
# NOTE(review): `preds` is not defined in any cell visible here (the RUN MODEL section is
# empty) — it must be produced by the model before this cell can run.
rmse(y_test_torch, preds['obs'])