# sktime Interview Demo

In [1]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
import os
import sktime
from sktime.datasets import load_from_tsfile_to_dataframe
import pandas as pd
import numpy as np

# import to retrieve examples
from sktime.datatypes import get_examples

In [3]:
#from numpy.testing import assert_allclose
#assert_allclose(csv_data_pd, csv_data_np)

The using pandas to read csv is 3 times faster than numpy. So we will use pandas in our code.

## The base load data class prep_csv

In [4]:
import os
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm

import warnings
warnings.filterwarnings('ignore')

class prep_csv:
    def __init__(
        self,
        path=None,
        file_format=".csv",
        dtype=np.float32,
        decimal=2,
        force_ending="_F_LBF",
        custom_ending="MM_",
        custom_attribute=None
    ):
        self.path=path
        self.file_format=file_format
        self.dtype=dtype
        self.decimal=decimal
        self.force_ending=force_ending
        self.custom_ending=custom_ending
        self.custom_attribute=custom_attribute
        decimal_point='{:,.'+str(self.decimal)+'f}'
        pd.options.display.float_format = decimal_point.format
        super().__init__()

        # Variables for internal uses
        self.segment=None
        self.all_data=None
        self.fmax=None
        self.fmin=None
        self.all_max=None
        self.all_min=None
        self.force_report=None
        self.mm_report=None
        self.custom_attribute_report=None

    def prep_csv(self):
        self._prep_max_min()
        self._prep_force()
        self._prep_mm()
        if isinstance(self.custom_attribute, type(None)) is not True:
            self._get_custom_attribute()
        return self
        
    def _load_data(self):
        """
        Build a multiIndex Dataframe for all cases
        return: A multiIndex DataFrame of all files 
        Index 0: case name
        Index 1: time series
        To "extract" dataframe for a specific case, use the formula
        ResultMax.filter(items=['case name'], axis=0)
        
        Parameter:
        path: absolute or relative path to the folder where csv files are
        file_format: only suport csv at the moment
        dtype: numpy dtype, np.float16, np.float32 or np.float64
        decimal: number of decimal points to display

        Attribute:
        fmax, fmin: Maximum or minimum force in the segment. If minimum, the values will be absolute value.

        """
        file_list = os.listdir(self.path)
        chosen_files = list(filter(lambda f: f.endswith(self.file_format), file_list))
        num_files = len(chosen_files)
        case = []
        time = []
        columns = []
        #content = []
        for i, file in enumerate(tqdm(chosen_files, bar_format='{desc}{percentage:3.0f}%{r_bar}')):
            case_name = file.replace(self.file_format,"")
            current_file = pd.read_csv(path + file)
            if i==0:
                columns = np.array(current_file.columns[1:], dtype="U200")
                columns = np.char.strip(columns)  # Remove white space
                content = np.empty([1,len(current_file.columns)-1])
                current_file = current_file.drop([0])
                new_index = np.arange(len(current_file))  # Reset index to 0
                current_file = current_file.reindex(new_index, method="backfill")
                current_time = current_file.iloc[:,0].astype(self.dtype).to_numpy()  # Get time array
                current_content = current_file.iloc[:,1:].astype(self.dtype).to_numpy()  # Get content array
                # Get case name array to have the same length as time series in order to create MultiIndex
                current_case = np.empty(len(current_time), dtype='S150')
                current_case.fill(case_name)
                # Append all arays
                case = np.append(case, current_case)
                time = np.append(time, current_time)
                content = np.concatenate((content, current_content), axis=0)
                content = np.delete(content, 0, 0)  # Delete first row because it's an empty array
                print(f"Finished {case_name}")
            else:
                case_name = file.replace(self.file_format,"")
                current_file = pd.read_csv(self.path + file)
                current_file = current_file.drop([0])
                new_index = np.arange(len(current_file))  # Reset index to 0
                current_file = current_file.reindex(new_index, method="backfill")
                current_time = current_file.iloc[:,0].astype(self.dtype).to_numpy()  # Get time array
                current_content = current_file.iloc[:,1:].astype(self.dtype).to_numpy()  # Get content array
                # Get case array to have the same length as time series in order to create MultiIndex
                current_case = np.empty(len(current_time), dtype="U200")
                current_case.fill(case_name)
                # Append all arays
                case = np.append(case, current_case)
                time = np.append(time, current_time)
                content = np.concatenate((content, current_content), axis=0)
                print(f"Finished {case_name}")

        # Create MultiIndex array    
        index_array = [
            np.array(case),  
            np.array(time),  
        ]
        tuples = list(zip(*index_array))
        index = pd.MultiIndex.from_tuples(tuples, names=["CASE", "TIME"])
        all_cases = pd.DataFrame(
        content, 
        index=index, 
        columns=columns)        
        return all_cases.astype(self.dtype)      
    
    def _prep_max_min(self):
        self.all_data = self._load_data()
        self.all_max=self.all_data.groupby(level=0).agg(["max"]).droplevel(1, axis=1)  # Max values of each case
        self.all_min=self.all_data.groupby(level=0).agg(["min"]).droplevel(1, axis=1)  # Min values of each case
        return self

    def _prep_force(self):
        all_data_max=self.all_max
        all_data_min=self.all_min
        # Apply masking to extract force
        selected_columns = list(filter(lambda f: f.endswith(self.force_ending) or f.startswith(self.force_ending), all_data_max.columns))
        clean_columns = [col.replace(self.force_ending, "") for col in selected_columns]
        # Process max
        selected_force_max = all_data_max[selected_columns].apply(lambda x: x/1000)
        selected_force_max.columns = clean_columns
        self.segment = clean_columns
        df_force_max = selected_force_max.max().apply(lambda x: 0 if x<0 else x)
        # Apply mask to set negative value to 0
        x = selected_force_max > 0
        self.fmax = selected_force_max.where(x,0)
        df_force_max=df_force_max.to_dict()
        idx_force_max = selected_force_max.idxmax().to_dict()
        # Process min
        selected_force_min = all_data_min[selected_columns].apply(lambda x: x/(-1000))
        selected_force_min.columns = clean_columns
        df_force_min = selected_force_min.min().apply(lambda x: 0 if x<0 else x)
        # Apply mask to set negative value to 0
        x = selected_force_min > 0
        self.fmin = selected_force_min.where(x,0)
        df_force_min=df_force_min.to_dict()
        idx_force_min = selected_force_min.idxmin().to_dict()
        # Create MultiIndex Header
        header=[np.array(["With Flow", "Against Flow", "With Flow Controlling Case", "Against Flow Controlling Case"]), np.array(["kips", "kips", "", ""])]
        self.force_report=pd.DataFrame([df_force_max, df_force_min, idx_force_max, idx_force_min], index=header).T

        # Get worst force column and move it next to 'With Flow' column
        self.force_report[('Worst Force', 'kips')]=self.force_report[[('With Flow', 'kips'), ('With Flow', 'kips')]].max(axis=1)
        move_col = self.force_report.pop(('Worst Force', 'kips'))
        self.force_report.insert(2, ('Worst Force', 'kips'), move_col)
        return self
    
    def _prep_mm(self):
        all_data_max=self.all_max
        # Apply masking to extract attribute
        selected_columns = list(filter(lambda f: f.endswith(self.custom_ending) or f.startswith(self.custom_ending), all_data_max.columns))
        clean_columns = [col.replace(self.custom_ending, "") for col in selected_columns]
        # Process max
        selected_max = all_data_max[selected_columns]
        selected_max.columns = clean_columns
        self.mm_report = selected_max
        return self

    def _get_custom_attribute(self):
        all_data=self.all_data
        # Apply masking to extract attribute
        selected_columns = list(filter(lambda f: f.endswith(self.custom_attribute) or f.startswith(self.custom_attribute), all_data.columns))
        clean_columns = [col.replace(self.custom_ending, "") for col in selected_columns]
        # Process max
        selected_data = all_data[selected_columns]
        selected_data.columns = clean_columns
        self.custom_attribute_report = selected_data
        return self





In [5]:
path = 'H:/Tensorflow Dataset/Example_CCLNG_ESD1/'

In [6]:
PrepMyCSV = prep_csv(path=path, dtype=np.float32, decimal=2, force_ending="_F_LBF", custom_ending=":PMAX.HVAL")


Prep data

In [7]:
PrepMyCSV.prep_csv()

  0%| 0/55 [00:00<?, ?it/s]

Finished dual_20s_all_XV_10k
Finished dual_20s_all_XV_12k
Finished dual_20s_Everywhere_14s_Jetty_10k
Finished dual_20s_Everywhere_14s_Jetty_12k
Finished dual_20s_Everywhere_14s_Tank_10k
Finished dual_20s_Everywhere_14s_Tank_12k
Finished dual_20s_Everywhere_16s_Jetty_10k
Finished dual_20s_Everywhere_16s_Jetty_12k
Finished dual_20s_Everywhere_16s_Tank_10k
Finished dual_20s_Everywhere_16s_Tank_12k
Finished dual_20s_Everywhere_18s_Jetty_10k
Finished dual_20s_Everywhere_18s_Jetty_12k
Finished dual_20s_Everywhere_18s_Tank_10k
Finished dual_20s_Everywhere_18s_Tank_12k
Finished dual_20s_Everywhere_22s_Jetty_10k
Finished dual_20s_Everywhere_22s_Jetty_12k
Finished dual_20s_Everywhere_22s_Tank_10k
Finished dual_20s_Everywhere_22s_Tank_12k
Finished dual_20s_Everywhere_24s_Jetty_10k
Finished dual_20s_Everywhere_24s_Jetty_12k
Finished dual_20s_Everywhere_24s_Tank_10k
Finished dual_20s_Everywhere_24s_Tank_12k
Finished dual_20s_Everywhere_26s_Jetty_10k
Finished dual_20s_Everywhere_26s_Jetty_12k
Finish

<__main__.prep_csv at 0x29873066a08>

In [8]:
PrepMyCSV.all_data

Unnamed: 0_level_0,Unnamed: 1_level_0,XV24541:P-,XV24521:P-,XV24501:P-,XV24641:P-,XV24621:P-,XV24601:P-,XV24581:P-,XV24681:P-,XV24571:P-,XV24671:P-,...,LOADINGARMS:PMAX.HVAL,LOADINGARMS:MASP.HDIF,EXTRA:PMAX.HVAL,EXTRA:MASP.HDIF,INTERTANK:PMAX.HVAL,INTERTANK:MASP.HDIF,SHIPPIPING:PMAX.HVAL,SHIPPIPING:MASP.HDIF,MM_SYSTEM:PMAX.HVAL,MM_SYSTEM:MASP.HDIF
CASE,TIME,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
dual_20s_all_XV_10k,0.00,49.98,49.92,49.87,53.34,53.34,53.34,55.37,51.29,55.30,51.25,...,49.28,253.28,53.88,386.19,228.20,211.89,14.86,287.72,233.66,68.89
dual_20s_all_XV_10k,0.00,49.98,49.92,49.87,53.34,53.34,53.34,55.37,51.29,55.30,51.25,...,49.28,253.28,53.88,386.19,228.20,211.89,14.86,287.72,233.66,68.89
dual_20s_all_XV_10k,0.00,49.98,49.92,49.87,53.34,53.34,53.34,55.37,51.29,55.30,51.25,...,49.28,253.28,53.88,386.19,228.20,211.89,14.86,287.72,233.66,68.89
dual_20s_all_XV_10k,0.00,49.98,49.92,49.87,53.34,53.34,53.34,55.37,51.29,55.30,51.25,...,49.28,253.28,53.88,386.19,228.20,211.89,14.86,287.72,233.66,68.89
dual_20s_all_XV_10k,0.00,49.98,49.92,49.87,53.34,53.34,53.34,55.37,51.29,55.30,51.25,...,49.28,253.28,53.88,386.19,228.20,211.89,14.86,287.72,233.66,68.89
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
single_22s_all_XV_12k,7.17,127.25,127.25,127.25,66.13,66.13,66.13,171.80,167.38,113.50,109.06,...,139.21,163.35,207.40,232.67,226.34,213.73,122.58,180.00,230.29,72.27
single_22s_all_XV_12k,7.25,127.25,127.25,127.25,66.13,66.13,66.13,171.79,167.38,113.50,109.07,...,139.21,163.35,207.40,232.67,226.34,213.73,122.58,180.00,230.29,72.27
single_22s_all_XV_12k,7.33,127.25,127.25,127.25,66.12,66.12,66.12,171.79,167.37,113.50,109.07,...,139.21,163.35,207.40,232.67,226.34,213.73,122.58,180.00,230.29,72.27
single_22s_all_XV_12k,7.43,127.25,127.25,127.25,66.12,66.12,66.12,171.78,167.37,113.51,109.07,...,139.21,163.35,207.40,232.67,226.34,213.73,122.58,180.00,230.29,72.27


In [9]:
PrepMyCSV.mm_report.T

CASE,dual_20s_Everywhere_14s_Jetty_10k,dual_20s_Everywhere_14s_Jetty_12k,dual_20s_Everywhere_14s_Tank_10k,dual_20s_Everywhere_14s_Tank_12k,dual_20s_Everywhere_16s_Jetty_10k,dual_20s_Everywhere_16s_Jetty_12k,dual_20s_Everywhere_16s_Tank_10k,dual_20s_Everywhere_16s_Tank_12k,dual_20s_Everywhere_18s_Jetty_10k,dual_20s_Everywhere_18s_Jetty_12k,...,single_20s_LA_22s_LL_10k,single_20s_LA_22s_LL_12k,single_20s_LA_23s_LL_10k,single_20s_LA_23s_LL_12k,single_20s_LA_24s_LL_10k,single_20s_LA_24s_LL_12k,single_20s_all_XV_10k,single_20s_all_XV_12k,single_22s_all_XV_10k,single_22s_all_XV_12k
TANKA_RD,227.36,225.44,227.36,225.44,227.36,225.44,227.36,225.44,227.36,225.44,...,227.4,225.51,227.4,225.51,227.4,225.51,227.4,225.51,227.4,225.51
TANKB_RD,231.39,228.43,231.39,228.43,231.39,228.43,231.39,228.43,231.39,228.43,...,228.86,228.14,228.86,228.14,228.86,228.14,228.86,228.14,228.86,228.14
TANKC_RD,229.41,228.43,229.41,228.43,229.41,228.43,229.41,228.43,229.41,228.43,...,228.86,228.14,228.86,228.14,228.86,228.14,228.86,228.14,228.86,228.14
RUNDOWNLINES,231.57,230.59,231.57,230.59,231.57,230.59,231.57,230.59,231.57,230.59,...,231.02,230.29,231.02,230.29,231.02,230.29,231.02,230.29,231.02,230.29
T1_MAIN,233.57,230.59,233.57,230.59,233.57,230.59,233.57,230.59,233.57,230.59,...,231.02,230.29,231.02,230.29,231.02,230.29,231.02,230.29,231.02,230.29
T2_MAIN,233.66,230.59,233.66,230.59,233.66,230.59,233.66,230.59,233.66,230.59,...,231.02,230.29,231.02,230.29,231.02,230.29,231.02,230.29,231.02,230.29
T3_MAIN,233.61,230.59,233.61,230.59,233.61,230.59,233.61,230.59,233.61,230.59,...,231.02,230.29,231.02,230.29,231.02,230.29,231.02,230.29,231.02,230.29
COOLDOWNLINES,232.21,219.11,232.21,219.11,232.21,219.11,232.21,219.11,232.21,219.11,...,275.21,301.29,305.09,336.06,313.29,353.04,223.57,215.96,223.57,215.96
LOADINGLINES_RD,-9.999999778196306e+21,-9.999999778196306e+21,-9.999999778196306e+21,-9.999999778196306e+21,-9.999999778196306e+21,-9.999999778196306e+21,-9.999999778196306e+21,-9.999999778196306e+21,-9.999999778196306e+21,-9.999999778196306e+21,...,-9.999999778196306e+21,-9.999999778196306e+21,-9.999999778196306e+21,-9.999999778196306e+21,-9.999999778196306e+21,-9.999999778196306e+21,-9.999999778196306e+21,-9.999999778196306e+21,-9.999999778196306e+21,-9.999999778196306e+21
T1_COOLDOWN,232.2,230.59,232.2,230.59,232.2,230.59,232.2,230.59,232.2,230.59,...,231.02,230.29,231.02,230.29,231.02,230.29,231.02,230.29,231.02,230.29


In [10]:
PrepMyCSV.fmax

Unnamed: 0_level_0,CDML_LDJN_6,CDML_LDJN_7,CDML_LDJN_8,CDML_LDJN_9,CDML_LDJN_10,CDML_LDJN_11,CDML_LDJN_12,CDML_LDJN_13,CDML_LDJN_14,CDML_LDJN_15,...,XVL1_JHL6_128_129B,XVW2_JR13_143_144,XVW2_JR13_143_144A,XVW2_JR13_143_144B,JHB6_JHB8_154_155,JHB6_JHB8_154_155A,JHB6_JHB8_154_155B,JHC6_JHC8_163_164,JHC6_JHC8_163_164A,JHC6_JHC8_163_164B
CASE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
dual_20s_Everywhere_14s_Jetty_10k,0.0,0.0,0.0,1.93,0.0,0.0,0.0,0.04,0.04,0.0,...,27.54,1.73,0.0,4.7,0.71,0.0,2.33,4.71,0.0,14.76
dual_20s_Everywhere_14s_Jetty_12k,0.0,0.0,0.0,1.82,0.0,0.0,0.0,0.04,0.04,0.0,...,32.75,1.72,0.0,5.48,0.71,0.0,2.7,4.46,0.0,17.38
dual_20s_Everywhere_14s_Tank_10k,0.0,0.0,0.0,1.93,0.0,0.0,0.0,0.04,0.04,0.0,...,5.21,1.73,0.0,4.7,0.71,0.0,2.33,4.0,0.0,11.4
dual_20s_Everywhere_14s_Tank_12k,0.0,0.0,0.0,1.82,0.0,0.0,0.0,0.04,0.04,0.0,...,6.47,1.72,0.0,5.48,0.71,0.0,2.7,3.61,0.0,11.74
dual_20s_Everywhere_16s_Jetty_10k,0.0,0.0,0.0,1.93,0.0,0.0,0.0,0.04,0.04,0.0,...,26.68,1.73,0.0,4.7,0.71,0.0,2.33,4.25,0.0,14.86
dual_20s_Everywhere_16s_Jetty_12k,0.0,0.0,0.0,1.82,0.0,0.0,0.0,0.04,0.04,0.0,...,31.28,1.72,0.0,5.48,0.71,0.0,2.7,3.82,0.0,16.75
dual_20s_Everywhere_16s_Tank_10k,0.0,0.0,0.0,1.93,0.0,0.0,0.0,0.04,0.04,0.0,...,6.5,1.73,0.0,4.7,0.71,0.0,2.33,3.59,0.0,10.97
dual_20s_Everywhere_16s_Tank_12k,0.0,0.0,0.0,1.82,0.0,0.0,0.0,0.04,0.04,0.0,...,7.46,1.72,0.0,5.48,0.71,0.0,2.7,3.05,0.0,11.27
dual_20s_Everywhere_18s_Jetty_10k,0.0,0.0,0.0,1.93,0.0,0.0,0.0,0.04,0.04,0.0,...,26.12,1.73,0.0,4.7,0.71,0.0,2.33,3.18,0.0,13.52
dual_20s_Everywhere_18s_Jetty_12k,0.0,0.0,0.0,1.82,0.0,0.0,0.0,0.04,0.04,0.0,...,28.73,1.72,0.0,5.48,0.71,0.0,2.7,2.44,0.0,15.07


In [11]:
PrepMyCSV.fmin

Unnamed: 0_level_0,CDML_LDJN_6,CDML_LDJN_7,CDML_LDJN_8,CDML_LDJN_9,CDML_LDJN_10,CDML_LDJN_11,CDML_LDJN_12,CDML_LDJN_13,CDML_LDJN_14,CDML_LDJN_15,...,XVL1_JHL6_128_129B,XVW2_JR13_143_144,XVW2_JR13_143_144A,XVW2_JR13_143_144B,JHB6_JHB8_154_155,JHB6_JHB8_154_155A,JHB6_JHB8_154_155B,JHC6_JHC8_163_164,JHC6_JHC8_163_164A,JHC6_JHC8_163_164B
CASE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
dual_20s_Everywhere_14s_Jetty_10k,0.0,0.09,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.75,0.0,0.0,1.11,0.0,1.49,13.87,0.0
dual_20s_Everywhere_14s_Jetty_12k,0.0,0.09,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,...,1.6,0.0,2.54,0.0,0.0,1.49,0.0,1.15,17.51,0.0
dual_20s_Everywhere_14s_Tank_10k,0.0,0.09,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,...,0.25,0.0,1.75,0.0,0.0,1.11,0.0,0.82,10.29,0.0
dual_20s_Everywhere_14s_Tank_12k,0.0,0.09,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,...,0.33,0.0,2.54,0.0,0.0,1.49,0.0,2.02,12.97,0.0
dual_20s_Everywhere_16s_Jetty_10k,0.0,0.09,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.75,0.0,0.0,1.11,0.0,1.04,13.86,0.0
dual_20s_Everywhere_16s_Jetty_12k,0.0,0.09,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,2.54,0.0,0.0,1.49,0.0,0.66,16.89,0.0
dual_20s_Everywhere_16s_Tank_10k,0.0,0.09,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,...,0.26,0.0,1.75,0.0,0.0,1.11,0.0,0.67,10.09,0.0
dual_20s_Everywhere_16s_Tank_12k,0.0,0.09,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,...,0.29,0.0,2.54,0.0,0.0,1.49,0.0,1.04,11.88,0.0
dual_20s_Everywhere_18s_Jetty_10k,0.0,0.09,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.75,0.0,0.0,1.12,0.0,0.68,12.43,0.0
dual_20s_Everywhere_18s_Jetty_12k,0.0,0.09,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,2.54,0.0,0.0,1.49,0.0,0.65,14.52,0.0


In [12]:
PrepMyCSV.force_report

Unnamed: 0_level_0,With Flow,Against Flow,Worst Force,With Flow Controlling Case,Against Flow Controlling Case
Unnamed: 0_level_1,kips,kips,kips,Unnamed: 4_level_1,Unnamed: 5_level_1
CDML_LDJN_6,0.00,0.00,0.00,dual_20s_all_XV_12k,dual_20s_Everywhere_22s_Tank_10k
CDML_LDJN_7,0.00,0.09,0.00,dual_20s_all_XV_12k,dual_20s_Everywhere_14s_Tank_10k
CDML_LDJN_8,0.00,0.00,0.00,dual_20s_all_XV_12k,dual_20s_Everywhere_26s_Jetty_10k
CDML_LDJN_9,1.93,0.00,1.93,dual_20s_Everywhere_14s_Jetty_10k,dual_20s_Everywhere_14s_Tank_10k
CDML_LDJN_10,0.00,0.04,0.00,dual_20s_all_XV_12k,single_20s_Everywhere_16s_Jetty_12k
...,...,...,...,...,...
JHB6_JHB8_154_155A,0.00,1.11,0.00,dual_20s_Everywhere_14s_Jetty_10k,dual_20s_Everywhere_14s_Jetty_10k
JHB6_JHB8_154_155B,2.71,0.00,2.71,dual_20s_Everywhere_22s_Jetty_12k,dual_20s_Everywhere_14s_Jetty_12k
JHC6_JHC8_163_164,4.71,0.63,4.71,dual_20s_Everywhere_14s_Jetty_10k,single_20s_LA_24s_LL_10k
JHC6_JHC8_163_164A,0.04,7.77,0.04,single_20s_LA_18s_LL_10k,single_20s_LA_24s_LL_10k


In [30]:
PrepMyCSV.force_report

Unnamed: 0_level_0,With Flow,Against Flow,Worst Force,With Flow Controlling Case,Against Flow Controlling Case
Unnamed: 0_level_1,kips,kips,kips,Unnamed: 4_level_1,Unnamed: 5_level_1
CDML_LDJN_6,0.00,0.00,0.00,dual_20s_all_XV_12k,dual_20s_Everywhere_22s_Tank_10k
CDML_LDJN_7,0.00,0.09,0.00,dual_20s_all_XV_12k,dual_20s_Everywhere_14s_Tank_10k
CDML_LDJN_8,0.00,0.00,0.00,dual_20s_all_XV_12k,dual_20s_Everywhere_26s_Jetty_10k
CDML_LDJN_9,1.93,0.00,1.93,dual_20s_Everywhere_14s_Jetty_10k,dual_20s_Everywhere_14s_Tank_10k
CDML_LDJN_10,0.00,0.04,0.00,dual_20s_all_XV_12k,single_20s_Everywhere_16s_Jetty_12k
...,...,...,...,...,...
JHB6_JHB8_154_155A,0.00,1.11,0.00,dual_20s_Everywhere_14s_Jetty_10k,dual_20s_Everywhere_14s_Jetty_10k
JHB6_JHB8_154_155B,2.71,0.00,2.71,dual_20s_Everywhere_22s_Jetty_12k,dual_20s_Everywhere_14s_Jetty_12k
JHC6_JHC8_163_164,4.71,0.63,4.71,dual_20s_Everywhere_14s_Jetty_10k,single_20s_LA_24s_LL_10k
JHC6_JHC8_163_164A,0.04,7.77,0.04,single_20s_LA_18s_LL_10k,single_20s_LA_24s_LL_10k


Sort from High to low for With Flow

In [14]:
PrepMyCSV.force_report.sort_values(by=[('With Flow', 'kips')], ascending=0).head(30)

Unnamed: 0_level_0,With Flow,Against Flow,Worst Force,With Flow Controlling Case,Against Flow Controlling Case
Unnamed: 0_level_1,kips,kips,kips,Unnamed: 4_level_1,Unnamed: 5_level_1
JHA6_JHA8_63,91.71,0.0,91.71,single_20s_Everywhere_16s_Jetty_12k,single_20s_Everywhere_16s_Jetty_12k
XVL1_JHL6_121,91.66,0.0,91.66,single_20s_LA_24s_LL_12k,dual_20s_Everywhere_14s_Jetty_12k
XVL1_JHL6_123,89.39,0.0,89.39,single_20s_LA_24s_LL_12k,dual_20s_Everywhere_14s_Jetty_12k
XVR1_JHR6_85,84.31,0.0,84.31,single_20s_Everywhere_16s_Jetty_12k,single_20s_Everywhere_16s_Jetty_12k
XVL1_JHL6_129,84.21,0.0,84.21,single_20s_LA_24s_LL_12k,dual_20s_Everywhere_14s_Jetty_12k
XVR1_JHR6_79,82.93,0.0,82.93,single_20s_Everywhere_16s_Jetty_12k,single_20s_Everywhere_16s_Jetty_12k
XVR1_JHR6_77,81.83,0.0,81.83,single_20s_Everywhere_16s_Jetty_12k,single_20s_Everywhere_16s_Jetty_12k
JHC6_JHC8_164,69.57,0.0,69.57,single_20s_Everywhere_16s_Jetty_12k,single_20s_Everywhere_14s_Tank_12k
XVR1_JHR6_80,68.53,0.96,68.53,single_20s_Everywhere_14s_Jetty_10k,dual_20s_Everywhere_16s_Tank_10k
JHR2_JHR6_75_77,49.68,2.07,49.68,single_20s_Everywhere_14s_Jetty_10k,dual_20s_Everywhere_16s_Tank_10k


Sort from High to low for Against Flow

In [15]:
PrepMyCSV.force_report.sort_values(by=[('Against Flow', 'kips')], ascending=0).head(30)

Unnamed: 0_level_0,With Flow,Against Flow,Worst Force,With Flow Controlling Case,Against Flow Controlling Case
Unnamed: 0_level_1,kips,kips,kips,Unnamed: 4_level_1,Unnamed: 5_level_1
ML12_TKMF_186,0.0,45.18,0.0,single_22s_all_XV_12k,single_20s_Everywhere_14s_Tank_12k
P1A2_J1A2_6,0.0,37.85,0.0,dual_20s_LA_23s_LL_12k,dual_20s_Everywhere_16s_Jetty_10k
P1A1_J1A1_1,0.0,37.85,0.0,dual_20s_Everywhere_14s_Jetty_12k,dual_20s_Everywhere_16s_Jetty_10k
P1C3_J1C3_51,0.0,37.82,0.0,single_20s_Everywhere_14s_Tank_12k,dual_20s_Everywhere_16s_Jetty_10k
P1C1_J1C1_41,0.0,37.82,0.0,single_20s_Everywhere_14s_Tank_12k,dual_20s_Everywhere_16s_Jetty_10k
P1C2_J1C2_46,0.0,37.82,0.0,single_20s_Everywhere_14s_Tank_12k,dual_20s_Everywhere_16s_Jetty_10k
P1A4_J1A4_16,0.0,37.78,0.0,single_20s_LA_24s_LL_12k,dual_20s_Everywhere_14s_Jetty_10k
P1C4_J1C4_56,0.0,37.78,0.0,single_20s_LA_24s_LL_12k,single_20s_Everywhere_14s_Jetty_10k
P1A3_J1A3_11,0.0,37.78,0.0,dual_20s_Everywhere_26s_Tank_12k,dual_20s_Everywhere_14s_Tank_10k
P1B2_J1B2_26,0.0,37.78,0.0,single_20s_Everywhere_16s_Tank_10k,single_20s_Everywhere_14s_Jetty_10k


## Plotting Results

Get segment length

In [16]:
segment_len_df = pd.read_csv('E:/sktime/sktime/benchmarking/CCLNG_Segment_Length.csv', header=[0,1], index_col=0)
segment_len_df=segment_len_df.dropna()
segment_len_df.pop(("Sample", "Value"))
segment_len_df.head()

Segment,Length
Name,ft
J1A1_JHA1_2,40.53
CNA1_JHA1_3,8.42
CNA1_JHA1_4,36.3
CNA1_JHA1_5,4.17
J1A2_JHA2_7,41.02


Get segment force result

In [17]:
segment_force_df = PrepMyCSV.force_report[[('Worst Force', 'kips')]]
segment_force_df.head()


Unnamed: 0_level_0,Worst Force
Unnamed: 0_level_1,kips
CDML_LDJN_6,0.0
CDML_LDJN_7,0.0
CDML_LDJN_8,0.0
CDML_LDJN_9,1.93
CDML_LDJN_10,0.0


Merge length and force into 1 df

In [18]:
MyResult = pd.concat([segment_len_df, segment_force_df], axis=1, join='inner')
MyResult.columns=MyResult.columns.droplevel(-1)
MyResult.rename(columns={"Length":"Length (ft)", "Worst Force": "Worst Force (kips)"}, inplace=True)
# Remove cases where worst forces is 0
new_col = np.empty(len(MyResult["Length (ft)"]), dtype="U100")
new_col.fill("evoleap")
MyResult["Type"] = new_col
MyResult.sort_values(by=[('Length (ft)')], ascending=0).head()

Segment,Length (ft),Worst Force (kips),Type
XVE1_JHE8_175,791.82,26.42,evoleap
XVE2_JHE2_187,772.66,40.88,evoleap
XVL1_JHL6_124,707.0,44.24,evoleap
XVR1_JHR6_80,704.0,68.53,evoleap
JHL2_JHL6_119_121,597.23,39.32,evoleap


Get bechtel result

In [19]:
other_result = pd.read_csv('E:/sktime/sktime/benchmarking/Software_1_Result.csv', index_col='Segment')
other_result=other_result.dropna()
new_col = np.empty(len(other_result["Length (ft)"]), dtype="U100")
new_col.fill("Bechtel")
other_result["Type"] = new_col
other_result

Unnamed: 0_level_0,Length (ft),Worst Force (kips),Type
Segment,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
J1A1 _JHA1 _2,43.46,46.10,Bechtel
CNA1 _JHA1 _3,8.41,46.10,Bechtel
CNA1 _JHA1 _4,36.31,46.60,Bechtel
CNA1 _JHA1 _5,4.16,45.70,Bechtel
J1A2 _JHA2 _7,41.35,46.10,Bechtel
...,...,...,...
XVE2 _JHE2 _190,55.50,48.60,Bechtel
XVE2 _JHE2 _191,549.33,52.70,Bechtel
XVE2 _JHE2 _192,229.98,56.20,Bechtel
XVE2 _JEDD _193,96.32,77.70,Bechtel


Combine evoleap and bechtel results into 1 df

In [33]:
# Combine Myresult and Other result and reset index
combined_df=pd.concat([other_result, MyResult], join="outer", axis=0)
combined_df

Unnamed: 0,Length (ft),Worst Force (kips),Type
J1A1 _JHA1 _2,43.46,46.10,Bechtel
CNA1 _JHA1 _3,8.41,46.10,Bechtel
CNA1 _JHA1 _4,36.31,46.60,Bechtel
CNA1 _JHA1 _5,4.16,45.70,Bechtel
J1A2 _JHA2 _7,41.35,46.10,Bechtel
...,...,...,...
RE5C_RECC_779,7.73,0.00,evoleap
RE5C_RECC_780,10.07,0.01,evoleap
RE5B_RECB_781,8.00,0.04,evoleap
RE5B_RECB_782,4.50,0.01,evoleap


Plot in plotly

In [34]:
import plotly.express as px

df = combined_df
fig = px.scatter(df, x="Length (ft)", y="Worst Force (kips)", color="Type", trendline="ols")
fig.show()

results = px.get_trendline_results(fig)
print(results)

print(results.px_fit_results.iloc[0].summary())
print(results.px_fit_results.iloc[1].summary())


      Type                                     px_fit_results
0  Bechtel  <statsmodels.regression.linear_model.Regressio...
1  evoleap  <statsmodels.regression.linear_model.Regressio...
                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.033
Model:                            OLS   Adj. R-squared:                  0.026
Method:                 Least Squares   F-statistic:                     5.177
Date:                Wed, 04 May 2022   Prob (F-statistic):             0.0243
Time:                        12:46:52   Log-Likelihood:                -616.18
No. Observations:                 156   AIC:                             1236.
Df Residuals:                     154   BIC:                             1242.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef   

Alternatively, we can plot these 2 plots separately

In [27]:
import plotly.express as px

df = combined_df
fig = px.scatter(df, x="Length (ft)", y="Worst Force (kips)", facet_col="Type", color="Type", trendline="ols")
fig.show()

results = px.get_trendline_results(fig)
print(results)

print(results.px_fit_results.iloc[0].summary())
print(results.px_fit_results.iloc[1].summary())


      Type                                     px_fit_results
0  Bechtel  <statsmodels.regression.linear_model.Regressio...
1  evoleap  <statsmodels.regression.linear_model.Regressio...
                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.033
Model:                            OLS   Adj. R-squared:                  0.026
Method:                 Least Squares   F-statistic:                     5.177
Date:                Wed, 04 May 2022   Prob (F-statistic):             0.0243
Time:                        12:44:42   Log-Likelihood:                -616.18
No. Observations:                 156   AIC:                             1236.
Df Residuals:                     154   BIC:                             1242.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef   