In [1]:
# Importieren der benötigten Bibliotheken

# Datenmanipulation
import pandas as pd

# Machine Learning
from sklearn.model_selection import train_test_split

# Split

**Train-Test-Split**

In [2]:
# Einlesen der bereinigten Daten
df = pd.read_csv("../data/processed/cleaned_data.csv")

df.columns

Index(['Ship_speed_(v)', 'Gas_Turbine_(GT)_shaft_torque_(GTT)_[kN_m]',
       'GT_rate_of_revolutions_(GTn)_[rpm]',
       'Gas_Generator_rate_of_revolutions_(GGn)_[rpm]',
       'Starboard_Propeller_Torque_(Ts)_[kN]',
       'Port_Propeller_Torque_(Tp)_[kN]',
       'Hight_Pressure_(HP)_Turbine_exit_temperature_(T48)_[C]',
       'GT_Compressor_outlet_air_temperature_(T2)_[C]',
       'HP_Turbine_exit_pressure_(P48)_[bar]',
       'GT_Compressor_outlet_air_pressure_(P2)_[bar]',
       'GT_exhaust_gas_pressure_(Pexh)_[bar]',
       'Turbine_Injecton_Control_(TIC)_[%]', 'Fuel_flow_(mf)_[kg/s]',
       'GT_Compressor_decay_state_coefficient',
       'GT_Turbine_decay_state_coefficient'],
      dtype='object')

In [3]:
# Einlesen der bereinigten Daten
df = pd.read_csv("../data/processed/cleaned_data.csv")

# Aufteilung in Traings- und Testdaten
features = df[df.columns[:-2].tolist()]
targets = df[df.columns[-2:].tolist()]

# Durchführung des Train-Test-Splits
features_train, features_test, target_train, target_test = train_test_split(features, 
                                                                            targets, 
                                                                            random_state=42,
                                                                            test_size=0.2,
                                                                            stratify=targets)

# Data Preparation

**Datatype Transformation**


In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11934 entries, 0 to 11933
Data columns (total 15 columns):
 #   Column                                                  Non-Null Count  Dtype  
---  ------                                                  --------------  -----  
 0   Ship_speed_(v)                                          11934 non-null  int64  
 1   Gas_Turbine_(GT)_shaft_torque_(GTT)_[kN_m]              11934 non-null  float64
 2   GT_rate_of_revolutions_(GTn)_[rpm]                      11934 non-null  float64
 3   Gas_Generator_rate_of_revolutions_(GGn)_[rpm]           11934 non-null  float64
 4   Starboard_Propeller_Torque_(Ts)_[kN]                    11934 non-null  float64
 5   Port_Propeller_Torque_(Tp)_[kN]                         11934 non-null  float64
 6   Hight_Pressure_(HP)_Turbine_exit_temperature_(T48)_[C]  11934 non-null  float64
 7   GT_Compressor_outlet_air_temperature_(T2)_[C]           11934 non-null  float64
 8   HP_Turbine_exit_pressure_(P48)_[bar]

**Data Imputation**


In [5]:
df.isnull().sum()

Ship_speed_(v)                                            0
Gas_Turbine_(GT)_shaft_torque_(GTT)_[kN_m]                0
GT_rate_of_revolutions_(GTn)_[rpm]                        0
Gas_Generator_rate_of_revolutions_(GGn)_[rpm]             0
Starboard_Propeller_Torque_(Ts)_[kN]                      0
Port_Propeller_Torque_(Tp)_[kN]                           0
Hight_Pressure_(HP)_Turbine_exit_temperature_(T48)_[C]    0
GT_Compressor_outlet_air_temperature_(T2)_[C]             0
HP_Turbine_exit_pressure_(P48)_[bar]                      0
GT_Compressor_outlet_air_pressure_(P2)_[bar]              0
GT_exhaust_gas_pressure_(Pexh)_[bar]                      0
Turbine_Injecton_Control_(TIC)_[%]                        0
Fuel_flow_(mf)_[kg/s]                                     0
GT_Compressor_decay_state_coefficient                     0
GT_Turbine_decay_state_coefficient                        0
dtype: int64

**Deal with Outliers**

In [6]:
# Entferne Zeilen die Außreiter enthalten basierend auf IQR-Methode
Q1 = df.quantile(0.25)
Q3 = df.quantile(0.75)
IQR = Q3 - Q1
df = df[~((df < (Q1 - 1.5 * IQR)) |(df > (Q3 + 1.5 * IQR))).any(axis=1)]
df.shape

(11742, 15)

In [7]:
# Speichern der Trainings- und Testdaten
features_train.to_csv("../data/processed/features_train.csv", index=False)
features_test.to_csv("../data/processed/features_test.csv", index=False)
target_train.to_csv("../data/processed/target_train.csv", index=False)
target_test.to_csv("../data/processed/target_test.csv", index=False)