# Data Preprocessing of output flow of gas turbines

### Import of libraries

In [1]:
import os
import matplotlib.pyplot as plt
import math
import numpy as np
import pandas as pd
import Preprocessing as pp

First, specify the global settings: where the raw data lives, where the results are saved, and which sampling rate the processed data should have.

In [2]:
# --- Experiment / resampling configuration ---
TOTAL_TIME = 12231  # total duration of the experiment in seconds
SAMPLING_RATE = 1  # desired samples per second (e.g. 0.5 = one sample every 2 seconds)
sample_size = math.ceil(TOTAL_TIME * SAMPLING_RATE) + 1  # +1 so that start and end point are both included

# Time stamps (in seconds) for the downsampled points. Dividing the sample
# index by the rate keeps this axis in seconds for any SAMPLING_RATE; with
# SAMPLING_RATE = 1 the values equal the plain sample indices.
time_downsampled = np.arange(sample_size) / SAMPLING_RATE

# --- Paths ---
OPEN_FOLDER = "../Data/Raw_Data/"  # folder containing the raw MATLAB files
SAVE_FOLDER = "../Data/Temp_Data/Messdaten_Test_ID_4B/"  # destination for the generated .csv files

open_4b = OPEN_FOLDER + "Messdaten_Test_ID_4b/"

# os.makedirs also creates missing parent folders; os.mkdir would raise
# FileNotFoundError if e.g. ../Data/Temp_Data/ does not exist yet.
if not os.path.exists(SAVE_FOLDER):
    os.makedirs(SAVE_FOLDER, exist_ok=True)
    print('Creation of directory %s successful.' % SAVE_FOLDER)

In [3]:
# Load the raw MATLAB measurement files through the project's Preprocessing helper.
# NOTE(review): `open_4b` (the "Messdaten_Test_ID_4b/" subfolder) is defined in the
# configuration cell but not used anywhere in the visible notebook; this call passes
# the parent OPEN_FOLDER instead -- confirm which folder pp.openMatfiles expects.
raw_data_4B = pp.openMatfiles(OPEN_FOLDER)

## General preparations
Steps that apply to all data set files, such as splitting the data and providing a uniform nomenclature.

We use the Largest-Triangle-Three-Buckets (LTTB) downsampling method, which [1] found to perform best overall.

[1]: https://skemman.is/bitstream/1946/15343/3/SS_MSthesis.pdf

In [4]:
# Prepare the loaded sheets using `sample_size` (presumably the downsampling step -- confirm in Preprocessing).
# NOTE(review): the recorded run of this cell failed with
# "AttributeError: module 'Preprocessing' has no attribute 'prepareSheets'", and every
# later cell is unexecuted. Check the current function name in the Preprocessing module
# and update this call before re-running the notebook.
data_4B = pp.prepareSheets(raw_data_4B, sample_size)

AttributeError: module 'Preprocessing' has no attribute 'prepareSheets'

In [None]:
# Give every prepared sheet its own variable; the sheets are looked up in
# data_4B by their sheet name, in the same order as the targets on the left.
(
    data_4B_1B_el,
    data_4B_1B_th,
    data_4B_2A_el_1,
    data_4B_2A_el_2,
    data_4B_2A_th,
    data_4B_drehzahl,
    data_4B_power,
) = (
    data_4B[sheet_name]
    for sheet_name in (
        'Daten_Test_ID_4b_1B_el',
        'Daten_Test_ID_4b_1B_th',
        'Daten_Test_ID_4b_2A_el_1',
        'Daten_Test_ID_4b_2A_el_2',
        'Daten_Test_ID_4b_2A_th',
        'Drehzahldaten_Test_ID_4b',
        'Leistungdaten_Test_ID_4b',
    )
)

In [None]:
# Visual check of the downsampling method: the raw signals fill the top row
# and the corresponding downsampled signals sit directly below them.
raw_1B_el = raw_data_4B['Daten_Test_ID_4b_1B_el']

fig, axs = plt.subplots(2, 2, figsize=(20, 16))
axs = axs.ravel()
ax_raw_current, ax_raw_voltage, ax_down_current, ax_down_voltage = axs

# Top row: raw data (first 1,000,000 / 500,000 samples).
ax_raw_current.plot(raw_1B_el['t_1B_el'][:1000000], raw_1B_el['I_N15'][:1000000])
ax_raw_voltage.plot(raw_1B_el['t_1B_el'][:500000], raw_1B_el['U_N'][:500000])

# Bottom row: downsampled data (first 200 / 100 samples).
ax_down_current.plot(time_downsampled[:200], data_4B_1B_el['I_N15'][:200])
ax_down_voltage.plot(time_downsampled[:100], data_4B_1B_el['U_N'][:100])
plt.show()

## Separate visualisation/adaptation for each data file, saving each one to its own file in /Temp_Data/*

**Testdaten 1B_el (electrical output)**

The raw physical outputs, voltage and current, which show a periodic and symmetrical behaviour.

In [None]:
# Preview of the downsampled 1B_el signals, plotted against the generic
# downsampled time axis (first 200 / 100 samples).
fig, axs = plt.subplots(2, 1, figsize=(20, 16))
axs = axs.ravel()
ax_current, ax_voltage = axs
ax_current.plot(time_downsampled[:200], data_4B_1B_el['I_N15'][:200])
ax_voltage.plot(time_downsampled[:100], data_4B_1B_el['U_N'][:100])
plt.show()

# Build the table row by row from the sheet's own time column plus the two
# signals, name the columns, and store it as a pipe-separated CSV.
el_1B_rows = list(zip(*(data_4B_1B_el[key] for key in ('t_1B_el', 'I_N15', 'U_N'))))
el_measures_1B = pd.DataFrame(el_1B_rows)
el_measures_1B.columns = ['el_time_measures', 'el_stromstaerke', 'el_spannung']
el_measures_1B.to_csv(SAVE_FOLDER + "el_measures_1B.csv", sep="|", index=False, encoding="utf-8")

**Testdaten 1B_th (thermal output)**

The thermal output depends on multiple data series. It is not yet clear what each of them means, but BP_34, BT_34 and BT_6 seem to correlate with the total thermal output.

In [None]:
# Overview of every series of the 1B_th sheet, each plotted over the sheet's
# own time column. One panel per series, in the order listed here.
time_1B_th = data_4B_1B_th['t_1B_th']
series_1B_th = ['BP_2', 'BP_34', 'BP_5', 'BP_6', 'BP_7', 'BT_2', 'BT_34', 'BT_6', 'BT_7']

fig, axs = plt.subplots(3, 3, figsize=(20, 16))
axs = axs.ravel()
for ax, series in zip(axs, series_1B_th):
    ax.plot(time_1B_th, data_4B_1B_th[series])
    # Title and x-label make the nine otherwise identical-looking panels identifiable.
    ax.set_title(series)
    ax.set_xlabel('t_1B_th')
plt.show()

# One row per time step: the time stamp followed by the nine series (same order as plotted).
th_measures_1B = pd.DataFrame(
    list(zip(time_1B_th, *(data_4B_1B_th[series] for series in series_1B_th)))
)
th_measures_1B.columns = ['th_time_measures', 'th_BP2', 'th_BP34', 'th_BP5', 'th_BP6',
                          'th_BP7', 'th_BT2', 'th_BT34', 'th_BT6', 'th_BT7']

# os.path.join yields the same path as string concatenation while SAVE_FOLDER ends
# with "/", and still works if the trailing separator is ever dropped.
th_measures_1B.to_csv(os.path.join(SAVE_FOLDER, "th_measures_1B.csv"),
                      index=False, sep="|", encoding="utf-8")

**Testdaten 2A_el_1 (electrical output)**

The raw physical outputs, voltage and current, which show a periodic and symmetrical behaviour.

In [None]:
# Preview of both 2A electrical sheets: el_1 in the top row, el_2 in the bottom row.
# Each entry is (sheet, name of its time column, name of the plotted series).
panels_2A_el = [
    (data_4B_2A_el_1, 't_2A_el_1', 'I_G15'),
    (data_4B_2A_el_1, 't_2A_el_1', 'U_G'),
    (data_4B_2A_el_2, 't_2A_el_2', 'sw_MGT'),
    (data_4B_2A_el_2, 't_2A_el_2', 'U_ZK'),
]
fig, axs = plt.subplots(2, 2, figsize=(20, 16))
axs = axs.ravel()
for ax, (sheet, time_key, value_key) in zip(axs, panels_2A_el):
    ax.plot(sheet[time_key], sheet[value_key])
plt.show()

# Sheet el_1: time, I_G15 and U_G, written as a pipe-separated CSV.
rows_2A_1 = list(zip(*(data_4B_2A_el_1[key] for key in ('t_2A_el_1', 'I_G15', 'U_G'))))
el_measures_2A_1 = pd.DataFrame(rows_2A_1)
el_measures_2A_1.columns = ['el_time_measures', 'el_stromstaerke', 'el_spannung']
el_measures_2A_1.to_csv(SAVE_FOLDER + "el_measures_2A_1.csv", sep="|", index=False, encoding="utf-8")

# Sheet el_2: time, sw_MGT and U_ZK, written as a pipe-separated CSV.
rows_2A_2 = list(zip(*(data_4B_2A_el_2[key] for key in ('t_2A_el_2', 'sw_MGT', 'U_ZK'))))
el_measures_2A_2 = pd.DataFrame(rows_2A_2)
el_measures_2A_2.columns = ['el_time_measures', 'el_spannung_input', 'el_spannung']
el_measures_2A_2.to_csv(SAVE_FOLDER + "el_measures_2A_2.csv", sep="|", index=False, encoding="utf-8")

**Testdaten 2A_th (thermal output)**

The thermal output depends on multiple data series. It is not yet clear what each of them means, but V_dotKW and T_vorKW seem relevant for cold starts.

In [None]:
# Overview of every series of the 2A_th sheet, each plotted over the sheet's
# own time column. One panel per series, in the order listed here.
time_2A_th = data_4B_2A_th['t_2A_th']
series_2A_th = ['T_L', 'T_rueckKW', 'T_vorKW', 'V_dotB', 'V_dotKW', 'm_dotL', 'p_L', 'sw_kuehler']

fig, axs = plt.subplots(3, 3, figsize=(20, 16))
axs = axs.ravel()
for ax, series in zip(axs, series_2A_th):
    ax.plot(time_2A_th, data_4B_2A_th[series])
    # Title and x-label make the otherwise identical-looking panels identifiable.
    ax.set_title(series)
    ax.set_xlabel('t_2A_th')
# Only eight series fill the 3x3 grid; hide the left-over panel instead of
# rendering an empty frame.
for ax in axs[len(series_2A_th):]:
    ax.set_visible(False)
plt.show()

# One row per time step: the time stamp followed by the eight series (same order as plotted).
th_measures_2A = pd.DataFrame(
    list(zip(time_2A_th, *(data_4B_2A_th[series] for series in series_2A_th)))
)
th_measures_2A.columns = ['th_time_measures', 'th_TL', 'th_Trueck', 'th_Tvor', 'th_VdotB',
                          'th_VdotKW', 'th_mdotL', 'th_pL', 'th_swKuehler']

# os.path.join yields the same path as string concatenation while SAVE_FOLDER ends
# with "/", and still works if the trailing separator is ever dropped.
th_measures_2A.to_csv(os.path.join(SAVE_FOLDER, "th_measures_2A.csv"),
                      index=False, sep="|", encoding="utf-8")

**Drehzahldaten (rotation)**

This section covers the rotation data, i.e. the data from the file _'Drehzahldaten_Test_ID_4b'_

In [None]:
# Three stacked panels: the two series on the 't_nsoll_stil' time axis,
# then 'n_stil' on its own 't_stil' time axis.
panels_drehzahl = [
    ('t_nsoll_stil', 'sw_nsoll_stil'),
    ('t_nsoll_stil', 'n_4b_soll'),
    ('t_stil', 'n_stil'),
]
fig, axs = plt.subplots(3, 1, figsize=(20, 16))
axs = axs.ravel()
for ax, (time_key, value_key) in zip(axs, panels_drehzahl):
    ax.plot(data_4B_drehzahl[time_key], data_4B_drehzahl[value_key])
plt.show()

# The three series live on two time axes, so they are stored in two frames:
# input voltage and measured rotation in one, the smoothed rotation in the other.
input_rows = list(zip(*(data_4B_drehzahl[key]
                        for key in ('t_nsoll_stil', 'sw_nsoll_stil', 'n_4b_soll'))))
input_drehzahl = pd.DataFrame(input_rows)
input_drehzahl.columns = ['nsoll_time', 'nsoll_input_voltage', 'nsoll_measured']

approx_rows = list(zip(*(data_4B_drehzahl[key] for key in ('t_stil', 'n_stil'))))
approx_drehzahl = pd.DataFrame(approx_rows)
approx_drehzahl.columns = ['nsoll_time', 'nsoll_approx']

# Persist both frames as pipe-separated CSV files.
input_drehzahl.to_csv(SAVE_FOLDER + "input_drehzahl.csv", sep="|", index=False, encoding="utf-8")
approx_drehzahl.to_csv(SAVE_FOLDER + "approx_drehzahl.csv", sep="|", index=False, encoding="utf-8")

**Leistungsdaten (power)**

This section covers the power data, i.e. the data from the file _'Leistungsdaten_Test_ID_4b'_

In [None]:
# 3x2 grid of power plots. Panels 0-2 show the series on the 't_1B_el_neu'
# time axis, panels 4-5 the '*stil' series on their own time axes; panel 3
# stays empty.
fig, axs = plt.subplots(3, 2, figsize=(20, 12))
axs = axs.ravel()
time_el_neu = data_4B_power['t_1B_el_neu']

axs[0].plot(time_el_neu, data_4B_power['P_el_rms'])
axs[1].plot(time_el_neu, data_4B_power['P_th'][::2])  # every 2nd P_th value against the full time axis
axs[2].plot(time_el_neu[::25], data_4B_power['P_th_mean'])  # P_th_mean against every 25th time stamp
axs[4].plot(data_4B_power['t_elstil'], data_4B_power['P_elstil'])
axs[5].plot(data_4B_power['t_thstil'], data_4B_power['P_thstil'])
plt.show()

# NOTE(review): the plots above stride P_th by 2 and the time axis by 25, which
# suggests these series have different lengths. zip() pairs elements purely by
# position and stops at the shortest input, so the rows below may be truncated
# and misaligned in time -- confirm the series lengths before relying on this CSV.
raw_power_keys = ('t_1B_el_neu', 'P_el_rms', 'P_th', 'P_th_mean')
el_output_raw = pd.DataFrame(list(zip(*(data_4B_power[key] for key in raw_power_keys))))
el_output_raw.columns = ['el_time', 'el_power', 'th_power', 'th_mean']

# Only 't_elstil' is stored as the time column here; 't_thstil' is not written out.
approx_power_keys = ('t_elstil', 'P_elstil', 'P_thstil')
el_output_approx = pd.DataFrame(list(zip(*(data_4B_power[key] for key in approx_power_keys))))
el_output_approx.columns = ['el_time_approx', 'el_power_approx', 'th_power_approx']

# Persist both frames as pipe-separated CSV files.
el_output_raw.to_csv(SAVE_FOLDER + "power_output_raw.csv", sep="|", index=False, encoding="utf-8")
el_output_approx.to_csv(SAVE_FOLDER + "power_output_approx.csv", sep="|", index=False, encoding="utf-8")