## Erstmal auf den Zyklus gruppieren und dann Features erstellen

offen:
- kommentierung

### Import

In [1]:
import pandas as pd
import numpy as np

import os
import sys
import logging

import functions as fu
from feature_engineering import *

# Pfade setzen
c_fold = fu.get_folder()
p_fold = c_fold + '\data\Prepared'
folder = c_fold + '\data\Processed\Zyklus'
folder_final = c_fold + '\data\Processed\\final'


# Initialize Logging
logging.basicConfig(format='[%(asctime)s] %(levelname)s: %(message)s',
                    datefmt='%H:%M:%S',
                    level=logging.INFO,
                    handlers=[
                        logging.FileHandler(
                            filename='datalogger.log',
                            mode='a',
                            encoding='utf-8'),
                        logging.StreamHandler(sys.stdout)])

In [2]:
def stark_leicht(type_, current_mean, relativeTime_amax):
    entladen_stark = 0
    entladen_leicht = 0
    laden_stark = 0
    laden_leicht = 0
    pause = 0
    
    
    if type_ == 'D': #discharge
        if current_mean >= 3:
            entladen_stark = relativeTime_amax
        else:
            entladen_leicht = relativeTime_amax
    if type_ == 'C': #charge
        if current_mean <= -3:
            laden_stark = relativeTime_amax
        else:
            laden_leicht = relativeTime_amax
    if type_ == 'R':
        pause = relativeTime_amax
    return entladen_stark, entladen_leicht, laden_stark, laden_leicht, pause

    

def temp_hoch(temperature_amax, relativeTime_amax):
    temp_hoch = 0
    if temperature_amax >= 30:
        return relativeTime_amax
    else:
        return 0
        

for file in os.listdir(p_fold):
    if ".csv" in file:
# file = "prepared_RW1.csv" #für tests nur eins
        logging.info(f"<<< Started processing file: {file} >>>")
        new_filename = file.replace("prepared","processed")

        df = pd.read_csv(f"{p_fold}\\{file}", index_col = 0,
                                 #nrows= 100000
                                )
        logging.info(f"<<< Size of Dataframe: {df.shape} >>>")

        #Auf Ebene Batterie + Zyklus gruppieren mit benötigten Aggregationen
        df_pv = pd.pivot_table(
           df,
           index=['batteryname','zyklus','comment','type'],
           aggfunc={'time': np.min, 'relativeTime':  np.max, 'voltage': [np.min, np.max, np.mean], 'current': [np.min, np.max, np.mean], 'temperature': [np.min, np.max, np.mean]}
        ).reset_index()

        df_pv.columns = ['_'.join(col) for col in list(df_pv.columns)]



        df_pv['time_entladen_stark'],df_pv['time_entladen_leicht'],df_pv['time_laden_stark'],df_pv['time_laden_leicht'],df_pv['time_pause']  = zip(*df_pv.apply(lambda x: stark_leicht(x['type_'],x['current_mean'],x['relativeTime_amax']),axis=1))

        df_pv['time_entladen_stark_vorher'] = df_pv.time_entladen_stark.cumsum()
        df_pv['time_entladen_leicht_vorher'] = df_pv.time_entladen_leicht.cumsum()
        df_pv['time_laden_stark_vorher'] = df_pv.time_laden_stark.cumsum()
        df_pv['time_laden_stark_vorher'] = df_pv.time_laden_leicht.cumsum()
        df_pv['time_pause_vorher'] = df_pv.time_pause.cumsum()


        df_pv['time_temp_hoch'] = df_pv.apply(lambda x: temp_hoch(x['temperature_amax'],x['relativeTime_amax']),axis=1)

        df_pv['time_temp_hoch_vorher'] = df_pv.time_temp_hoch.cumsum()

        df_pv['amperestunden'] = (df_pv['current_mean']*df_pv['relativeTime_amax'])/3600
        #mapply(trapz,refDisSteps$relativeTime,refDisSteps$current)/3600

        df_pv_filtered = df_pv[df_pv['comment_'] == 'reference discharge']


        logging.info(f"<<< Start saving dataframe with shape {df_pv_filtered.shape} >>>")
        df_pv.to_csv(f"{folder}\\{new_filename}", sep=',', index=False)
        logging.info(f"<<< Saved processed file in directory {folder}\\{new_filename} >>>")

[21:09:07] INFO: <<< Started processing file: prepared_RW1.csv >>>
[21:09:12] INFO: <<< Size of Dataframe: (2958840, 11) >>>
[21:09:13] INFO: <<< Start saving dataframe with shape (48, 27) >>>
[21:09:13] INFO: <<< Saved processed file in directory C:\Users\danie\OneDrive\Dokumente\_Programmierung\python_workspaces\Uni_Master\battery_failure_predictions\data\Processed\Zyklus\processed_RW1.csv >>>
[21:09:13] INFO: <<< Started processing file: prepared_RW10.csv >>>
[21:09:28] INFO: <<< Size of Dataframe: (8596025, 11) >>>
[21:09:34] INFO: <<< Start saving dataframe with shape (77, 27) >>>
[21:09:37] INFO: <<< Saved processed file in directory C:\Users\danie\OneDrive\Dokumente\_Programmierung\python_workspaces\Uni_Master\battery_failure_predictions\data\Processed\Zyklus\processed_RW10.csv >>>
[21:09:37] INFO: <<< Started processing file: prepared_RW11.csv >>>
[21:09:53] INFO: <<< Size of Dataframe: (8664510, 11) >>>
[21:09:59] INFO: <<< Start saving dataframe with shape (77, 27) >>>
[21:10

## DFs zusammenführen

In [3]:
d = []

df_full = pd.DataFrame(d, columns=df_pv_filtered.columns)

for file in os.listdir(folder):
    if ".csv" in file:
        df = pd.read_csv(f"{folder}\\{file}")
        df_full = df_full.append(df)
df_full = df_full.reset_index()

# Variablenauswahl
* **index** - kann raus
* **batteryname_** - kann weg: keinen Einfluss
* **zyklus_** - bleibt: Anzahl an Lade/Entlade/Pause Wechseln
* **comment_** - kann weg: ist in time_entladen_*
* **type_** - kann weg: ist in time_entladen_*
* **current_amax** - kann weg: ist in Zielvariable/immer gleich
* **current_amin** - kann weg: ist in Zielvariable/immer gleich
* **current_mean** - kann weg: ist in Zielvariable/immer gleich
* **relativeTime_amax** - kann weg: ist in Zielvariable
* **temperature_amax** - bleibt: kann man mal ausprobieren, ob Temp Einfluss hat
* **temperature_amin** - bleibt: kann man mal ausprobieren, ob Temp Einfluss hat
* **temperature_mean** - bleibt: kann man mal ausprobieren, ob Temp Einfluss hat
* **time_amin** - bleibt: Gesamtalter der Batterie bei Start
* **voltage_amax** - kann weg: bei Ref Discharge immer von 4.2V zu 3.2V
* **voltage_amin** - kann weg: bei Ref Discharge immer von 4.2V zu 3.2V
* **voltage_mean** - kann weg: bei Ref Discharge immer von 4.2V zu 3.2V
* **time_entladen_stark** - kann weg: nur benötigt für "vorher"-Berechnung
* **time_entladen_leicht** - kann weg: nur benötigt für "vorher"-Berechnung
* **time_laden_stark** - kann weg: nur benötigt für "vorher"-Berechnung
* **time_laden_leicht** - kann weg: nur benötigt für "vorher"-Berechnung
* **time_pause** - kann weg: nur benötigt für "vorher"-Berechnung
* **time_entladen_stark_vorher** - bleibt: wie vorher verwendet
* **time_entladen_leicht_vorher** - bleibt: wie vorher verwendet
* **time_laden_stark_vorher** - bleibt: wie vorher verwendet
* **time_pause_vorher** - bleibt: wie vorher verwendet
* **time_temp_hoch** - bleibt: wie vorher verwendet
* **time_temp_hoch_vorher** - bleibt: wie vorher verwendet
* **amperestunden** - bleibt: Zielvariable

In [7]:
var_auswahl = [
    "batteryname_",
    "amperestunden",
    "zyklus_",
    "comment_",
    "type_",
    "temperature_amax",
    "temperature_amin",
    "temperature_mean",
    "time_amin",
    "time_entladen_stark_vorher",
    "time_entladen_leicht_vorher",
    "time_laden_stark_vorher",
    "time_pause_vorher",
    "time_temp_hoch",
    "time_temp_hoch_vorher"
]

df_final = df_full[var_auswahl]
df_final.to_csv(f"{folder_final}\\df_fertige_features.csv", sep=',', index=False)
df_final

Unnamed: 0,batteryname_,amperestunden,zyklus_,comment_,type_,temperature_amax,temperature_amin,temperature_mean,time_amin,time_entladen_stark_vorher,time_entladen_leicht_vorher,time_laden_stark_vorher,time_pause_vorher,time_temp_hoch,time_temp_hoch_vorher
0,RW1,2.094997,0,low current discharge at 0.04A,D,18.79972,17.08313,17.939268,1538.07,0.00,188976.68,0.00,0.00,0.00,0.000000e+00
1,RW1,-2.120549,1,low current charge,C,18.64223,17.06738,17.867832,190514.75,0.00,188976.68,191271.64,0.00,0.00,0.000000e+00
2,RW1,-0.021099,2,reference charge,C,18.37451,18.09103,18.232178,381786.29,0.00,188976.68,194718.28,0.00,0.00,0.000000e+00
3,RW1,2.000364,3,reference discharge,D,23.56742,18.39025,21.798768,385232.92,0.00,196178.50,194718.28,0.00,0.00,0.000000e+00
4,RW1,-2.005160,4,reference charge,C,25.28264,17.69732,20.055134,392434.74,0.00,196178.50,207816.73,0.00,0.00,0.000000e+00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1117889,RW9,0.000000,113573,rest post reference discharge,R,38.29132,30.73195,32.874813,12675446.09,319549.85,4142731.52,5144349.95,1101476.63,7200.00,9.401590e+06
1117890,RW9,-0.774939,113574,reference charge,C,34.13290,30.76287,32.242548,12682646.12,319549.85,4142731.52,5164882.66,1101476.63,20532.71,9.422123e+06
1117891,RW9,0.000000,113575,rest post reference charge,R,32.47880,32.40151,32.446385,12703178.80,319549.85,4142731.52,5164882.66,1101776.63,300.00,9.422423e+06
1117892,RW9,0.750203,113576,reference discharge,D,38.86329,32.46334,35.958514,12703478.83,319549.85,4145432.36,5164882.66,1101776.63,2700.84,9.425124e+06


## Nachher:
* Bei Temperatur scheint es Ausreißer in die -4000er Region zu geben, muss man noch ersetzen wenn man es benutzen will