# Extract Data: Rectifier October 2023

In [14]:
import pandas as pd
from scipy.io import loadmat
from datautil import fault2RUL0

## Rectifier

In [15]:
# Folder (relative to the notebook) that holds the simulation export.
data_name = 'Rectifier_nov23_4steps_perform3'
# Stem of the .mat file inside that folder; the load cell later replaces this
# value with the name of the variable stored in the file.
sim_name = 'RectifierData_step10h'

# Load Data

In [16]:
# Read the MATLAB export; loadmat returns a dict keyed by variable name,
# possibly preceded by metadata entries whose names start with '__'.
data = loadmat(f'./{data_name}/{sim_name}.mat')

# Pick the first real variable by name rather than by position: indexing the
# keys with a fixed offset silently selects the wrong entry whenever the number
# of metadata keys is not exactly three.
sim_name = next((key for key in data if not key.startswith('__')), None)
if sim_name is None:
    raise ValueError('The .mat file does not contain any variables')
print(f'Simulation: {sim_name}')

Simulation: RectifierData_step10h


In [17]:
# Summarise the layout of the loaded array: one entry per simulated system,
# each wrapping a matrix whose first axis is reported as features and whose
# second axis is reported as samples.
systems = data[sim_name][0]
N = systems.shape[0]  # number of simulated systems
first_system = systems[0][0]
print(f'Number of systems: {N}')
print(f'Number of Features: {first_system.shape[0]}')
print(f'Number of Samples/sys: {first_system.shape[1]}')

Number of systems: 250
Number of Features: 24
Number of Samples/sys: 401


In [8]:
# Column names for the per-system feature matrix, listed in the order in which
# they are later applied to the transposed matrix. Built group by group.
labels_degraded = ['i1', 'i2', 'i3', 'Load volt', 'I_DC']
# Same signals with a ' nom' suffix (presumably the nominal counterparts — confirm).
labels_degraded += [f'{signal} nom' for signal in labels_degraded]
labels_degraded += ['L', 'R', 'clean L', 'clean R']
labels_degraded += ['C01', 'C02', 'C03']
labels_degraded += ['Ripple', 'Vpp']
# Fault indicator columns.
labels_degraded += ['ripple_fault', 'Vpp_fault']
labels_degraded += ['C1_f', 'C2_f', 'C3_f']

# Alias used when the DataFrame is built (same list object, not a copy).
column_labels = labels_degraded

# Create Pandas Dataset

In [18]:
# Name of the column that carries the system-level fault signal; it is the
# column later converted to RUL.
system_fault = 'ripple_fault'
# Name of the performance column that belongs to that fault signal.
performance = 'Ripple'

In [19]:
# Stack every system's time series into one long DataFrame.
# The per-system frames are collected in a list and concatenated once:
# concatenating inside the loop re-copies the accumulated frame on every
# iteration (quadratic cost in the number of systems).
system_frames = []
for system_idx in range(N):
    # Stored matrix is features x samples; transpose to samples x features and
    # delete the first entry (fix 0 division error).
    samples = data[sim_name][0][system_idx][0].T[1:]
    frame = pd.DataFrame(samples, columns=column_labels)
    frame['System'] = system_idx  # id of the system each row belongs to
    system_frames.append(frame)

# Row labels restart at 0 for every system, exactly as before (no ignore_index).
# pd.concat rejects an empty list, so fall back to an empty frame when N == 0.
data_df = pd.concat(system_frames, axis=0) if system_frames else pd.DataFrame()

# Convert Fault signals to RUL

In [20]:
# only for degraded data
# Silences pandas' SettingWithCopyWarning for the rest of the session.
# NOTE(review): kept because later cells may still rely on it — confirm before removing.
pd.options.mode.chained_assignment = None

# Fault indicator columns: the three capacitor flags plus the system-level flag.
fault_columns = [f'C{i+1}_f' for i in range(3)] + [system_fault]

# Work on an explicit copy so adding 'Sum' never writes into a slice of data_df.
Count_df = data_df[fault_columns + ['System']].copy()
Count_df['Sum'] = Count_df[fault_columns].sum(axis=1)

# Number of raised fault flags in the last row of each system.
s = Count_df.groupby('System')['Sum'].last()

# Possible totals run from 0 to the number of fault columns (inclusive).
for i in range(len(fault_columns) + 1):
    print(f' {i} capacitors(include System) failed {(s==i).sum()} times')


 0 capacitors(include System) failed 0 times
 1 capacitors(include System) failed 0 times
 2 capacitors(include System) failed 0 times
 3 capacitors(include System) failed 5 times
 4 capacitors(include System) failed 245 times


In [21]:
# Show the stacked frame (all systems, all feature columns plus 'System') as the cell output.
data_df

Unnamed: 0,i1,i2,i3,Load volt,I_DC,i1 nom,i2 nom,i3 nom,Load volt nom,I_DC nom,...,C02,C03,Ripple,Vpp,ripple_fault,Vpp_fault,C1_f,C2_f,C3_f,System
0,0.718119,0.718073,0.718578,3.644798,1.828490,0.718868,0.718868,0.718868,3.643944,1.827579,...,0.002,0.002,1.000525,1.000512,0.0,0.0,0.0,0.0,0.0,0
1,0.786991,0.788336,0.785945,3.488876,2.052769,0.786651,0.786651,0.786651,3.490459,2.053774,...,0.002,0.002,1.000996,1.000965,0.0,0.0,0.0,0.0,0.0,0
2,0.792861,0.792751,0.793295,3.476105,2.074553,0.793217,0.793217,0.793217,3.476627,2.075553,...,0.002,0.002,1.001421,1.001372,0.0,0.0,0.0,0.0,0.0,0
3,0.792204,0.791933,0.791551,3.475701,2.075315,0.793558,0.793558,0.793558,3.475140,2.077606,...,0.002,0.002,1.001806,1.001740,0.0,0.0,0.0,0.0,0.0,0
4,0.793151,0.792535,0.793978,3.474842,2.078964,0.793429,0.793429,0.793429,3.475198,2.077886,...,0.002,0.002,1.002157,1.002076,0.0,0.0,0.0,0.0,0.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
395,0.760272,0.749603,0.759410,3.524900,1.958486,0.765085,0.765085,0.765085,3.538935,1.972084,...,0.002,0.002,1.157820,1.150266,1.0,1.0,1.0,1.0,1.0,249
396,0.760526,0.748482,0.758926,3.525665,1.957609,0.765186,0.765186,0.765186,3.538669,1.972059,...,0.002,0.002,1.158422,1.150849,1.0,1.0,1.0,1.0,1.0,249
397,0.762636,0.747925,0.758718,3.525840,1.959648,0.765126,0.765126,0.765126,3.538883,1.972101,...,0.002,0.002,1.159023,1.151398,1.0,1.0,1.0,1.0,1.0,249
398,0.762756,0.749727,0.760150,3.524471,1.959266,0.765093,0.765093,0.765093,3.538861,1.972129,...,0.002,0.002,1.159625,1.151954,1.0,1.0,1.0,1.0,1.0,249


In [22]:
# only for degraded data
print(system_fault)

# NOTE(review): fault2RUL0 is imported from the project-local datautil module;
# presumably it rewrites the fault column into a per-system remaining-useful-life
# countdown (the column is renamed to 'RUL' further down) — confirm against datautil.
df = fault2RUL0(data_df, system='System', fault=system_fault)

# Count the rows whose converted fault column equals exactly 1; the result is
# reported as the number of faulty systems.
n = int((df[system_fault] == 1).sum())
print(f'Number of faulty systems {n}/{N}')

ripple_fault
Number of faulty systems 250/250


## Create System dataset

## Features currently used

In [25]:
# Features currently exported, with the converted fault column as the target.
# 'Load volt' is deliberately left out for now.
selected_columns = ['System',
                    'i1', 'i2', 'i3', 'I_DC',
                    'R', system_fault]

# Select and rename in one chained expression: rename() returns a new frame, so
# nothing is modified in place on a column-subset of `df` (the in-place variant
# is the pattern pandas reports as SettingWithCopyWarning).
RUL_df = df[selected_columns].rename(columns={system_fault: 'RUL'})

# Alternative feature set (disabled), taken from data_df:
# RUL_df=data_df[['System',
#                 'i1 nom','i2 nom','i3 nom','I_DC nom',
#                 'L','R',
#                 'clean L','clean R']]

RUL_df.head()


Unnamed: 0,System,i1,i2,i3,I_DC,R,RUL
0,0,0.718119,0.718073,0.718578,1.82849,1.655391,319.0
1,0,0.786991,0.788336,0.785945,2.052769,1.655389,318.0
2,0,0.792861,0.792751,0.793295,2.074553,1.655402,317.0
3,0,0.792204,0.791933,0.791551,2.075315,1.65539,316.0
4,0,0.793151,0.792535,0.793978,2.078964,1.65541,315.0


In [26]:
# Let pandas render up to 100 rows before it truncates a table.
pd.options.display.max_rows = 100

# One row per system: the first non-null value of every column, so the 'RUL'
# entry is the RUL value at the start of each system's record.
life = RUL_df.groupby('System').first()
life

Unnamed: 0_level_0,i1,i2,i3,I_DC,R,RUL
System,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,0.718119,0.718073,0.718578,1.828490,1.655391,319.0
1,0.740128,0.739068,0.740004,1.905717,1.518721,286.0
2,0.675466,0.674492,0.676688,1.665829,1.970064,295.0
3,0.680408,0.679659,0.678503,1.679963,1.940061,323.0
4,0.678084,0.677566,0.677760,1.677173,1.949394,308.0
...,...,...,...,...,...,...
245,0.813968,0.812194,0.816600,2.201616,1.077122,315.0
246,0.784541,0.786678,0.784472,2.081472,1.243780,328.0
247,0.690265,0.693730,0.693525,1.725209,1.848422,328.0
248,0.690271,0.689517,0.690659,1.720524,1.859552,332.0


## Save data

In [27]:
# Write the RUL dataset into the data folder without the row index.
# The file stem is the current value of sim_name, i.e. the variable name read
# from the .mat file in the load cell.
csv_path = f'./{data_name}/{sim_name}.csv'
RUL_df.to_csv(csv_path, index=False)

In [28]:
# Column names of the exported dataset as a plain Python list.
RUL_df.columns.tolist()

['System', 'i1', 'i2', 'i3', 'I_DC', 'R', 'RUL']

In [16]:
# Render the full RUL dataset.
# NOTE(review): this cell's execution count (16) is out of sequence, and the saved
# output lists columns (e.g. 'i1 nom', 'L', 'clean R', 'Ripple') that the current
# RUL_df selection does not contain, so the stored output appears to come from an
# earlier run with a different feature set — re-run the notebook top to bottom to refresh it.
display(RUL_df)

Unnamed: 0,System,i1,i2,i3,I_DC,i1 nom,i2 nom,i3 nom,I_DC nom,L,R,clean L,clean R,Ripple,RUL
0,0,0.718119,0.718073,0.718578,1.828490,0.718868,0.718868,0.718868,1.827579,0.100001,1.655391,0.1,1.655398,1.000525,319.0
1,0,0.786991,0.788336,0.785945,2.052769,0.786651,0.786651,0.786651,2.053774,0.100000,1.655389,0.1,1.655398,1.000996,318.0
2,0,0.792861,0.792751,0.793295,2.074553,0.793217,0.793217,0.793217,2.075553,0.100000,1.655402,0.1,1.655398,1.001421,317.0
3,0,0.792204,0.791933,0.791551,2.075315,0.793558,0.793558,0.793558,2.077606,0.100000,1.655390,0.1,1.655398,1.001806,316.0
4,0,0.793151,0.792535,0.793978,2.078964,0.793429,0.793429,0.793429,2.077886,0.100000,1.655410,0.1,1.655398,1.002157,315.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
395,249,0.760272,0.749603,0.759410,1.958486,0.765085,0.765085,0.765085,1.972084,0.100001,1.778126,0.1,1.778125,1.157820,0.0
396,249,0.760526,0.748482,0.758926,1.957609,0.765186,0.765186,0.765186,1.972059,0.100000,1.778116,0.1,1.778125,1.158422,0.0
397,249,0.762636,0.747925,0.758718,1.959648,0.765126,0.765126,0.765126,1.972101,0.100000,1.778120,0.1,1.778125,1.159023,0.0
398,249,0.762756,0.749727,0.760150,1.959266,0.765093,0.765093,0.765093,1.972129,0.100000,1.778132,0.1,1.778125,1.159625,0.0


In [29]:
# First non-null value of every column for each system (same expression as the `life` table above).
RUL_df.groupby('System').first()

Unnamed: 0_level_0,i1,i2,i3,I_DC,R,RUL
System,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,0.718119,0.718073,0.718578,1.828490,1.655391,319.0
1,0.740128,0.739068,0.740004,1.905717,1.518721,286.0
2,0.675466,0.674492,0.676688,1.665829,1.970064,295.0
3,0.680408,0.679659,0.678503,1.679963,1.940061,323.0
4,0.678084,0.677566,0.677760,1.677173,1.949394,308.0
...,...,...,...,...,...,...
245,0.813968,0.812194,0.816600,2.201616,1.077122,315.0
246,0.784541,0.786678,0.784472,2.081472,1.243780,328.0
247,0.690265,0.693730,0.693525,1.725209,1.848422,328.0
248,0.690271,0.689517,0.690659,1.720524,1.859552,332.0


In [30]:
# Last row of each system; used to inspect the end-of-record values (RUL column included).
RUL_df.groupby('System').nth(-1)

Unnamed: 0,System,i1,i2,i3,I_DC,R,RUL
399,0,0.961779,0.952519,0.950277,2.792098,1.179053,0.0
399,1,0.913071,0.929806,0.913964,2.639260,1.679723,0.0
399,2,0.737072,0.705635,0.718134,1.825349,1.203708,0.0
399,3,0.735513,0.709887,0.740535,1.854646,1.011121,0.0
399,4,0.858147,0.843993,0.861513,2.348778,1.713464,0.0
...,...,...,...,...,...,...,...
399,245,0.796315,0.804131,0.797287,2.121935,1.983065,0.0
399,246,0.756797,0.739138,0.761109,1.946723,1.987813,0.0
399,247,0.839605,0.849548,0.856077,2.326524,1.827972,0.0
399,248,0.734508,0.738769,0.765891,1.916207,1.245550,0.0


In [31]:
# Second-to-last row of each system, for comparison with the final row.
RUL_df.groupby('System').nth(-2)

Unnamed: 0,System,i1,i2,i3,I_DC,R,RUL
398,0,0.962184,0.952952,0.948731,2.791825,1.068251,0.0
398,1,0.913557,0.927666,0.913567,2.635533,1.162421,0.0
398,2,0.737593,0.707292,0.719910,1.824807,1.955107,0.0
398,3,0.735996,0.711685,0.739996,1.855107,1.916220,0.0
398,4,0.856050,0.843389,0.861801,2.348500,1.378091,0.0
...,...,...,...,...,...,...,...
398,245,0.795578,0.801580,0.798563,2.122824,1.592203,0.0
398,246,0.757183,0.739145,0.763499,1.943755,1.796166,0.0
398,247,0.838650,0.849812,0.857344,2.324771,1.400537,0.0
398,248,0.733386,0.739071,0.765070,1.918765,1.832653,0.0
