In [1]:
import pandas as pd
import numpy as np
import csv
import itertools
from datetime import datetime
import glob
import matplotlib.pyplot as plt

In [2]:
def read_original_df(rute:str = 'dataset/Export_Time.csv') -> pd.DataFrame:
    list_rows = []
    list_rows_with_data = []
    format_string = '%m/%d/%Y %I:%M:%S %p'
    with open(rute, newline='\n') as csvfile:
        csvreader = csv.DictReader(csvfile, delimiter=',')
        for i,row in enumerate(csvreader):
            filters = row.get('Point Path').split('\\')
            if row.get('Samples') == '0' or row.get('Data') == None:
                list_rows_with_data.append(row)
            else:
                row['DTS_new'] = datetime.strptime(row.pop('DTS'), format_string)
                row['point'] =  filters[7]
                data = [row.pop('Data')]
                row['newData'] = row.pop(None)
                row['newData'] = list(itertools.chain(data, row['newData']))
                row['newData'] = [float(item) for item in row['newData']]
                list_rows.append(row)
    df = pd.DataFrame(list_rows)
    df = pd.concat([df.drop('newData', axis=1), pd.DataFrame(df['newData'].tolist())], axis=1)
    
    return list_rows

In [3]:
def chunk_df(list_rows: pd.DataFrame, chunks:int = 6) -> None:
    df_chunks:list[pd.DataFrame] = np.array_split(list_rows,chunks)
    for i,chunk in enumerate(df_chunks):
        chunk.to_csv(f'dataset/chunks/Export_Time_{i}.csv',index=False,sep=',')

In [4]:
def read_chunk_data(search_str:str = 'dataset/chunks/Export_Time_*.csv') -> pd.DataFrame:
    all_chunk_files = glob.glob(search_str)
    df_list = [pd.read_csv(chunk_file,sep=',') for chunk_file in all_chunk_files]
    combined_df = pd.concat(df_list, ignore_index=True)
    return combined_df

In [140]:
def filter_df(df_copy:pd.DataFrame,eq_comp:list[str] = ['CONTRAEJE'],point_vibr:list[str]=['7AV','7HV','7VV','8AV','8HV','8VV']):
    # df_copy
    path_tag_df = df_copy['Point Path'].str.split( r'\\' , expand = True)
    # df['points'] = path_tag_df[7]
    df_copy = df_copy[(path_tag_df[6].isin(eq_comp))]
    print(df_copy['point'].unique())
    df_copy = df_copy[(path_tag_df[7].isin(point_vibr))]
    df_copy.reset_index(inplace=True,drop=True)
    df_copy.drop(columns=df_copy.loc[:,df_copy.isna().sum() == len(df_copy)].columns, axis=1, inplace=True)
    # columns = [int(x) if x.isdigit() else x for x in df_copy.columns]
    # df_copy.columns = columns
    return df_copy

In [151]:
def fft_freq(df_time:pd.DataFrame):
    df_freq = df_time.copy()
    filter_vibr = [str(i) for i in range(8192)]
    df_freq = df_freq.drop(columns=filter_vibr, axis = 1)
    
    freq_data = []
    for i,row in df_time.iterrows():
        y = np.array(df_time.loc[i,filter_vibr].tolist())
        # vibration_data = data['Vibration'].values
        vibration_data = y
        # Time parameters
        sampling_rate = row['Samples']/row['Max Time']  # Hz, example sampling rate
        n = len(vibration_data)
        time = np.arange(n) / sampling_rate

        # Perform FFT
        freq_domain = np.fft.fft(vibration_data)
        freq = np.fft.fftfreq(n, d=1/sampling_rate)

        x_freq = freq[:n//2]
        y_freq = np.abs(freq_domain)[:n//2] * 1/n

        freq_data.append(y_freq)

    df_freq = pd.concat([df_freq,pd.DataFrame(freq_data)],axis=1)
    columns = [str(x) for x in df_freq.columns]
    df_freq.columns = columns
    return df_freq


In [7]:
# # chunk raw data
# list_rows = read_original_df(rute = 'dataset/Export_Time.csv')
# chunk_df(list_rows,chunks = 15)

In [8]:
# # Work with original data
# df = read_original_df(rute = 'dataset/Export_Time.csv')

In [9]:
# Work with chunk data
df = read_chunk_data(search_str = 'dataset/chunks/Export_Time_*.csv')

In [10]:
df.head()

Unnamed: 0,Point Path,Unit,Detection,Channel,Samples,Max Time,Speed (Hz),Process Value,DTS_new,point,...,32758,32759,32760,32761,32762,32763,32764,32765,32766,32767
0,\Hierarchy\MOLIENDA\MOLINO BOLAS 04\3224-MOLIN...,,Peak,1,8192,3.2,16.0,,2024-06-24 10:42:11,1HV,...,,,,,,,,,,
1,\Hierarchy\MOLIENDA\MOLINO BOLAS 04\3224-MOLIN...,,Peak,1,8192,3.2,16.0,,2024-06-17 11:19:20,1HV,...,,,,,,,,,,
2,\Hierarchy\MOLIENDA\MOLINO BOLAS 04\3224-MOLIN...,,Peak,1,8192,3.2,16.0,,2024-06-10 09:46:03,1HV,...,,,,,,,,,,
3,\Hierarchy\MOLIENDA\MOLINO BOLAS 04\3224-MOLIN...,,Peak,1,8192,3.2,16.0,,2024-06-03 07:45:13,1HV,...,,,,,,,,,,
4,\Hierarchy\MOLIENDA\MOLINO BOLAS 04\3224-MOLIN...,,Peak,1,8192,3.2,16.0,,2024-05-26 09:32:36,1HV,...,,,,,,,,,,


In [11]:
# df_time = filter_df(df,eq_comp = ['CONTRAEJE'],point_vibr=['7AV','7HV','7VV','7HE3','8AV','8HV','8VV','8HE3'])
# df_freq = fft_freq(df_time)
# df_freq.to_csv('dataset/new_data/df_freq.csv',index=False)
# df_time.to_csv('dataset/new_data/df_time.csv',index=False)

In [152]:
df_time = filter_df(df,eq_comp = ['CONTRAEJE'],point_vibr=['7HE3','8HE3'])
df_freq = fft_freq(df_time)

['7HV' '7HBaja' '7HE3' '7Hac' '7VV' '7AV' '7AO' '8HV' '8HBaja' '8HE3'
 '8Hac' '8VV' '8AV' '8AO']


  df_copy = df_copy[(path_tag_df[7].isin(point_vibr))]


In [154]:
df_freq.to_csv('dataset/new_data/df_freq_filtered.csv',index=False)
df_time.to_csv('dataset/new_data/df_time_filtered.csv',index=False)

In [155]:
df_time.drop(columns=['Point Path', 'Detection', 'Channel', 'Samples', 'Max Time','Speed (Hz)','DTS_new'], axis=1, inplace=True)
df_freq.drop(columns=['Point Path', 'Detection', 'Channel', 'Samples', 'Max Time','Speed (Hz)','DTS_new'], axis=1, inplace= True)

In [156]:
count = int(df_time.groupby('point').agg({'point': ['count']}).reset_index(drop = True).reset_index(drop=True).min())
df_time = df_time.groupby('point').head(int(count))

  count = int(df_time.groupby('point').agg({'point': ['count']}).reset_index(drop = True).reset_index(drop=True).min())


In [157]:
count = int(df_freq.groupby('point').agg({'point': ['count']}).reset_index(drop = True).reset_index(drop=True).min())
df_freq = df_freq.groupby('point').head(int(count))

  count = int(df_freq.groupby('point').agg({'point': ['count']}).reset_index(drop = True).reset_index(drop=True).min())


In [174]:
def transformData(df:pd.DataFrame):
    columns = [int(x) if x.isdigit() else x for x in df_freq.columns]
    len_vibra = max([x for x in list(columns) if isinstance(x, (int, float))])
    df.loc[df['point'] == '7HE3','target'] = 0
    df.loc[df['point'] == '8HE3','target'] = 1
    df['index'] = df.groupby(['point','target']).cumcount()
    df_time_bad = df[df['point'] == '7HE3']
    df_time_good = df[df['point'] == '8HE3']
    df_time_bad.loc[df['point'] == '7HE3','point'] = 'HE3'
    df_time_good.loc[df['point'] == '8HE3','point'] = 'HE3'

    filter_vibr = [str(i) for i in range(len_vibra)]
    f_pivoted_bad = df_time_bad.pivot(index=['index','target'], columns=['point'], values=filter_vibr)
    f_pivoted_good = df_time_good.pivot(index=['index','target'], columns=['point'], values=filter_vibr)
    f_pivoted_bad.columns = ['_'.join(col).strip() for col in f_pivoted_bad.columns.values]
    f_pivoted_good.columns = ['_'.join(col).strip() for col in f_pivoted_good.columns.values]
    df_time_done = pd.concat([f_pivoted_good,f_pivoted_bad], axis=0)
    df_time_done.reset_index(inplace=True)
    df_time_done.drop(columns=['index'], axis=1, inplace=True)
    return df_time_done

In [175]:
df_time_done = transformData(df_time)
df_freq_done = transformData(df_freq)

In [176]:
df_time_done.head()

Unnamed: 0,target,0_HE3,1_HE3,2_HE3,3_HE3,4_HE3,5_HE3,6_HE3,7_HE3,8_HE3,...,4085_HE3,4086_HE3,4087_HE3,4088_HE3,4089_HE3,4090_HE3,4091_HE3,4092_HE3,4093_HE3,4094_HE3
0,1.0,-0.040887,0.006277,0.004097,0.015771,0.096539,0.0409,-0.033063,-0.011656,0.002947,...,-0.060634,-0.024219,0.009995,0.023203,-0.00255,-0.016656,0.070001,0.030256,-0.05114,-0.006395
1,1.0,0.005245,-0.194973,-0.274077,-0.037272,-0.020225,-0.20293,0.038206,-0.10831,-0.259723,...,-0.090359,-0.005096,-0.245626,-0.202026,0.042567,-0.137664,-0.124462,0.00756,-0.19702,-0.206894
2,1.0,-0.045757,-0.01614,0.031918,-0.025115,-0.047552,-0.052164,-0.010498,0.006793,0.040769,...,-0.118453,-0.017553,0.012054,-0.024728,0.02628,-0.063965,-0.075756,-0.041017,-0.003829,-0.042683
3,1.0,-0.087019,-0.0256,-0.114839,-0.141508,0.041534,-0.07536,-0.093169,0.022566,-0.112536,...,-0.018809,-0.133055,0.029613,-0.024318,-0.100098,-0.059205,-0.080993,-0.1169,-0.087798,0.061418
4,1.0,-0.067937,-0.078963,0.039743,-0.067937,-0.070118,0.069999,-0.076139,-0.039349,-0.047935,...,-0.079606,-0.041655,0.003586,-0.100244,-0.113199,-0.109862,-0.012425,-0.065631,-0.037038,-0.013068


In [177]:
df_freq_done.head()

Unnamed: 0,target,0_HE3,1_HE3,2_HE3,3_HE3,4_HE3,5_HE3,6_HE3,7_HE3,8_HE3,...,4085_HE3,4086_HE3,4087_HE3,4088_HE3,4089_HE3,4090_HE3,4091_HE3,4092_HE3,4093_HE3,4094_HE3
0,1.0,0.01897,0.001095,0.001061,0.001539,0.000536,0.000401,0.000258,0.000753,0.000873,...,0.000122,0.000132,0.000311,0.00017,0.000242,4.6e-05,0.000166,0.000136,0.000124,0.000226
1,1.0,0.096649,0.002541,0.000904,0.000214,0.00083,0.000382,0.000969,0.000542,0.000974,...,0.000846,0.000761,0.000471,0.000286,0.000355,0.000781,0.00058,0.000717,0.000266,0.000606
2,1.0,0.030549,0.0011,0.001033,0.000455,0.000489,0.00033,0.000448,0.000284,0.000363,...,0.000215,0.000316,0.000163,0.000253,0.0002,0.000364,0.000333,0.000202,0.000373,0.000174
3,1.0,0.060288,0.001842,0.000554,0.000437,0.000636,0.000214,0.000661,0.000875,0.000901,...,0.000208,0.000671,0.000537,0.000307,7.7e-05,0.000673,0.000726,0.000581,0.000491,0.000779
4,1.0,0.044945,0.001578,0.001163,0.000463,0.000499,0.000682,0.000298,0.00028,0.00031,...,8.9e-05,0.000156,0.000395,0.000641,0.000265,8e-05,0.000139,7.6e-05,0.000158,8.4e-05


In [178]:
df_time_done.to_csv('dataset/new_data/df_time_done.csv',index=False)
df_freq_done.to_csv('dataset/new_data/df_freq_done.csv',index=False)