In [94]:
import csv
import glob
import itertools
import os
import re
from datetime import datetime

import numpy as np
import pandas as pd

In [96]:
def read_original_df(rute:str = 'dataset/Export_Time.csv') -> pd.DataFrame:
    """Load the raw waveform export into one wide DataFrame.

    Each CSV record keeps its metadata in the named header columns and its
    samples as a variable-length tail: the first sample sits under the
    ``Data`` header and any further samples overflow past the header, where
    ``csv.DictReader`` collects them in a list stored under the ``None`` key.

    Records with ``Samples == '0'`` or without a ``Data`` field are skipped.

    Args:
        rute: Path of the CSV export to read.

    Returns:
        One row per record that has samples. Metadata columns keep their CSV
        names (values stay strings), except ``DTS`` which is parsed and stored
        as ``DTS_new``. ``point`` holds the 8th backslash-separated component
        of ``Point Path``. Samples are floats in integer-named columns
        ``0..n-1``; shorter records are NaN-padded to the longest one.
        An empty DataFrame is returned when no record has samples.

    Raises:
        ValueError: If ``DTS`` does not match ``%m/%d/%Y %I:%M:%S %p`` or a
            sample is not a valid float.
        IndexError: If ``Point Path`` has fewer than 8 components.
    """
    format_string = '%m/%d/%Y %I:%M:%S %p'
    list_rows = []
    # newline='' is what the csv module documents for files handed to a reader.
    with open(rute, newline='') as csvfile:
        for row in csv.DictReader(csvfile, delimiter=','):
            if row.get('Samples') == '0' or row.get('Data') is None:
                continue
            row['DTS_new'] = datetime.strptime(row.pop('DTS'), format_string)
            row['point'] = row['Point Path'].split('\\')[7]
            first_sample = row.pop('Data')
            # The None key only exists when the record is longer than the
            # header, so a single-sample record has no overflow list.
            overflow = row.pop(None, [])
            row['newData'] = [
                float(item) for item in itertools.chain([first_sample], overflow)
            ]
            list_rows.append(row)

    if not list_rows:
        # Nothing to expand; avoids a KeyError on the missing 'newData' column.
        return pd.DataFrame()

    df = pd.DataFrame(list_rows)
    samples = pd.DataFrame(df['newData'].tolist())
    return pd.concat([df.drop('newData', axis=1), samples], axis=1)

In [97]:
def chunk_df(list_rows: pd.DataFrame, chunks:int = 6, out_dir:str = 'dataset/chunks') -> None:
    """Write a DataFrame to disk as ``chunks`` consecutive CSV files.

    Rows are split in order into nearly equal parts: the first
    ``len(list_rows) % chunks`` files get one extra row, which is the same
    partition ``np.array_split`` produces. Slicing with ``iloc`` is used
    instead of ``np.array_split`` because the latter reaches DataFrames
    through ``DataFrame.swapaxes``, which pandas has deprecated.

    Files are named ``Export_Time_<i>.csv`` with ``i`` starting at 0. Existing
    files with other indices are left in place.

    Args:
        list_rows: Frame to split.
        chunks: Number of files to write; must be at least 1.
        out_dir: Directory that receives the files; created when missing.

    Raises:
        ValueError: If ``chunks`` is smaller than 1.
    """
    if chunks < 1:
        raise ValueError('chunks must be a positive integer')
    os.makedirs(out_dir, exist_ok=True)

    base_size, remainder = divmod(len(list_rows), chunks)
    start = 0
    for i in range(chunks):
        stop = start + base_size + (1 if i < remainder else 0)
        chunk_path = os.path.join(out_dir, f'Export_Time_{i}.csv')
        list_rows.iloc[start:stop].to_csv(chunk_path, index=False, sep=',')
        start = stop

In [98]:
def read_chunk_data(search_str:str = 'dataset/chunks/Export_Time_*.csv') -> pd.DataFrame:
    """Read every chunk CSV matching a glob pattern into one DataFrame.

    ``glob.glob`` returns matches in arbitrary order, so the files are sorted
    with a natural key (digit runs compared as integers). This makes
    ``Export_Time_2.csv`` come before ``Export_Time_10.csv`` and keeps the row
    order stable between runs.

    Args:
        search_str: Glob pattern selecting the chunk files.

    Returns:
        The chunks concatenated in file order with a fresh 0..n-1 index.

    Raises:
        ValueError: If no file matches ``search_str``.
    """
    def _natural_key(path):
        # re.split with a capturing group alternates text and digit runs, so
        # odd positions are always the numeric tokens.
        tokens = re.split(r'(\d+)', path)
        return [int(tok) if idx % 2 else tok for idx, tok in enumerate(tokens)]

    all_chunk_files = sorted(glob.glob(search_str), key=_natural_key)
    if not all_chunk_files:
        raise ValueError(f'No chunk files match {search_str!r}')

    df_list = [pd.read_csv(chunk_file, sep=',') for chunk_file in all_chunk_files]
    return pd.concat(df_list, ignore_index=True)

In [99]:
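# # One-time step: split the raw export into chunk files.
# # When lowering `chunks`, delete old files in dataset/chunks first: read_chunk_data loads every Export_Time_*.csv it finds.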
# list_rows = read_original_df(rute = 'dataset/Export_Time.csv')
# chunk_df(list_rows,chunks = 15)

  return bound(*args, **kwds)


In [100]:
# # Work with original data
# df = read_original_df(rute = 'dataset/Export_Time.csv')

In [101]:
# Work with chunk data
df = read_chunk_data(search_str = 'dataset/chunks/Export_Time_*.csv')

In [102]:
df.head()

Unnamed: 0,Point Path,Unit,Detection,Channel,Samples,Max Time,Speed (Hz),Process Value,DTS_new,point,...,32758,32759,32760,32761,32762,32763,32764,32765,32766,32767
0,\Hierarchy\MOLIENDA\MOLINO BOLAS 04\3224-MOLIN...,,Peak,1,8192,3.2,16.0,,2024-06-24 10:42:11,1HV,...,,,,,,,,,,
1,\Hierarchy\MOLIENDA\MOLINO BOLAS 04\3224-MOLIN...,,Peak,1,8192,3.2,16.0,,2024-06-17 11:19:20,1HV,...,,,,,,,,,,
2,\Hierarchy\MOLIENDA\MOLINO BOLAS 04\3224-MOLIN...,,Peak,1,8192,3.2,16.0,,2024-06-10 09:46:03,1HV,...,,,,,,,,,,
3,\Hierarchy\MOLIENDA\MOLINO BOLAS 04\3224-MOLIN...,,Peak,1,8192,3.2,16.0,,2024-06-03 07:45:13,1HV,...,,,,,,,,,,
4,\Hierarchy\MOLIENDA\MOLINO BOLAS 04\3224-MOLIN...,,Peak,1,8192,3.2,16.0,,2024-05-26 09:32:36,1HV,...,,,,,,,,,,


In [103]:
# Split each 'Point Path' into one column per path level. r'\\' is an escaped
# backslash, so the split happens on every single '\' character.
# In the preview of this frame, column 6 holds the component level (MOTOR) and
# column 7 the point tag (1HV); columns 0 and 8 come out empty because the
# path starts and ends with a backslash.
path_tag_df = df['Point Path'].str.split( r'\\' , expand = True)

In [104]:
path_tag_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,,Hierarchy,MOLIENDA,MOLINO BOLAS 04,3224-MOLINO BOLA04 E,ML-004-ESTE,MOTOR,1HV,
1,,Hierarchy,MOLIENDA,MOLINO BOLAS 04,3224-MOLINO BOLA04 E,ML-004-ESTE,MOTOR,1HV,
2,,Hierarchy,MOLIENDA,MOLINO BOLAS 04,3224-MOLINO BOLA04 E,ML-004-ESTE,MOTOR,1HV,
3,,Hierarchy,MOLIENDA,MOLINO BOLAS 04,3224-MOLINO BOLA04 E,ML-004-ESTE,MOTOR,1HV,
4,,Hierarchy,MOLIENDA,MOLINO BOLAS 04,3224-MOLINO BOLA04 E,ML-004-ESTE,MOTOR,1HV,


In [105]:
# Keep only the rows whose path has 'CONTRAEJE' at level 6 and one of the
# point tags 7AV/7HV/7VV/8AV/8HV/8VV at level 7. The masks come from
# path_tag_df and line up with df through the shared row index.
is_contraeje = path_tag_df[6] == 'CONTRAEJE'
is_selected_point = path_tag_df[7].isin(['7AV','7HV','7VV','8AV','8HV','8VV'])
df = df[is_contraeje & is_selected_point]

In [106]:
# Renumber the rows 0..n-1 after filtering and discard the old index.
# Rebinding instead of inplace=True keeps the cell a plain expression that can
# be chained and re-run.
df = df.reset_index(drop=True)

In [107]:
df.head()

Unnamed: 0,Point Path,Unit,Detection,Channel,Samples,Max Time,Speed (Hz),Process Value,DTS_new,point,...,32758,32759,32760,32761,32762,32763,32764,32765,32766,32767
0,\Hierarchy\MOLIENDA\MOLINO BOLAS 04\3224-MOLIN...,,Peak,1,8192,4.0,2.833,,2024-06-24 10:36:20,7HV,...,,,,,,,,,,
1,\Hierarchy\MOLIENDA\MOLINO BOLAS 04\3224-MOLIN...,,Peak,1,8192,4.0,2.833,,2024-06-17 11:28:25,7HV,...,,,,,,,,,,
2,\Hierarchy\MOLIENDA\MOLINO BOLAS 04\3224-MOLIN...,,Peak,1,8192,4.0,2.833,,2024-06-10 09:40:35,7HV,...,,,,,,,,,,
3,\Hierarchy\MOLIENDA\MOLINO BOLAS 04\3224-MOLIN...,,Peak,1,8192,4.0,2.833,,2024-06-03 07:53:28,7HV,...,,,,,,,,,,
4,\Hierarchy\MOLIENDA\MOLINO BOLAS 04\3224-MOLIN...,,Peak,1,8192,4.0,2.833,,2024-05-26 09:41:06,7HV,...,,,,,,,,,,


In [108]:
# Drop the columns that are NaN in every remaining row (in the previews:
# Unit, Process Value and the sample slots past 8191).
# dropna(how='all') derives this from the data instead of comparing each
# column's NaN count with a hard-coded 310, which stops matching as soon as
# the number of rows changes.
# NOTE(review): assumes 310 was the row count of df when this was written - confirm.
df = df.dropna(axis=1, how='all')

In [109]:
df.head()

Unnamed: 0,Point Path,Detection,Channel,Samples,Max Time,Speed (Hz),DTS_new,point,0,1,...,8182,8183,8184,8185,8186,8187,8188,8189,8190,8191
0,\Hierarchy\MOLIENDA\MOLINO BOLAS 04\3224-MOLIN...,Peak,1,8192,4.0,2.833,2024-06-24 10:36:20,7HV,0.22372,-0.143025,...,0.094253,0.182554,0.005301,0.012927,0.053954,-0.043668,0.062526,0.216054,-0.000729,-0.006345
1,\Hierarchy\MOLIENDA\MOLINO BOLAS 04\3224-MOLIN...,Peak,1,8192,4.0,2.833,2024-06-17 11:28:25,7HV,0.241477,0.164616,...,-0.016933,0.038991,-0.180595,0.297235,0.354881,-0.343904,-0.259365,0.226537,-0.002179,0.238303
2,\Hierarchy\MOLIENDA\MOLINO BOLAS 04\3224-MOLIN...,Peak,1,8192,4.0,2.833,2024-06-10 09:40:35,7HV,0.037745,-0.196683,...,-0.248837,-0.017379,-0.251733,0.10684,0.051789,0.237816,0.013717,-0.18109,-0.050769,-0.180689
3,\Hierarchy\MOLIENDA\MOLINO BOLAS 04\3224-MOLIN...,Peak,1,8192,4.0,2.833,2024-06-03 07:53:28,7HV,0.126455,0.004901,...,-0.249543,-0.203694,0.265743,0.383425,0.071253,-0.048206,-0.199709,-0.301079,0.076004,0.390833
4,\Hierarchy\MOLIENDA\MOLINO BOLAS 04\3224-MOLIN...,Peak,1,8192,4.0,2.833,2024-05-26 09:41:06,7HV,-0.377512,-0.150958,...,0.140126,-0.147141,-0.355537,-0.201252,0.251159,0.276667,0.10946,-0.079051,-0.335936,-0.277621
