In [3]:
import pandas as pd
import numpy as np
import datetime
import matplotlib.pyplot as plt # plotting
import re
import os 

## 1. NASA Bearing Test Data
### Filepaths for Raw Data

In [4]:
# Raw recordings: one folder per test, all under a common root
full_root = 'data/NASA Bearings Full'
test1_fp = full_root + '/1st_test/1st_test'
test2_fp = full_root + '/2nd_test/2nd_test'
test3_fp = full_root + '/3rd_test/3rd_test'
# NOTE(review): this folder is nested under 3rd_test/ - confirm it matches the extracted archive layout
test4_fp = full_root + '/3rd_test/4th_test/txt'

# Pre-reduced version of the data
test2_mean_fp = 'data/NASA Bearings Reduced'

# Column labels per test, in file column order, as B<bearing>_c<channel>.
# Test 1 labels two consecutive channels per bearing; tests 2 and 3 label one.
t1_chans = ['B{}_c{}'.format((chan + 1) // 2, chan) for chan in range(1, 9)]
t2_chans = ['B{0}_c{0}'.format(bearing) for bearing in range(1, 5)]
t3_chans = list(t2_chans)  # same labels as test 2, but an independent list

### Create Combined Data Files

In [5]:
# Define a file to process
set_dir = test1_fp
# NOTE(review): the '_abs' suffix ends up in the output filename, but the
# combine cell runs with use_abs = False - confirm the label is intended
set_ref = 'test1_abs'

# Time between consecutive samples, in milliseconds (from test data).
# Despite the name this is a sampling *period*: 0.05 ms per sample is 20 kHz.
SAMPLING_RATE = 0.05

# Files are named according to their recording start timestamp
# (YYYY.MM.DD.HH.MM.SS), so a plain lexicographic sort arranges them so that
# they will be processed in chronological order for the row bind below.
# Anything that is not timestamp-shaped (e.g. .DS_Store, .ipynb_checkpoints)
# is skipped, because the combine step parses each name as a datetime.
recording_name = re.compile(r'\d{4}(\.\d{2}){5}')
filenames = sorted(
    name for name in os.listdir(set_dir) if recording_name.fullmatch(name)
)

In [6]:
# Create a simple slicer vector (list of row positions) to downsample data
nrows = 20480 # Number of rows in each data file
sample_win = 10 # Downsample data to roughly 0.1s steps (10Hz)
split_incr = nrows / (sample_win - 1) # Nominal (fractional) spacing between picks

# Spread sample_win picks evenly from the first row to the last row.
# Integer arithmetic keeps every position exact. The final pick would land on
# nrows itself (one past the end), so it is clamped to the last valid row
# rather than relying on floating-point round-off to stay in bounds.
last_row = nrows - 1
slicer = [min(pick * nrows // (sample_win - 1), last_row) for pick in range(sample_win)]
print(slicer)

[0, 2275, 4551, 6826, 9102, 11377, 13653, 15928, 18204, 20479]


In [7]:
# Explanation of reduced files (merged datasets)
# Load one raw recording from test 1 (tab-separated, no header row). The
# directory comes from test1_fp so the path is defined in one place only.
test = pd.read_csv(os.path.join(test1_fp, '2003.10.22.12.06.24'), sep = "\t", header = None)

# Each 1 sec file is reduced to the mean of the absolute value of each column
# We will also use this data for comparison later
# Shown here for the first column only.
print(test[0].abs().mean())

0.10414384765624869


In [9]:
# Read each data file in the defined directory, downsample it to the rows
# listed in `slicer`, and row bind all downsampled frames into `df_set`.
# The index of `df_set` is a derived timestamp string for each retained row.

use_abs = False # option to extract absolute value at each timestep location
ts_format = '%Y.%m.%d.%H.%M.%S.%f'
step = datetime.timedelta(milliseconds = SAMPLING_RATE) # time between raw samples

frames = [] # downsampled frame per file; concatenated once after the loop
for count, fn in enumerate(filenames):
    print('File ' + str(count + 1) + ' of '  + str(len(filenames)))
    df = pd.read_csv(os.path.join(set_dir, fn), sep = "\t", header = None)

    # Downsample using simple window sampling: keep only the rows at the
    # positions in `slicer` (raises IndexError if a file is shorter than expected)
    df = df.iloc[slicer, :]

    # Derive the timestamp index. The filename is the recording start time and
    # raw row k was taken k * step later, so only the retained rows need a
    # timestamp - there is no need to format one for every raw sample.
    dt = pd.to_datetime(fn +'.000', format = ts_format)
    df.index = [(dt + row * step).strftime(ts_format) for row in slicer]

    # If invoked as an option, get absolute values
    if use_abs:
        df = df.abs()

    frames.append(df)

# Single concat avoids re-copying the accumulated frame on every iteration;
# fall back to an empty frame when the directory held no data files.
df_set = pd.concat(frames) if frames else pd.DataFrame()

File 1 of 2156
File 2 of 2156
File 3 of 2156
File 4 of 2156
File 5 of 2156
File 6 of 2156
File 7 of 2156
File 8 of 2156
File 9 of 2156
File 10 of 2156
File 11 of 2156
File 12 of 2156
File 13 of 2156
File 14 of 2156
File 15 of 2156
File 16 of 2156
File 17 of 2156
File 18 of 2156
File 19 of 2156
File 20 of 2156
File 21 of 2156
File 22 of 2156
File 23 of 2156
File 24 of 2156
File 25 of 2156
File 26 of 2156
File 27 of 2156
File 28 of 2156
File 29 of 2156
File 30 of 2156
File 31 of 2156
File 32 of 2156
File 33 of 2156
File 34 of 2156
File 35 of 2156
File 36 of 2156
File 37 of 2156
File 38 of 2156
File 39 of 2156
File 40 of 2156
File 41 of 2156
File 42 of 2156
File 43 of 2156
File 44 of 2156
File 45 of 2156
File 46 of 2156
File 47 of 2156
File 48 of 2156
File 49 of 2156
File 50 of 2156
File 51 of 2156
File 52 of 2156
File 53 of 2156
File 54 of 2156
File 55 of 2156
File 56 of 2156
File 57 of 2156
File 58 of 2156
File 59 of 2156
File 60 of 2156
File 61 of 2156
File 62 of 2156
File 63 of 2156
F

File 491 of 2156
File 492 of 2156
File 493 of 2156
File 494 of 2156
File 495 of 2156
File 496 of 2156
File 497 of 2156
File 498 of 2156
File 499 of 2156
File 500 of 2156
File 501 of 2156
File 502 of 2156
File 503 of 2156
File 504 of 2156
File 505 of 2156
File 506 of 2156
File 507 of 2156
File 508 of 2156
File 509 of 2156
File 510 of 2156
File 511 of 2156
File 512 of 2156
File 513 of 2156
File 514 of 2156
File 515 of 2156
File 516 of 2156
File 517 of 2156
File 518 of 2156
File 519 of 2156
File 520 of 2156
File 521 of 2156
File 522 of 2156
File 523 of 2156
File 524 of 2156
File 525 of 2156
File 526 of 2156
File 527 of 2156
File 528 of 2156
File 529 of 2156
File 530 of 2156
File 531 of 2156
File 532 of 2156
File 533 of 2156
File 534 of 2156
File 535 of 2156
File 536 of 2156
File 537 of 2156
File 538 of 2156
File 539 of 2156
File 540 of 2156
File 541 of 2156
File 542 of 2156
File 543 of 2156
File 544 of 2156
File 545 of 2156
File 546 of 2156
File 547 of 2156
File 548 of 2156
File 549 of 21

File 973 of 2156
File 974 of 2156
File 975 of 2156
File 976 of 2156
File 977 of 2156
File 978 of 2156
File 979 of 2156
File 980 of 2156
File 981 of 2156
File 982 of 2156
File 983 of 2156
File 984 of 2156
File 985 of 2156
File 986 of 2156
File 987 of 2156
File 988 of 2156
File 989 of 2156
File 990 of 2156
File 991 of 2156
File 992 of 2156
File 993 of 2156
File 994 of 2156
File 995 of 2156
File 996 of 2156
File 997 of 2156
File 998 of 2156
File 999 of 2156
File 1000 of 2156
File 1001 of 2156
File 1002 of 2156
File 1003 of 2156
File 1004 of 2156
File 1005 of 2156
File 1006 of 2156
File 1007 of 2156
File 1008 of 2156
File 1009 of 2156
File 1010 of 2156
File 1011 of 2156
File 1012 of 2156
File 1013 of 2156
File 1014 of 2156
File 1015 of 2156
File 1016 of 2156
File 1017 of 2156
File 1018 of 2156
File 1019 of 2156
File 1020 of 2156
File 1021 of 2156
File 1022 of 2156
File 1023 of 2156
File 1024 of 2156
File 1025 of 2156
File 1026 of 2156
File 1027 of 2156
File 1028 of 2156
File 1029 of 2156
F

File 1431 of 2156
File 1432 of 2156
File 1433 of 2156
File 1434 of 2156
File 1435 of 2156
File 1436 of 2156
File 1437 of 2156
File 1438 of 2156
File 1439 of 2156
File 1440 of 2156
File 1441 of 2156
File 1442 of 2156
File 1443 of 2156
File 1444 of 2156
File 1445 of 2156
File 1446 of 2156
File 1447 of 2156
File 1448 of 2156
File 1449 of 2156
File 1450 of 2156
File 1451 of 2156
File 1452 of 2156
File 1453 of 2156
File 1454 of 2156
File 1455 of 2156
File 1456 of 2156
File 1457 of 2156
File 1458 of 2156
File 1459 of 2156
File 1460 of 2156
File 1461 of 2156
File 1462 of 2156
File 1463 of 2156
File 1464 of 2156
File 1465 of 2156
File 1466 of 2156
File 1467 of 2156
File 1468 of 2156
File 1469 of 2156
File 1470 of 2156
File 1471 of 2156
File 1472 of 2156
File 1473 of 2156
File 1474 of 2156
File 1475 of 2156
File 1476 of 2156
File 1477 of 2156
File 1478 of 2156
File 1479 of 2156
File 1480 of 2156
File 1481 of 2156
File 1482 of 2156
File 1483 of 2156
File 1484 of 2156
File 1485 of 2156
File 1486 

File 1887 of 2156
File 1888 of 2156
File 1889 of 2156
File 1890 of 2156
File 1891 of 2156
File 1892 of 2156
File 1893 of 2156
File 1894 of 2156
File 1895 of 2156
File 1896 of 2156
File 1897 of 2156
File 1898 of 2156
File 1899 of 2156
File 1900 of 2156
File 1901 of 2156
File 1902 of 2156
File 1903 of 2156
File 1904 of 2156
File 1905 of 2156
File 1906 of 2156
File 1907 of 2156
File 1908 of 2156
File 1909 of 2156
File 1910 of 2156
File 1911 of 2156
File 1912 of 2156
File 1913 of 2156
File 1914 of 2156
File 1915 of 2156
File 1916 of 2156
File 1917 of 2156
File 1918 of 2156
File 1919 of 2156
File 1920 of 2156
File 1921 of 2156
File 1922 of 2156
File 1923 of 2156
File 1924 of 2156
File 1925 of 2156
File 1926 of 2156
File 1927 of 2156
File 1928 of 2156
File 1929 of 2156
File 1930 of 2156
File 1931 of 2156
File 1932 of 2156
File 1933 of 2156
File 1934 of 2156
File 1935 of 2156
File 1936 of 2156
File 1937 of 2156
File 1938 of 2156
File 1939 of 2156
File 1940 of 2156
File 1941 of 2156
File 1942 

In [10]:
# Preview the first 10 rows, i.e. the 10 downsampled picks taken from the first file
df_set.head(10)     

Unnamed: 0,0,1,2,3,4,5,6,7
2003.10.22.12.06.24.000000,-0.022,-0.039,-0.183,-0.054,-0.105,-0.134,-0.129,-0.142
2003.10.22.12.06.24.113750,-0.085,-0.203,-0.137,-0.146,0.0,-0.183,-0.129,-0.105
2003.10.22.12.06.24.227550,-0.039,-0.051,-0.012,-0.024,-0.227,-0.251,-0.166,-0.068
2003.10.22.12.06.24.341300,-0.029,-0.168,-0.029,-0.056,0.007,-0.137,-0.134,-0.088
2003.10.22.12.06.24.455100,-0.098,-0.09,0.083,-0.11,-0.002,-0.261,-0.078,-0.024
2003.10.22.12.06.24.568850,-0.146,-0.073,0.032,-0.142,-0.115,-0.056,-0.137,-0.044
2003.10.22.12.06.24.682650,-0.139,-0.046,-0.081,-0.129,0.051,-0.098,-0.007,-0.168
2003.10.22.12.06.24.796400,-0.054,-0.129,-0.073,-0.081,-0.127,0.02,-0.107,-0.044
2003.10.22.12.06.24.910200,-0.01,-0.078,0.017,0.083,-0.051,0.056,-0.134,-0.11
2003.10.22.12.06.25.023950,-0.042,0.098,0.103,-0.059,-0.002,-0.342,-0.095,-0.276


In [11]:
# Rename the columns as per channel definition from NASA test document:
# positional column number -> channel label, applied in a single rename
chans = t1_chans
position_to_label = dict(enumerate(chans))
df_set = df_set.rename(columns = position_to_label)

In [12]:
# Summary statistics for each channel column over all downsampled rows
df_set.describe()

Unnamed: 0,B1_c1,B1_c2,B2_c3,B2_c4,B3_c5,B3_c6,B4_c7,B4_c8
count,21560.0,21560.0,21560.0,21560.0,21560.0,21560.0,21560.0,21560.0
mean,-0.116648,-0.116262,-0.11626,-0.115546,-0.112209,-0.113254,-0.113527,-0.113032
std,0.09738,0.088524,0.094746,0.078626,0.118037,0.122855,0.096138,0.099437
min,-0.652,-0.725,-0.684,-0.552,-1.541,-1.096,-1.05,-1.062
25%,-0.181,-0.173,-0.181,-0.166,-0.186,-0.188,-0.166,-0.168
50%,-0.117,-0.115,-0.115,-0.115,-0.112,-0.112,-0.112,-0.112
75%,-0.054,-0.059,-0.051,-0.063,-0.039,-0.037,-0.061,-0.059
max,0.476,0.576,0.376,0.33,1.484,1.875,1.223,0.776


In [None]:
# Write this downsampled file to disk, named after the run label,
# keeping the timestamp index as the first CSV column
combined_csv = set_ref + "_combined_ds.csv"
df_set.to_csv(combined_csv, index = True)