In [106]:
# import python libs
import pprint
import math

# import 3rd party library
import mne
import numpy as np
import matplotlib.pyplot as plt

# Width, in samples, of each window the recording is cut into.
window_sz = 1024

# Positions of the windows whose minimum is kept for every user.
window_ids = (0, 2, 4, 5)

############################################################################
#  Part 1: read the EDF file of each of the 50 users and cut the first     #
#  channel into consecutive 1024-sample windows.                           #
#  - Keep the minimum of windows 0, 2, 4 and 5.                            #
#  - There are 64 channels; only the first one is used.                    #
############################################################################

# One row per user, one column per selected window (50 x 4 once filled).
raw_data_50_4 = []

# Iterate from user 1 to user 50
for user in range(1, 51):
    # File names carry a zero-padded, three-digit subject number (001..050).
    file_id = str(user).zfill(3)

    # Load edf for a user.
    raw = mne.io.read_raw_edf("data/S{}R01.edf".format(file_id), preload=True, verbose=False)

    # Samples of the first channel, fetched through the public accessor
    # rather than the private `_data` attribute.
    raw_ch0_arr = raw.get_data(picks=[0])[0]

    # Cut the signal into consecutive windows with slices instead of a
    # per-sample Python loop; the last window may hold fewer than window_sz
    # samples when the length is not a multiple of window_sz.
    bucket = [raw_ch0_arr[start:start + window_sz]
              for start in range(0, len(raw_ch0_arr), window_sz)]

    # Fail with a readable message when the recording is too short to
    # contain every requested window.
    if len(bucket) <= max(window_ids):
        raise IndexError(
            "data/S{}R01.edf: only {} windows of {} samples, need at least {}".format(
                file_id, len(bucket), window_sz, max(window_ids) + 1))

    # Minimum of each selected window, in the order given by window_ids.
    raw_data_50_4.append([bucket[idx].min() for idx in window_ids])

print('#'*10 + ' Part 1 ' + '#'*10)
pprint.pprint(raw_data_50_4)
    
    


########## Part 1 ##########
[[-9.2e-05, -0.000128, -0.000163, -0.000186],
 [-0.000102, -6.8e-05, -7.4e-05, -0.000143],
 [-0.00013199999999999998, -0.000164, -0.000192, -0.000107],
 [-7.2e-05, -8.099999999999999e-05, -6.1e-05, -7.2e-05],
 [-0.000215, -0.000184, -0.000202, -0.000187],
 [-0.000111, -0.000106, -9.499999999999999e-05, -7.999999999999999e-05],
 [-0.000121, -0.00011399999999999999, -0.000104, -0.000103],
 [-0.000127, -0.000101, -0.000133, -0.00014],
 [-0.000205, -0.00020099999999999998, -0.00017099999999999998, -0.000271],
 [-0.00019999999999999998, -0.000111, -0.000206, -0.000325],
 [-5.1e-05, -6.5e-05, -0.000188, -3.1e-05],
 [-9.499999999999999e-05, -7.2e-05, -5.9e-05, -7.7e-05],
 [-0.00017199999999999998, -0.000153, -0.000122, -0.000138],
 [-8.7e-05, -0.000102, -0.000108, -7.9e-05],
 [-0.000124, -0.00015, -0.00015, -0.000174],
 [-3.9999999999999996e-05, -5.1e-05, -3.9e-05, -7e-05],
 [-0.000272, -0.000296, -0.000254, -0.000225],
 [-0.000203, -0.000135, -0.000157, -0.000158

In [108]:
# import python libs
import pprint
import math

# import 3rd party library
import mne
import numpy as np
import matplotlib.pyplot as plt

# Width, in FFT bins, of each window the spectrum is cut into.
window_sz = 1024

# Positions of the windows whose minimum is kept for every user.
window_ids = (0, 2, 4, 5)

############################################################################
#  Part 2: read the EDF file of each of the 50 users and take the FFT of   #
#  the first channel, then cut the spectrum into 1024-bin windows.         #
#  - Keep the minimum real part of windows 0, 2, 4 and 5.                  #
#  - There are 64 channels; only the first one is used.                    #
############################################################################

# One row per user, one column per selected window (50 x 4 once filled).
fft_data_50_4 = []

# Iterate from user 1 to user 50
for user in range(1, 51):
    # File names carry a zero-padded, three-digit subject number (001..050).
    file_id = str(user).zfill(3)

    # Load edf for a user.
    raw = mne.io.read_raw_edf("data/S{}R01.edf".format(file_id), preload=True, verbose=False)

    # Samples of the first channel, fetched through the public accessor
    # rather than the private `_data` attribute.
    raw_ch0_arr = raw.get_data(picks=[0])[0]

    # The FFT is taken over the whole recording, so every bucket below is a
    # run of consecutive FFT bins, not a time window of the signal.
    fft_ch0_arr = np.fft.fft(raw_ch0_arr)

    # Only the real part is compared; the imaginary part is discarded.
    fft_real = fft_ch0_arr.real

    # Cut the spectrum into consecutive windows with slices instead of a
    # per-element Python loop; the last window may hold fewer than window_sz
    # bins when the length is not a multiple of window_sz.
    bucket = [fft_real[start:start + window_sz]
              for start in range(0, len(fft_real), window_sz)]

    # Fail with a readable message when the recording is too short to
    # contain every requested window.
    if len(bucket) <= max(window_ids):
        raise IndexError(
            "data/S{}R01.edf: only {} windows of {} bins, need at least {}".format(
                file_id, len(bucket), window_sz, max(window_ids) + 1))

    # Minimum of each selected window, in the order given by window_ids.
    fft_data_50_4.append([bucket[idx].min() for idx in window_ids])

print('#'*10 + ' Part 2 ' + '#'*10)
pprint.pprint(fft_data_50_4)
    
    


########## Part 2 ##########
[[-0.09234174970049808,
  -0.0033018055741130146,
  -0.001191142341553677,
  -0.005264121425473896],
 [-0.097871,
  -0.00311768076169261,
  -0.0026963977255222327,
  -0.0026416027710168015],
 [-0.06923662297032812,
  -0.004908037458279798,
  -0.001270662074759015,
  -0.0017072240198001574],
 [-0.0329141540334334,
  -0.0016416031375553921,
  -0.0004677433093774577,
  -0.0015057400614314287],
 [-0.044424742309043266,
  -0.003975410715608021,
  -0.0006436348087311329,
  -0.02289150577395796],
 [-0.20011099999999996,
  -0.004682790851959705,
  -0.0009585451838378448,
  -0.019296431862347654],
 [-0.021848,
  -0.0033483848654550273,
  -0.000557929364186183,
  -0.0035162690939115953],
 [-0.06435329391467544,
  -0.006848627432916799,
  -0.0020069470716734534,
  -0.0028565747944614465],
 [-0.14272485664221038,
  -0.009256648442220948,
  -0.011505999999999999,
  -0.09366923193724096],
 [-0.11000344136194612,
  -0.003734543416378284,
  -0.003028015498507062,
  -0.0154

In [110]:
from sklearn.decomposition import PCA

############################################################################
#  Part 3: combine the RAW and FFT features into one 50 x 8 matrix and     #
#  fit a PCA on it to obtain the principal components.                     #
############################################################################

# Row r is user r+1: the four RAW minima followed by the four FFT minima.
pca_data_50_8 = [
    [raw_data_50_4[row][col] for col in range(4)]
    + [fft_data_50_4[row][col] for col in range(4)]
    for row in range(50)
]

# NOTE(review): the columns are passed to PCA as-is, with no rescaling step
# in this cell. The printed RAW minima are around 1e-4 while the FFT minima
# are around 1e-2 to 1e-1, so the FFT columns presumably dominate the leading
# components - confirm whether standardizing first is intended.
pca = PCA()
pca.fit(pca_data_50_8)

print('#'*10 + ' Part 3 ' + '#'*10)
pprint.pprint(pca.components_ )


########## Part 3 ##########
array([[-6.09571963e-04, -4.46628440e-04, -3.34269943e-04,
        -5.76036818e-04, -9.82466336e-01, -9.36880255e-03,
        -1.57398746e-02, -1.85535350e-01],
       [-6.59786139e-04, -8.83175112e-04, -8.45633995e-04,
        -7.23321948e-04,  1.86297663e-01, -3.99752326e-02,
        -1.20527510e-01, -9.74251419e-01],
       [-3.34857089e-03, -2.95798347e-03, -4.40577560e-03,
        -3.82378281e-03, -5.65654609e-03, -4.16204747e-01,
        -9.00256471e-01,  1.27380943e-01],
       [ 6.68593047e-03,  9.98798969e-03,  1.86038900e-03,
        -9.20583376e-04, -4.53464664e-03,  9.08246277e-01,
        -4.18013582e-01,  1.35651288e-02],
       [-8.74095014e-01, -4.24790305e-01, -2.08649669e-01,
        -1.08793883e-01,  5.99363240e-04,  1.17783038e-02,
         7.90418700e-04,  7.72456248e-04],
       [ 2.86618998e-01, -1.43760596e-01, -4.83131083e-01,
        -8.14704464e-01,  3.65011094e-04,  1.70143226e-03,
         4.49930085e-03,  4.03797474e-04],
     