# Information about file: 

- freqs = [[1, 5], [6, 15], [16, 50], [51, 150], [151,301],[300, 600]]

- times img = [[-500, -100], [100, 500], [501, 901], [1100, 2200], [2201, 3201]]

- times aud = [[-500, -100], [100, 1500], [1501, 2951], [3300, 4200], [4201, 5001]]

- times read = [[-500, -100], [100, 1500], [1501, 3201], [3600, 4500], [4501, 5501]]

#### Final vectors:
- For each condition (img, aud, read), the data matrix X has shape (171, 30): 171 channels × 30 features (6 frequency bands × 5 time windows).


# Import libraries

In [12]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

# Define Preprocessing function:
    Generates a reduced frequency-time matrix based on input data, frequency ranges, and time intervals.


In [13]:

def get_reduced_freq_time_matrix(data, freqs, times, time_vector, freq_vector, flatten_matrix=False):
    """Average each channel's frequency-time map over frequency bands and time windows.

    Args:
        data: Iterable of channels. Each channel is a 2-D array-like whose
            first axis lines up with ``freq_vector`` and whose second axis
            lines up with ``time_vector``.
        freqs: Sequence of ``[low, high]`` frequency bounds (both inclusive).
        times: Sequence of ``[start, end]`` time bounds (both inclusive).
        time_vector: 1-D array-like giving the time of every column of a channel.
        freq_vector: 1-D array-like giving the frequency of every row of a channel.
        flatten_matrix: If true, each channel's (time window x band) table is
            flattened row by row into a single list, so the time window
            varies slowest and the frequency band fastest.

    Returns:
        list: One entry per channel. Without flattening, each entry is a list
        of ``len(times)`` rows that each hold ``len(freqs)`` averages. With
        flattening, each entry is one list of ``len(times) * len(freqs)``
        averages. A band/window pair that selects no samples yields NaN.
    """
    # Accept plain lists as well as arrays; comparisons below need array semantics.
    time_vector = np.asanyarray(time_vector)
    freq_vector = np.asanyarray(freq_vector)

    # The selection masks depend only on the axis vectors, so build them once
    # rather than once per channel.
    time_masks = [(time_vector >= t[0]) & (time_vector <= t[1]) for t in times]
    freq_masks = [(freq_vector >= f[0]) & (freq_vector <= f[1]) for f in freqs]

    chan_data = []
    for data_channel in data:
        channel = np.asanyarray(data_channel)
        data_prep = []

        for t_mask in time_masks:
            # Transpose so the time axis comes first, keep the samples inside
            # the window, then transpose back to (frequency, time).
            window = channel.T[t_mask].T
            # One mean per band, taken over every sample in the band/window cell.
            data_prep.append([np.average(window[f_mask]) for f_mask in freq_masks])

        if flatten_matrix:
            data_prep = [value for row in data_prep for value in row]

        chan_data.append(data_prep)

    return chan_data


# Read in the data and perform basic exploratory analysis

In [14]:
# Load the ERSP .mat export for each condition (img, aud, read) and reduce
# every channel to band/window averages with get_reduced_freq_time_matrix.
import mat73

# Frequency bands, shared by all three conditions.
# NOTE(review): get_reduced_freq_time_matrix applies both bounds inclusively,
# so the values 300-301 fall into both of the last two bands -- confirm that
# this overlap is intended.
freqs = [[1, 5], [6, 15], [16, 50], [51, 150], [151,301],[300, 600]]

# --- img condition ---
# The channel labels, time axis and frequency axis are all read from this file.
file_name = '/Users/lorafanda/Documents/Coding/Experiments_PsychoPy/Experiments_PsychoPy/Analysis_Data/PAT_3415_wref/ERSP_img.mat'
data = mat73.loadmat(file_name)
data_label = [ i for i in data['ERSP'].keys()]
# Value stored under the FIRST key of the ERSP struct; later cells use it as
# the channel labels (DataFrame index). Presumably the channel-name field --
# this relies on key order, verify against the .mat file.
data_channels = data['ERSP'][data_label[0]]
time_vector = data['ERSP']['times'][0]
freq_vector = data['ERSP']['freqs'][0]
# From here on `data` is the ersp array itself, no longer the loaded dict.
data = data['ERSP']['ersp']
# Time windows used for the img condition.
times = [[-500, -100], [100, 500], [501, 901], [1100, 2200], [2201, 3201]]
data_small_img = get_reduced_freq_time_matrix(data, freqs, times, time_vector, freq_vector, flatten_matrix=True)

# --- aud condition ---
# time_vector is re-read from this file.
# NOTE(review): freq_vector is NOT re-read; the frequency axis of the img file
# is reused -- confirm that all three files share the same frequency axis.
data = mat73.loadmat('/Users/lorafanda/Documents/Coding/Experiments_PsychoPy/Experiments_PsychoPy/Analysis_Data/PAT_3415_wref/ERSP_aud.mat',use_attrdict=True)
time_vector = data['ERSP']['times'][0]
data = data['ERSP']['ersp']
# Time windows used for the aud condition.
times = [[-500, -100], [100, 1500], [1501, 2951], [3300, 4200], [4201, 5001]]
data_small_aud = get_reduced_freq_time_matrix(data, freqs, times, time_vector,freq_vector, flatten_matrix=True)

# --- read condition ---
# Same pattern as aud: own time axis, frequency axis reused from the img file.
# After this stanza `data`, `time_vector` and `times` hold the read-condition values.
data = mat73.loadmat('/Users/lorafanda/Documents/Coding/Experiments_PsychoPy/Experiments_PsychoPy/Analysis_Data/PAT_3415_wref/ERSP_read.mat',use_attrdict=True)
time_vector = data['ERSP']['times'][0]
data = data['ERSP']['ersp']
# Time windows used for the read condition.
times = [[-500, -100], [100, 1500], [1501, 3201], [3600, 4500], [4501, 5501]]
data_small_read = get_reduced_freq_time_matrix(data, freqs, times, time_vector,freq_vector, flatten_matrix=True)




# Print the shape of the 'ersp' data
# print(np.shape(data['ERSP']['ersp']))

# ONLY RUN ONCE

# NICE WORKS 

In [15]:
# Display the reduced img data: a nested list with one flat row of averages per channel.
data_small_img

[[0.48371515,
  0.2657639,
  -0.11943403,
  -0.04815736,
  -0.17925572,
  -0.1555734,
  1.1879635,
  0.95213133,
  -0.08699582,
  -0.3978957,
  -0.5550085,
  -0.3190761,
  1.2274517,
  0.69606096,
  -0.40306824,
  -0.74522626,
  -0.5575304,
  -0.429101,
  -0.221501,
  -0.0104826335,
  -0.06338717,
  -0.2623048,
  -0.32365164,
  -0.30866638,
  -0.59113926,
  -0.38998398,
  0.026108628,
  -0.020250373,
  0.113928415,
  0.026257332],
 [0.39860585,
  0.028650507,
  -0.2038061,
  -0.117414735,
  -0.13642673,
  -0.11485608,
  0.5884407,
  0.4992962,
  -0.3288946,
  -0.3678324,
  -0.41331512,
  -0.14581548,
  1.3066396,
  0.8063955,
  -0.2981349,
  -0.65275633,
  -0.687127,
  -0.38372338,
  0.027267631,
  -0.17643781,
  -0.3131073,
  -0.43814847,
  -0.38584268,
  -0.26136133,
  -0.38634947,
  -0.35429573,
  -0.0733114,
  -0.14287306,
  -0.045190115,
  0.021991761],
 [0.0433571,
  0.010884679,
  -0.10457219,
  -0.15374434,
  -0.18251258,
  -0.102815196,
  0.5238097,
  0.66373867,
  -0.04535445

In [16]:
# Display the reduced img data again (same expression as the previous cell).
data_small_img

[[0.48371515,
  0.2657639,
  -0.11943403,
  -0.04815736,
  -0.17925572,
  -0.1555734,
  1.1879635,
  0.95213133,
  -0.08699582,
  -0.3978957,
  -0.5550085,
  -0.3190761,
  1.2274517,
  0.69606096,
  -0.40306824,
  -0.74522626,
  -0.5575304,
  -0.429101,
  -0.221501,
  -0.0104826335,
  -0.06338717,
  -0.2623048,
  -0.32365164,
  -0.30866638,
  -0.59113926,
  -0.38998398,
  0.026108628,
  -0.020250373,
  0.113928415,
  0.026257332],
 [0.39860585,
  0.028650507,
  -0.2038061,
  -0.117414735,
  -0.13642673,
  -0.11485608,
  0.5884407,
  0.4992962,
  -0.3288946,
  -0.3678324,
  -0.41331512,
  -0.14581548,
  1.3066396,
  0.8063955,
  -0.2981349,
  -0.65275633,
  -0.687127,
  -0.38372338,
  0.027267631,
  -0.17643781,
  -0.3131073,
  -0.43814847,
  -0.38584268,
  -0.26136133,
  -0.38634947,
  -0.35429573,
  -0.0733114,
  -0.14287306,
  -0.045190115,
  0.021991761],
 [0.0433571,
  0.010884679,
  -0.10457219,
  -0.15374434,
  -0.18251258,
  -0.102815196,
  0.5238097,
  0.66373867,
  -0.04535445

In [17]:
# Compare the array size before and after the reduction step.
# `data` is whatever the loading cell assigned last.
original_shape = np.shape(data)
reduced_shape = np.shape(data_small_img)
print('shape of original data:', original_shape)
print('shape of preprocessed data:', reduced_shape)

# List every channel label so the ones to discard can be picked out by eye.
print('\n All channels: \n', data_channels[:])

# Channel labels to drop from the data: the photo channel, the four MKR
# channels, both ECG leads and HLG1..HLG18.
remove_channels = (
    ['photo']
    + [f'MKR{n}+' for n in range(1, 5)]
    + ['ECG-', 'ECG+']
    + [f'HLG{n}' for n in range(1, 19)]
)

shape of original data: (171, 2495, 800)
shape of preprocessed data: (171, 30)

 All channels: 
 ['GA1', 'GA2', 'GA3', 'GA4', 'GA5', 'GA6', 'GA7', 'GA8', 'GB1', 'GB2', 'GB3', 'GB4', 'GB5', 'GB6', 'GB7', 'GB8', 'GC1', 'GC2', 'GC3', 'GC4', 'GC5', 'GC6', 'GC7', 'GC8', 'GD1', 'GD2', 'GD3', 'GD4', 'GD5', 'GD6', 'GD7', 'GD8', 'GE1', 'GE2', 'GE3', 'GE4', 'GE5', 'GE6', 'GE7', 'GE8', 'GF1', 'GF2', 'GF3', 'GF4', 'GF5', 'GF6', 'GF7', 'GF8', 'GG1', 'GG2', 'GG3', 'GG4', 'GG5', 'GG6', 'GG7', 'GG8', 'GH1', 'GH2', 'GH3', 'GH4', 'GH5', 'GH6', 'GH7', 'GH8', 'MKR1+', 'TA1', 'TA2', 'TA3', 'TA4', 'TA5', 'TA6', 'TM1', 'TM2', 'TM3', 'TM4', 'TM5', 'TM6', 'TP1', 'TP2', 'TP3', 'TP4', 'TP5', 'TP6', 'OI1', 'OI2', 'OI3', 'OI4', 'OI5', 'OI6', 'OS1', 'OS2', 'OS3', 'OS4', 'OS5', 'OS6', 'IMG1', 'IMG2', 'IMG3', 'IMG4', 'IMG5', 'IMG6', 'IMG7', 'IMG8', 'IMG9', 'IMG10', 'IMG11', 'IMG12', 'IMG13', 'IMG14', 'IMG15', 'IMG16', 'IMG17', 'IMG18', 'EX1', 'EX2', 'EX3', 'EX4', 'EX5', 'EX6', 'EX7', 'EX8', 'EX9', 'EX10', 'EX11', 'EX

# Create Dataframe from flattened data

In [18]:


# One frame per condition: rows are channels (indexed by channel label),
# columns are the flattened band/window averages.
df_img = pd.DataFrame(data_small_img, index=data_channels)
df_aud = pd.DataFrame(data_small_aud, index=data_channels)
df_read = pd.DataFrame(data_small_read, index=data_channels)

# Stack the three conditions; each channel label therefore appears once per
# condition in the index.
df = pd.concat([df_img, df_aud, df_read])

# Tag every row with its condition. The labels are derived from the frame
# lengths so they stay correct if the number of channels changes.
df['Class'] = (
    ['img'] * len(df_img)
    + ['aud'] * len(df_aud)
    + ['read'] * len(df_read)
)

# Drop the unwanted channels. Matching is by index label, so each listed
# channel is removed from all three conditions; a label that is missing from
# the index raises KeyError.
df.drop(remove_channels, inplace=True, axis=0)

df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,21,22,23,24,25,26,27,28,29,Class
GA1,0.483715,0.265764,-0.119434,-0.048157,-0.179256,-0.155573,1.187963,0.952131,-0.086996,-0.397896,...,-0.262305,-0.323652,-0.308666,-0.591139,-0.389984,0.026109,-0.020250,0.113928,0.026257,img
GA2,0.398606,0.028651,-0.203806,-0.117415,-0.136427,-0.114856,0.588441,0.499296,-0.328895,-0.367832,...,-0.438148,-0.385843,-0.261361,-0.386349,-0.354296,-0.073311,-0.142873,-0.045190,0.021992,img
GA3,0.043357,0.010885,-0.104572,-0.153744,-0.182513,-0.102815,0.523810,0.663739,-0.045354,-0.133836,...,0.809905,0.266149,-0.036997,-0.131652,-0.203400,0.007803,-0.021141,0.132494,0.046397,img
GA4,0.410730,0.080677,0.036813,0.046381,-0.096244,-0.080055,-0.080726,-0.129069,-0.340858,-0.052157,...,0.984634,0.482303,0.260356,-0.220176,-0.091140,-0.056507,-0.014549,0.051525,0.017754,img
GA5,-0.143545,0.366892,0.107602,-0.191617,-0.166783,-0.030808,1.012558,0.869969,0.159457,-0.519511,...,0.458651,0.248589,-0.063716,-0.572379,-0.618667,-0.166705,-0.059766,-0.031125,-0.044360,img
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
IPG14,0.233755,0.090252,0.051056,-0.237583,-0.310288,-0.108078,1.925159,1.979032,-0.004408,-1.011199,...,-1.006213,-2.837413,-2.158643,0.018463,-0.007864,-0.077315,-0.021646,-0.366707,-0.484075,read
IPG15,-0.029763,-0.220814,0.065449,-0.325920,-0.478035,-0.159640,1.760959,2.038296,-0.263004,-1.753400,...,-2.118024,-3.239515,-2.230726,-0.424178,-0.436135,-0.180312,-0.041112,-0.200932,-0.442266,read
IPG16,-0.017537,-0.097940,0.025661,-0.331781,-0.434525,-0.200408,2.122744,2.409902,-0.222750,-1.837415,...,-2.108273,-3.297243,-2.187434,-0.583987,-0.637213,-0.171350,-0.050824,-0.178902,-0.429087,read
IPG17,-0.041568,-0.093673,0.019050,-0.318210,-0.421468,-0.174003,2.135268,2.442220,-0.188955,-1.758806,...,-2.149981,-3.268004,-2.168905,-0.541484,-0.661609,-0.136475,-0.056355,-0.220872,-0.405714,read


#### Basic statistics

In [19]:
# Summary statistics (count, mean, std, quartiles, min/max) for every numeric column.
df.describe()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,20,21,22,23,24,25,26,27,28,29
count,438.0,438.0,438.0,438.0,438.0,438.0,438.0,438.0,438.0,438.0,...,438.0,438.0,438.0,438.0,438.0,438.0,438.0,438.0,438.0,438.0
mean,0.1157,0.08482,-0.051254,-0.180588,-0.31616,-0.185487,1.152099,0.980974,0.067953,-0.255114,...,-0.085249,-0.371303,-1.233649,-1.093457,-0.484238,-0.380564,-0.064272,0.09164,0.167726,0.020193
std,0.288676,0.218599,0.132674,0.188084,0.202884,0.16095,1.018045,0.95178,0.544479,1.244691,...,0.373952,0.918143,1.149252,1.016466,0.483363,0.39982,0.226803,0.370989,0.520494,0.399551
min,-0.931134,-0.700454,-0.609152,-1.248175,-1.198874,-0.73781,-3.739925,-4.470347,-1.277529,-3.118599,...,-1.552981,-3.300223,-4.069169,-3.72517,-3.258931,-2.880514,-1.484233,-1.48294,-1.127909,-1.116112
25%,-0.078453,-0.054155,-0.122762,-0.266672,-0.436965,-0.274397,0.569312,0.463744,-0.19864,-0.810424,...,-0.26484,-0.735521,-1.937182,-1.771643,-0.748738,-0.627905,-0.149875,-0.120264,-0.165146,-0.239552
50%,0.12046,0.089247,-0.046679,-0.146094,-0.300759,-0.140706,1.089323,0.936022,-0.011572,-0.328969,...,-0.079373,-0.249108,-1.038995,-0.782952,-0.452946,-0.329507,-0.05452,0.010606,0.063271,0.013591
75%,0.321937,0.223787,0.028316,-0.043923,-0.163867,-0.069509,1.677633,1.404443,0.204712,-0.06366,...,0.104436,0.043174,-0.349083,-0.261452,-0.243375,-0.160282,0.021423,0.210689,0.356364,0.235821
max,1.0448,0.919902,0.537519,0.242831,0.213296,0.198795,7.632792,5.897777,3.750516,8.595932,...,1.620946,2.793798,2.063274,1.480843,2.227054,1.604169,1.15879,1.667377,2.077843,1.467995


## Save df file: 

In [20]:
from datetime import date
import os

today = date.today()

# The name of the folder containing the source .mat file is reused as both the
# (relative) output directory and the prefix of the pickle file name.
file_name = '/Users/lorafanda/Documents/Coding/Experiments_PsychoPy/Experiments_PsychoPy/Analysis_Data/PAT_3415_wref/ERSP_img.mat'
parent_dir = os.path.basename(os.path.dirname(file_name))
save_name = os.path.join(parent_dir, parent_dir + '_' + str(today) + '.pkl')

# to_pickle does not create missing directories, so make sure the target
# folder exists relative to the current working directory.
os.makedirs(parent_dir, exist_ok=True)

df.to_pickle(save_name)
print("File saved as: ", save_name)

# To load the saved frame later (only unpickle files you trust):
# df = pd.read_pickle(save_name)


File saved as:  PAT_3415_wref/PAT_3415_wref_2023-08-27.pkl
