In [19]:
import pandas as pd
from scipy.stats import describe as desc
import matplotlib.pyplot as plt
import numpy as np
from sklearn import preprocessing
from sklearn.decomposition import PCA, FastICA
import os
from firstPlots import visualize_channels

In [20]:
# Annotation CSV file names, grouped by subject key.
annotations = {
    'raz': [
        'raz3-3_lc_ts.csv',
        'raz3-3_lr_ts.csv',
        'raz3-3_rc2_ts.csv',
        'raz3-3_ud_ts.csv',
        'raz_3-3_blinks_ts.csv',
    ],
    'yon': [
        'ts_blinks_yon23-2.csv',
        'ts_eg1_yon23-2.csv',
    ],
}

# Directory prefix (trailing slash included) for each subject's annotation files.
ann_paths = {
    'raz': 'data/raz_3-3/',
    'yon': 'data/yonatan_23-2/',
}

# Flat list of annotation paths: each subject's prefix followed by the file name,
# in the insertion order of `annotations`.
ann_files = [
    f'{ann_paths[subject]}{name}'
    for subject, names in annotations.items()
    for name in names
]


In [46]:
# Per-subject recording directories (relative to the notebook's working directory).
raz = 'data/raz_3-3'
yon = 'data/yonatan_23-2'
michael = 'data/michael_3-3'

# Recording loaded into `df` for the single-file cells that follow.
filename = '2025_03_03_1303_raz_blinks_no_metronome.csv'
filepath = os.path.join(raz, filename)
df = pd.read_csv(filepath)

In [22]:
# Plot the recording currently held in `df` using the project helper
# `visualize_channels` (imported from firstPlots). The second argument is
# presumably the figure title - TODO confirm against firstPlots.
visualize_channels(df, 'original data')



In [27]:
# Collect the recording CSVs (names starting with '2025') found in the `raz` directory.
# os.listdir returns entries in arbitrary order, so the names are sorted to make the
# processing and printing order reproducible across runs and machines.
file_names = sorted(
    name
    for name in os.listdir(raz)
    if name.startswith('2025') and name.endswith('.csv')
)


In [33]:
# Standardize the channel columns, then project them onto principal components
def run_pca(df, n=8):
    """
    Standardize every non-timestamp column of ``df`` and project it onto ``n``
    principal components.

    The explained-variance ratio of each component and their sum are printed.

    :param df: DataFrame containing a 'timestamp' column (at any position) plus
        the feature columns to decompose.
    :param n: number of principal components to keep.
    :return: tuple ``(df_pca, pca_result)`` where ``df_pca`` has 'timestamp'
        first followed by PC1..PCn (sharing ``df``'s index), and ``pca_result``
        is the raw array returned by ``PCA.fit_transform``.
    :raises KeyError: if ``df`` has no 'timestamp' column.
    """
    # Take the feature labels from the frame that is actually scaled, so the
    # result stays correctly labelled even when 'timestamp' is not the first column.
    features = df.drop(columns=['timestamp'])

    scaler = preprocessing.StandardScaler()
    df_scaled = pd.DataFrame(
        scaler.fit_transform(features),
        columns=features.columns,
        index=df.index,
    )

    pca = PCA(n_components=n)
    pca_result = pca.fit_transform(df_scaled)

    # Reuse df's index so the timestamp column aligns row-for-row instead of
    # becoming NaN when df does not carry a default RangeIndex.
    df_pca = pd.DataFrame(
        pca_result,
        columns=[f'PC{i+1}' for i in range(n)],
        index=df.index,
    )
    df_pca.insert(0, 'timestamp', df['timestamp'])

    print(pca.explained_variance_ratio_)
    print(np.sum(pca.explained_variance_ratio_))

    return df_pca, pca_result
    








In [45]:
# Print the PCA explained-variance summary (emitted by run_pca) for every
# recording listed in `file_names`.
# The loop uses its own names so it does not rebind the module-level `df`,
# which the later plotting cell pairs with `filename` in its titles.
for recording_name in file_names:
    recording_df = pd.read_csv(os.path.join(raz, recording_name))
    print(recording_name)
    run_pca(recording_df, 8)
    print()
    

In [48]:
def run_ica(df, n=8):
    """
    Run PCA on ``df`` (via ``run_pca``) and then FastICA on the resulting
    principal components.

    :param df: DataFrame with a 'timestamp' column plus the feature columns;
        passed unchanged to ``run_pca``.
    :param n: number of PCA components to keep and of ICA components to extract.
    :return: tuple ``(df_ica, X_ica)`` where ``df_ica`` has 'timestamp' first
        followed by IC1..ICn, and ``X_ica`` is the raw array returned by
        ``FastICA.fit_transform``.
    """
    df_pca, X = run_pca(df, n)

    # Fixed random_state keeps the decomposition repeatable between runs.
    ica = FastICA(n_components=n, random_state=42)
    X_ica = ica.fit_transform(X)

    # Build the result from the ICA output. The previous code selected the IC
    # columns from df_pca, which only holds PC columns and raised KeyError.
    df_ica = pd.DataFrame(
        X_ica,
        columns=[f"IC{i+1}" for i in range(n)],
        index=df_pca.index,
    )
    df_ica.insert(0, 'timestamp', df_pca['timestamp'])

    return df_ica, X_ica
    
    
    

In [50]:
# Independent components of the recording in `df` (run_ica runs PCA internally first).
ica, _ = run_ica(df)
visualize_channels(ica, f'{filename} ICA')

# Principal components of the same recording.
df_pca, X = run_pca(df, n=8)
visualize_channels(df_pca, f'{filename} PCA')

# Untransformed recording, plotted last for comparison.
visualize_channels(df, f'{filename} original data')