In [3]:
import scipy.io
import numpy as np
import pandas as pd
import os

## PRE-PROCESSING LABELS

In [36]:
df = pd.read_csv(r'D:\Internship\PPG\raw_label.csv')
df.head()

Unnamed: 0,Session,Index,Timestamp,Date,Time,O2 Saturation,Beats / min,Perfusion Index,Pleth Variability,Breaths / min,Time Stamp,Unnamed: 11
0,1,1,1678092555,3/6/2023,11:49:15,96,69,5.8,--,--,6/3/2023 8:49,3/6/2023 8:49
1,1,2,1678092556,3/6/2023,11:49:16,96,68,6.0,--,--,6/3/2023 8:49,
2,1,3,1678092557,3/6/2023,11:49:17,96,68,6.0,--,--,6/3/2023 8:49,
3,1,4,1678092558,3/6/2023,11:49:18,97,68,6.1,--,--,6/3/2023 8:49,
4,1,5,1678092559,3/6/2023,11:49:19,97,68,6.1,--,--,6/3/2023 8:49,


In [41]:
df['Beats / min'].head()

0    69
1    68
2    68
3    68
4    68
Name: Beats / min, dtype: int64

In [44]:
# Initialize an empty list to store the mean values
means_list = []
# Calculate the number of chunks
num_chunks = len(df) // 10 + (1 if len(df) % 10 else 0)

for i in range(num_chunks):
    # Calculate start and end indices for each chunk of 10 rows
    start_idx = i * 10
    end_idx = start_idx + 10
    
    # Calculate the mean of the current chunk for "Beats / min" column
    chunk_mean = df['Beats / min'][start_idx:end_idx].mean()
    
    # Append the mean to the list, if the chunk is not empty
    if not np.isnan(chunk_mean):
        means_list.append(chunk_mean)

# Create a new DataFrame for the mean values
mean_df = pd.DataFrame(means_list, columns=['labels'])

# Save the resulting DataFrame to a new CSV file
mean_df.to_csv(r'D:\Internship\PPG\labels.csv', index=False)

print(f"File saved successfully with {len(mean_df)} records.")


File saved successfully with 570 records.


# PRE-PROCESSING FEATURES

In [4]:
import scipy.io
import numpy as np
import os

In [7]:
input_dir = r'D:\Internship\PPG\filteredandpeakdetectedsignals'
output_dir = r'D:\Internship\PPG\processed_2'

In [8]:
files = os.listdir(input_dir)
print(files)

['S10denoised.mat', 'S11denoised.mat', 'S12denoised.mat', 'S13denoised.mat', 'S14denoised.mat', 'S15denoised.mat', 'S16denoised.mat', 'S17denoised.mat', 'S18denoised.mat', 'S19denoised.mat', 'S1denoised.mat', 'S20denoised.mat', 'S21denoised.mat', 'S2denoised.mat', 'S3denoised.mat', 'S4denoised.mat', 'S5denoised.mat', 'S6denoised.mat', 'S7denoised.mat', 'S8denoised.mat', 'S9denoised.mat', 'vPPG_sync.asv', 'vPPG_sync.m']


In [9]:
for file in files:
    if file.endswith('.mat'):
        file_path = os.path.join(input_dir,file)
        data = scipy.io.loadmat(file_path)['d_noise_removed']
        data = data.squeeze() #remove 1st dimension
        
        splits = int(len(data)/250)
        sub_data = np.array_split(data,splits)
        
        for index,sub_array in enumerate(sub_data):
            np.save(f"{output_dir}\{file}_{index}",sub_array)

## REGRESSION MODEL

In [4]:
# Specify the directory containing your .npy files
npy_files_dir = r'D:\Internship\PPG\Vitals_Using_PPG\dataset\features'

# List all files in the directory and filter for .npy files
npy_files = [file for file in os.listdir(npy_files_dir) if file.endswith('.npy')]

# Sort the files if they are in a sequential order but named differently
# npy_files.sort()

# Load the .npy files
features = []
for file in npy_files:
    filepath = os.path.join(npy_files_dir, file)
    sample = np.load(filepath)
    
    # Truncate or keep the sample to the first 250 elements
    if sample.shape[0] > 250:
        truncated_sample = sample[:250]
    else:
        truncated_sample = sample
    
    features.append(truncated_sample)

# Convert the list of numpy arrays into a single numpy array
X = np.array(features)

# Load labels
labels_path = r'D:\Internship\PPG\Vitals_Using_PPG\dataset/labels.csv'
y = pd.read_csv(labels_path)

# Assuming your labels are in the first column
y = y['labels'].values


In [5]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [6]:
from sklearn.linear_model import LinearRegression

# Initialize the model
model = LinearRegression()

# Fit the model
model.fit(X_train, y_train)

LinearRegression()

In [8]:
from sklearn.metrics import mean_squared_error, r2_score 
from sklearn.metrics import mean_absolute_error

# Make predictions using the testing set
y_pred = model.predict(X_test)

# The mean squared error
print('Mean squared error: %.2f' % mean_squared_error(y_test, y_pred))
# The coefficient of determination: 1 is perfect prediction
print('Coefficient of determination: %.2f' % r2_score(y_test, y_pred))
# Calculate the MAE
mae = mean_absolute_error(y_test, y_pred)
print(f'Mean Absolute Error: {mae:.2f}')

Mean squared error: 10937389.66
Coefficient of determination: -111422.31
Mean Absolute Error: 483.00
