In [6]:
import pandas as pd 

df = pd.read_csv("telemonitoring_parkinsons_updrs.data.csv")


In [7]:
df = df.drop(columns=['subject#', 'age', 'sex', 'test_time', 'total_UPDRS'])

In [9]:
features = df.drop(columns=['motor_UPDRS'])
target = df['motor_UPDRS']

In [13]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
features_normalized = scaler.fit_transform(features)

In [16]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Split the dataset into train and test sets
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)

# Initialize and train the regression model
regressor = LinearRegression()
regressor.fit(X_train, y_train)

# Make predictions
y_pred = regressor.predict(X_test)

print(X_test.iloc[0])  # Use .iloc to access the first row of the DataFrame
print(y_test.iloc[0])  # Use .iloc to access the first value of the Series
print(y_pred[0])       # Access the first value of the numpy array

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error: {mse}")

Jitter(%)         0.009890
Jitter(Abs)       0.000070
Jitter:RAP        0.006200
Jitter:PPQ5       0.004190
Jitter:DDP        0.018600
Shimmer           0.041390
Shimmer(dB)       0.329000
Shimmer:APQ3      0.024940
Shimmer:APQ5      0.020160
Shimmer:APQ11     0.026790
Shimmer:DDA       0.074810
NHR               0.049333
HNR              23.543000
RPDE              0.493150
DFA               0.644720
PPE               0.143890
Name: 5366, dtype: float64
33.084
18.177449390632482
Mean Squared Error: 58.97586482228757


In [19]:
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping

# Define the model
model = Sequential([
    Dense(64, input_dim=X_train.shape[1], activation='relu'),  # Input layer
    Dense(128, activation='relu'),  # Second hidden layer, change 182 to a more typical value
    Dense(32, activation='relu'),  # Third hidden layer
    Dense(1, activation='linear')  # Output layer for regression
])

# Compile the model
model.compile(optimizer='adam', loss='mse', metrics=['mae'])

# Early stopping to prevent overfitting (optional)
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# Train the model
history = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.2, callbacks=[early_stopping])

# Evaluate the model
test_loss, test_mae = model.evaluate(X_test, y_test)
print(f"Test MAE: {test_mae}")

# Predictions
predictions = model.predict(X_test)


Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - loss: 229.2688 - mae: 12.3971 - val_loss: 85.7475 - val_mae: 7.6881
Epoch 2/50
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 86.6303 - mae: 7.7178 - val_loss: 68.5146 - val_mae: 6.9839
Epoch 3/50
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 70.1474 - mae: 7.0714 - val_loss: 65.8273 - val_mae: 6.8618
Epoch 4/50
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 70.6957 - mae: 7.0998 - val_loss: 66.8089 - val_mae: 6.8128
Epoch 5/50
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 70.0106 - mae: 7.1363 - val_loss: 65.0619 - val_mae: 6.7781
Epoch 6/50
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 68.3156 - mae: 7.0080 - val_loss: 64.7685 - val_mae: 6.7602
Epoch 7/50
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step

In [20]:
print(min(target))
print(max(target))

5.0377
39.511


In [22]:
import librosa
import numpy as np

def extract_features(audio_file):
    # Load the audio file
    y, sr = librosa.load(audio_file)

    # --- Jitter Features ---
    # Jitter: percentage, absolute, RAP, PPQ5, DDP
    # Calculate fundamental frequency (F0) using librosa's pitch detection
    f0, voiced_flag, voiced_probs = librosa.pyin(y, fmin=librosa.note_to_hz('C1'), fmax=librosa.note_to_hz('C8'))

    jitter_percent = (np.std(f0) / np.mean(f0) * 100) if np.mean(f0) != 0 else 0
    jitter_abs = np.std(f0)
    jitter_rap = (np.mean(np.abs(np.diff(f0)) / f0[:-1])) if len(f0) > 1 else 0
    jitter_ppq5 = (np.mean(np.abs(np.diff(f0)) / np.concatenate(([f0[0]], f0[:-1])))) if len(f0) > 1 else 0
    jitter_ddp = np.max(np.abs(np.diff(f0))) if len(f0) > 1 else 0

    # --- Shimmer Features ---
    # Shimmer calculations require amplitude variation
    # We can use librosa's amplitude envelope estimation
    amplitude_envelope = librosa.onset.onset_strength(y=y, sr=sr)
    
    shimmer = (np.std(amplitude_envelope) / np.mean(amplitude_envelope)) if np.mean(amplitude_envelope) != 0 else 0
    shimmer_db = (20 * np.log10(np.std(amplitude_envelope))) if np.std(amplitude_envelope) > 0 else 0
    shimmer_apq3 = (np.mean(np.abs(np.diff(amplitude_envelope)) / amplitude_envelope[:-1])) if len(amplitude_envelope) > 1 else 0
    shimmer_apq5 = (np.mean(np.abs(np.diff(amplitude_envelope)) / np.concatenate(([amplitude_envelope[0]], amplitude_envelope[:-1])))) if len(amplitude_envelope) > 1 else 0
    shimmer_apq11 = (np.mean(np.abs(np.diff(amplitude_envelope)) / np.concatenate(([amplitude_envelope[0]], amplitude_envelope[:-1])))) if len(amplitude_envelope) > 1 else 0
    shimmer_dda = (np.mean(np.abs(np.diff(amplitude_envelope)) / amplitude_envelope[:-1])) if len(amplitude_envelope) > 1 else 0

    # --- Noise-to-Harmonics Ratio (NHR) and Harmonics-to-Noise Ratio (HNR) ---
    # NHR and HNR measure the ratio of harmonics to noise in the signal
    # librosa can help calculate the spectral flatness to estimate noise ratio
    spectral_flatness = librosa.feature.spectral_flatness(y=y)
    nhr = np.mean(spectral_flatness)
    hnr = 1 / (1 + nhr)  # Inverse of NHR (simplified assumption)

    # --- Nonlinear Features (RPDE, DFA, PPE) ---
    # RPDE, DFA, and PPE are typically more advanced and are usually extracted using specialized libraries
    # These would require custom calculations or usage of specialized libraries like pyAudioAnalysis or pyWorld
    rpde = np.random.random()  # Placeholder: Calculate RPDE here if needed
    dfa = np.random.random()    # Placeholder: Calculate DFA here if needed
    ppe = np.random.random()   # Placeholder: Calculate PPE here if needed

    # Return features as a dictionary
    return {
        'Jitter(%)': jitter_percent,
        'Jitter(Abs)': jitter_abs,
        'Jitter:RAP': jitter_rap,
        'Jitter:PPQ5': jitter_ppq5,
        'Jitter:DDP': jitter_ddp,
        'Shimmer': shimmer,
        'Shimmer(dB)': shimmer_db,
        'Shimmer:APQ3': shimmer_apq3,
        'Shimmer:APQ5': shimmer_apq5,
        'Shimmer:APQ11': shimmer_apq11,
        'Shimmer:DDA': shimmer_dda,
        'NHR': nhr,
        'HNR': hnr,
        'RPDE': rpde,
        'DFA': dfa,
        'PPE': ppe
    }

# Example usage
audio_file = "amir.wav"
features = extract_features(audio_file)
print(features)


ValueError: operands could not be broadcast together with shapes (167,) (168,) 