In [119]:
import pandas as pd
import os
import pandas as pd
import numpy as np
import pywt
from scipy.stats import entropy
from sklearn.preprocessing import StandardScaler
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, r2_score, classification_report
import pywt

from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV

In [38]:
# Path to your data folder
data_folder = './data/csv'
file_path = './data/combined_sEMG_data_without_extra_rest.csv'

In [39]:
# df = pd.read_csv(file_path, header=None, names=['muscle_1', 'muscle_2', 'muscle_3', 'muscle_4', 'subject_id', 'activity_id', 'cycle_id'])
df = pd.read_csv(file_path, header=0, names=['muscle_1', 'muscle_2', 'muscle_3', 'muscle_4', 'subject_id', 'activity_id', 'cycle_id'])

In [41]:
print(f" - subject : \n{df['subject_id'].value_counts()}")
print(f" - cycle : \n{df['cycle_id'].value_counts()}")
print(f" - activity : \n{df['activity_id'].value_counts()}")

 - subject : 
subject_id
1     600000
2     600000
23    600000
24    600000
25    600000
26    600000
27    600000
28    600000
29    600000
30    600000
31    600000
32    600000
33    600000
34    600000
35    600000
36    600000
37    600000
38    600000
39    600000
22    600000
21    600000
20    600000
10    600000
3     600000
4     600000
5     600000
6     600000
7     600000
8     600000
9     600000
11    600000
19    600000
12    600000
13    600000
14    600000
15    600000
16    600000
17    600000
18    600000
40    600000
Name: count, dtype: int64
 - cycle : 
cycle_id
1    4800000
2    4800000
3    4800000
4    4800000
5    4800000
Name: count, dtype: int64
 - activity : 
activity_id
1     2400000
2     2400000
3     2400000
4     2400000
5     2400000
6     2400000
7     2400000
8     2400000
9     2400000
10    2400000
Name: count, dtype: int64


In [42]:
class EMGSignalProcessor:
    def __init__(self, data, subjects, sampling_rate=2000, window_ms=250):
        """
        Initializes the EMG Signal Processor.

        Parameters:
        - data (pd.DataFrame): The input data containing EMG signals with subject, cycle, and activity information.
        - subjects (list): List of subject IDs to process.
        - sampling_rate (int): Sampling rate of the signal in Hz.
        - window_ms (int): Window size in milliseconds for wavelet transformation.
        """
        self.data = data
        self.subjects = subjects
        self.sampling_rate = sampling_rate
        self.window_samples = int((window_ms / 1000) * sampling_rate)  # Convert window ms to samples

    def segment_data(self, subject_id, cycle_id, activity_id):
        """
        Segments data based on subject, cycle, and activity IDs.

        Returns:
        - np.array: Array of signal values for the specified segment.
        """
        segment = self.data[(self.data['subject_id'] == subject_id) &
                            (self.data['cycle_id'] == cycle_id) &
                            (self.data['activity_id'] == activity_id)]
        return segment[['muscle_1', 'muscle_2', 'muscle_3', 'muscle_4']].values

In [44]:
class WaveletFeatureExtractor:
    def __init__(self, wavelet_name='bior2.2', levels=5):
        """
        Initializes the Wavelet Feature Extractor.

        Parameters:
        - wavelet_name (str): The name of the mother wavelet to use.
        - levels (int): Number of decomposition levels for the DWT.
        """
        self.wavelet_name = wavelet_name
        self.levels = levels

    def extract_features(self, signal_segment):
        """
        Extracts features from a signal segment using DWT.

        Parameters:
        - signal_segment (np.array): Segment of EMG data for a single activity.

        Returns:
        - list: List of features extracted from the wavelet coefficients.
        """
        features = []
        for channel in signal_segment.T:
            coeffs = pywt.wavedec(channel, self.wavelet_name, level=self.levels)
            for coeff in coeffs:
                # Example statistical features from each level's coefficients
                features.append(np.mean(coeff))
                features.append(np.std(coeff))
                features.append(np.var(coeff))
                features.append(entropy(np.abs(coeff)))
        return features


In [45]:

class DataTransformer:
    def __init__(self, processor, feature_extractor, scaler=None):
        """
        Initializes the Data Transformer to apply wavelet transform and structure data.

        Parameters:
        - processor (EMGSignalProcessor): Instance of EMGSignalProcessor for data segmentation.
        - feature_extractor (WaveletFeatureExtractor): Instance of WaveletFeatureExtractor for feature extraction.
        - scaler (StandardScaler): Optional scaler for feature normalization.
        """
        self.processor = processor
        self.feature_extractor = feature_extractor
        self.scaler = scaler

    def transform_data(self):
        """
        Transforms the data into a structured dataframe with wavelet features.

        Returns:
        - pd.DataFrame: DataFrame containing wavelet features for each segment.
        """
        all_features = []
        for subject_id in self.processor.subjects:
            for cycle_id in range(6):  # 0 for rest and 1 - 5 for cycles
                for activity_id in range(1,11):  # 1 - 10 activities
                    segment = self.processor.segment_data(subject_id, cycle_id, activity_id)
                    if segment.shape[0] >= self.processor.window_samples:
                        for start in range(0, segment.shape[0] - self.processor.window_samples + 1, self.processor.window_samples):
                            window = segment[start:start + self.processor.window_samples]
                            features = self.feature_extractor.extract_features(window)
                            features.append(subject_id)
                            features.append(cycle_id)
                            features.append(activity_id)
                            all_features.append(features)

        columns = [f'feature_{i}' for i in range(len(all_features[0]) - 3)] + ['subject_id', 'cycle_id', 'activity_id']
        df_features = pd.DataFrame(all_features, columns=columns)
        
        if self.scaler:
            features = df_features.drop(['subject_id', 'cycle_id', 'activity_id'], axis=1)
            df_features[features.columns] = self.scaler.fit_transform(features)
        
        return df_features


In [46]:
# subjects = [1, 2, 3, 4, 5]
subjects = [1]
sampling_rate = 2000

# Initialize processor, feature extractor, and data transformer
processor = EMGSignalProcessor(data=df, subjects=subjects, sampling_rate=sampling_rate)
feature_extractor = WaveletFeatureExtractor(wavelet_name='bior2.2', levels=5)
scaler = StandardScaler()
transformer = DataTransformer(processor, feature_extractor, scaler)

# Transform data and get the features DataFrame
df_features = transformer.transform_data()

In [47]:
df_features.shape

(540, 99)

In [49]:
df_features.iloc[0:10, :]

Unnamed: 0,feature_0,feature_1,feature_2,feature_3,feature_4,feature_5,feature_6,feature_7,feature_8,feature_9,...,feature_89,feature_90,feature_91,feature_92,feature_93,feature_94,feature_95,subject_id,cycle_id,activity_id
0,-0.056096,-0.536045,-0.303615,0.699025,0.181079,-0.375409,-0.281544,1.32832,0.21854,-0.349038,...,-0.411684,-0.339799,-1.040315,0.047688,-0.403293,-0.339435,-0.821679,1,1,1
1,-0.111284,-0.575677,-0.310024,0.273538,0.349079,-0.458058,-0.299284,-0.01351,0.220915,-0.357145,...,-0.42583,-0.341739,-1.573776,-0.054394,-0.454567,-0.346192,-1.562976,1,1,1
2,-0.654025,-0.248808,-0.241599,0.150813,-0.159454,-0.182141,-0.230918,-0.343611,-0.201247,-0.470674,...,-0.49263,-0.349596,-1.356942,-0.104342,-0.475518,-0.348587,-1.823134,1,1,1
3,-0.294782,-0.589486,-0.312135,0.542575,0.208583,-0.470805,-0.301811,-2.117469,0.299454,-0.341652,...,-0.514907,-0.351738,-1.00927,0.061113,-0.482623,-0.349351,-1.798897,1,1,1
4,0.216487,-0.347945,-0.266094,0.40023,0.616044,-0.459047,-0.299482,1.46725,0.07283,-0.323192,...,-0.469917,-0.347166,-1.592051,0.116336,-0.477606,-0.348814,-1.431885,1,1,1
5,0.223182,-0.405252,-0.278768,0.600971,-1.140346,-0.320247,-0.268401,-0.179878,-0.025639,-0.265124,...,-0.513163,-0.351579,-2.521445,0.018147,-0.520237,-0.352988,-1.818965,1,1,1
6,-0.043446,-0.537557,-0.303869,0.244969,-0.271572,-0.329113,-0.270584,-0.394187,-0.765778,-0.378613,...,-0.517027,-0.351929,-0.733891,-0.016094,-0.521562,-0.353104,-1.142762,1,1,1
7,0.092635,-0.33297,-0.262603,0.96076,-0.050343,-0.374306,-0.281292,-1.025693,-0.445672,-0.15946,...,-0.477964,-0.348055,-0.98954,-0.064311,-0.479957,-0.349067,-1.099276,1,1,1
8,0.283473,-0.48217,-0.294067,0.385198,-0.229424,-0.367437,-0.279709,-0.627297,0.364894,-0.511996,...,-0.520586,-0.352246,-1.287541,0.220769,-0.507148,-0.3518,-1.20152,1,1,1
9,0.052698,-0.510987,-0.299294,0.98443,-0.514015,-0.429735,-0.293468,0.507155,0.33661,-0.502056,...,-0.502191,-0.350544,-1.286685,-0.061721,-0.522286,-0.353166,-1.254565,1,1,1


### **Wavelet Transform and the 96 Columns**
1. **Wavelet Decomposition Levels (`levels=5`)**:
   - With `levels=5`, the signal is decomposed into **5 detail coefficients** (`D1, D2, ..., D5`) and **1 approximation coefficient** (`A5`).
   - This gives **6 sets of coefficients** for each channel.

2. **Number of Channels**:
   - You have 4 EMG channels (`channel_1`, `channel_2`, `channel_3`, `channel_4`).
   - For each channel, you calculate features for all **6 sets of coefficients** (`D1, ..., D5, A5`).

3. **Features for Each Set of Coefficients**:
   - In the example code, we compute **4 statistical features** for each set of coefficients:
     - **Mean**
     - **Standard Deviation**
     - **Variance**
     - **Entropy**

4. **Calculation**:
   - For each channel: \( 6 \, \text{(sets of coefficients)} \times 4 \, \text{(features)} = 24 \, \text{columns} \).
   - For 4 channels: \( 24 \times 4 = 96 \, \text{columns (features)} \).

In [50]:
## More Preprocessing

In [51]:
if df_features.isnull().values.any():
    print("yes NaN found")

In [52]:
X = df_features.drop(['subject_id', 'cycle_id', 'activity_id'], axis=1)
y = df_features['activity_id']

In [53]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X = scaler.fit_transform(X)


In [54]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)


In [55]:
from sklearn.preprocessing import LabelEncoder
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

In [56]:
from sklearn.decomposition import PCA
pca = PCA(n_components=30)  # Keep 30 components
X = pca.fit_transform(X)

In [57]:
from sklearn.ensemble import RandomForestClassifier
model = RandomForestClassifier(class_weight='balanced')


In [58]:
from sklearn.svm import SVC
model = SVC(kernel='rbf', C=1.0, gamma='scale', random_state=42)
model.fit(X_train, y_train)


In [59]:
from sklearn.model_selection import GridSearchCV
param_grid = {'C': [0.1, 1, 10], 'gamma': ['scale', 'auto'], 'kernel': ['rbf']}
grid = GridSearchCV(SVC(), param_grid, refit=True, verbose=2, cv=5)
grid.fit(X_train, y_train)

Fitting 5 folds for each of 6 candidates, totalling 30 fits
[CV] END .....................C=0.1, gamma=scale, kernel=rbf; total time=   0.0s
[CV] END .....................C=0.1, gamma=scale, kernel=rbf; total time=   0.0s
[CV] END .....................C=0.1, gamma=scale, kernel=rbf; total time=   0.0s
[CV] END .....................C=0.1, gamma=scale, kernel=rbf; total time=   0.0s
[CV] END .....................C=0.1, gamma=scale, kernel=rbf; total time=   0.0s
[CV] END ......................C=0.1, gamma=auto, kernel=rbf; total time=   0.0s
[CV] END ......................C=0.1, gamma=auto, kernel=rbf; total time=   0.0s
[CV] END ......................C=0.1, gamma=auto, kernel=rbf; total time=   0.0s
[CV] END ......................C=0.1, gamma=auto, kernel=rbf; total time=   0.0s
[CV] END ......................C=0.1, gamma=auto, kernel=rbf; total time=   0.0s
[CV] END .......................C=1, gamma=scale, kernel=rbf; total time=   0.0s
[CV] END .......................C=1, gamma=scale,

In [60]:
from sklearn.metrics import classification_report
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred))


              precision    recall  f1-score   support

           1       0.71      0.83      0.77        12
           2       1.00      1.00      1.00        12
           3       1.00      1.00      1.00        12
           4       0.67      0.67      0.67        12
           5       1.00      0.67      0.80        12
           6       1.00      0.75      0.86        12
           7       0.64      0.58      0.61        12
           8       0.44      0.58      0.50        12
           9       0.64      0.75      0.69        12

    accuracy                           0.76       108
   macro avg       0.79      0.76      0.77       108
weighted avg       0.79      0.76      0.77       108



In [146]:
# Preprocessor
class DataPreprocessor:
    def __init__(self, apply_pca=False, n_components=30):
        self.scaler = StandardScaler()
        self.label_encoder = LabelEncoder()
        self.apply_pca = apply_pca
        self.pca = PCA(n_components=n_components) if apply_pca else None

    def preprocess(self, df):
        # Handle missing values
        df.fillna(df.mean(), inplace=True)

        # Separate features and labels
        X = df.iloc[:, :-3]  # Features
        y = df['activity_id']  # Labels

        # Normalize features
        X = self.scaler.fit_transform(X)
        
        # Apply PCA if enabled
        if self.apply_pca and self.pca:
            X = self.pca.fit_transform(X)

        # Encode labels
        y = self.label_encoder.fit_transform(y)

        # Split data
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)

        return X_train, X_test, y_train, y_test

In [152]:
class ModelTrainer:
    def __init__(self, models, param_grids):
        self.models = models
        self.param_grids = param_grids

    def train_and_evaluate(self, X_train, X_test, y_train, y_test):
        results = []

        for model_name, model in self.models.items():
            param_grid = self.param_grids.get(model_name, {})
            grid_search = GridSearchCV(model, param_grid, cv=5, scoring='accuracy', n_jobs=-1)
            grid_search.fit(X_train, y_train)

            best_model = grid_search.best_estimator_
            y_pred = best_model.predict(X_test)

            accuracy = accuracy_score(y_test, y_pred)
            r2 = r2_score(y_test, y_pred)
            report = classification_report(y_test, y_pred, output_dict=True)

            results.append({
                'model': model_name,
                'best_params': grid_search.best_params_,
                'accuracy': accuracy,
                'r2_score': r2,
                'classification_report': report
            })

        return results

In [153]:
class ExperimentRunner:
    def __init__(self, wavelets, levels, models, param_grids):
        self.wavelets = wavelets
        self.levels = levels
        self.models = models
        self.param_grids = param_grids
        self.results = []

    def run(self, df, processor, scaler, apply_pca=False, n_components=None):
        for wavelet in self.wavelets:
            for level in self.levels:
                print(f"Running experiment for Wavelet: {wavelet}, Levels: {level}")
                feature_extractor = WaveletFeatureExtractor(wavelet_name=wavelet, levels=level)
                transformer = DataTransformer(processor, feature_extractor, scaler)
                df_features = transformer.transform_data()

                # Preprocess Data
                preprocessor = DataPreprocessor(apply_pca=apply_pca, n_components=n_components)
                X_train, X_test, y_train, y_test = preprocessor.preprocess(df_features)

                # Train Models
                trainer = ModelTrainer(self.models, self.param_grids)
                model_results = trainer.train_and_evaluate(X_train, X_test, y_train, y_test)

                # Store Results
                for result in model_results:
                    result.update({'wavelet': wavelet, 'level': level})
                    self.results.append(result)

        return self.results

In [154]:
subjects = [1]
sampling_rate = 2000

In [None]:
processor = EMGSignalProcessor(data=df, subjects=subjects, sampling_rate=sampling_rate)

# Define models and parameter grids
models = {
    'RandomForest': RandomForestClassifier(random_state=42)
#     ,'SVM': SVC(random_state=42)
    ,'LogisticRegression': LogisticRegression(random_state=42, class_weight='balanced')
#     ,'KNN': KNeighborsClassifier()
}
param_grids = {
    'RandomForest': {
        'n_estimators': [50, 100, 150, 200],  # Broaden the range of forest sizes
#         'max_depth': [10, 13,  15, 17,  20, 25, None],  # Add intermediate depths
        'max_depth': [10, 13, 15, 17],  # Add intermediate depths
        'min_samples_split': [2, 3, 5],  # Introduce control over tree splits
        'min_samples_leaf': [1, 2, 3],  # Explore small leaves
        'bootstrap': [True, False]  # Test both sampling strategies
    }
#     ,'SVM': {
#         'C': [0.01, 0.1, 1, 10, 100],  # Capture broader regularization strength
#         'kernel': ['linear', 'rbf', 'poly', 'sigmoid'],  # Add diversity
#         'gamma': ['scale', 'auto'],  # Include gamma for non-linear kernels
#         'degree': [2, 3, 4]  # For `poly` kernel
#     },
    'LogisticRegression': {
#         'C': [0.01, 0.1, 1, 10, 100],  # Broader regularization
        'C': [10,50,100],  # Broader regularization
#         'max_iter': [1000, 2000, 5000, 10000],  # Account for slow-convergence solvers
        'max_iter': [500, 1000],  # Account for slow-convergence solvers
#         'solver': ['lbfgs', 'saga', 'liblinear', 'newton-cg', 'sag']  # Cover all solvers
        'solver': ['liblinear']  # Cover all solvers
        
    }
#     ,'KNN': {
#         'n_neighbors': [3, 5, 7, 9, 11],  # Add larger neighbors
#         'weights': ['uniform', 'distance'],  # Test weighted voting
#         'metric': ['euclidean', 'manhattan', 'minkowski']  # Explore distance metrics
#     }
}

# Run experiments
wavelets = ['bior2.2', 'bior3.3', 'bior4.4', 'coif3', 'coif4', 'coif5', 'db4', 'db6', 'db8', 'sym4', 'sym6', 'sym8']
levels = [2, 3, 4, 5, 6]

In [156]:
experiment_runner = ExperimentRunner(wavelets, levels, models, param_grids)
results = experiment_runner.run(df, processor, StandardScaler())

Running experiment for Wavelet: bior2.2, Levels: 2




Running experiment for Wavelet: bior2.2, Levels: 3






Running experiment for Wavelet: bior2.2, Levels: 4






Running experiment for Wavelet: bior2.2, Levels: 5






Running experiment for Wavelet: bior2.2, Levels: 6




Running experiment for Wavelet: bior3.3, Levels: 2




Running experiment for Wavelet: bior3.3, Levels: 3




Running experiment for Wavelet: bior3.3, Levels: 4






Running experiment for Wavelet: bior3.3, Levels: 5






Running experiment for Wavelet: bior3.3, Levels: 6






Running experiment for Wavelet: bior4.4, Levels: 2




Running experiment for Wavelet: bior4.4, Levels: 3




Running experiment for Wavelet: bior4.4, Levels: 4




Running experiment for Wavelet: bior4.4, Levels: 5




Running experiment for Wavelet: bior4.4, Levels: 6




Running experiment for Wavelet: coif3, Levels: 2




Running experiment for Wavelet: coif3, Levels: 3




Running experiment for Wavelet: coif3, Levels: 4




Running experiment for Wavelet: coif3, Levels: 5




Running experiment for Wavelet: coif3, Levels: 6








Running experiment for Wavelet: coif4, Levels: 2




Running experiment for Wavelet: coif4, Levels: 3




Running experiment for Wavelet: coif4, Levels: 4




Running experiment for Wavelet: coif4, Levels: 5




Running experiment for Wavelet: coif4, Levels: 6






Running experiment for Wavelet: coif5, Levels: 2




Running experiment for Wavelet: coif5, Levels: 3




Running experiment for Wavelet: coif5, Levels: 4




Running experiment for Wavelet: coif5, Levels: 5




Running experiment for Wavelet: coif5, Levels: 6






Running experiment for Wavelet: db4, Levels: 2




Running experiment for Wavelet: db4, Levels: 3




Running experiment for Wavelet: db4, Levels: 4






Running experiment for Wavelet: db4, Levels: 5




Running experiment for Wavelet: db4, Levels: 6






Running experiment for Wavelet: db6, Levels: 2




Running experiment for Wavelet: db6, Levels: 3




Running experiment for Wavelet: db6, Levels: 4




Running experiment for Wavelet: db6, Levels: 5




Running experiment for Wavelet: db6, Levels: 6




Running experiment for Wavelet: db8, Levels: 2




Running experiment for Wavelet: db8, Levels: 3




Running experiment for Wavelet: db8, Levels: 4




Running experiment for Wavelet: db8, Levels: 5




Running experiment for Wavelet: db8, Levels: 6






Running experiment for Wavelet: sym4, Levels: 2




Running experiment for Wavelet: sym4, Levels: 3




Running experiment for Wavelet: sym4, Levels: 4




Running experiment for Wavelet: sym4, Levels: 5




Running experiment for Wavelet: sym4, Levels: 6






Running experiment for Wavelet: sym6, Levels: 2




Running experiment for Wavelet: sym6, Levels: 3




Running experiment for Wavelet: sym6, Levels: 4




Running experiment for Wavelet: sym6, Levels: 5




Running experiment for Wavelet: sym6, Levels: 6




Running experiment for Wavelet: sym8, Levels: 2




Running experiment for Wavelet: sym8, Levels: 3




Running experiment for Wavelet: sym8, Levels: 4




Running experiment for Wavelet: sym8, Levels: 5




Running experiment for Wavelet: sym8, Levels: 6




In [157]:
# Store results in a DataFrame
results_df = pd.DataFrame(results)
results_df.to_csv("./data/wavelet_experiment_results_window_250_pca_30.csv", index=False)
print(results_df.head())

                model  \
0        RandomForest   
1                 SVM   
2  LogisticRegression   
3        RandomForest   
4                 SVM   

                                                                                                 best_params  \
0  {'bootstrap': False, 'max_depth': 10, 'min_samples_leaf': 2, 'min_samples_split': 2, 'n_estimators': 150}   
1                                                 {'C': 100, 'degree': 2, 'gamma': 'scale', 'kernel': 'rbf'}   
2                                                         {'C': 10, 'max_iter': 1000, 'solver': 'liblinear'}   
3   {'bootstrap': True, 'max_depth': 10, 'min_samples_leaf': 1, 'min_samples_split': 3, 'n_estimators': 150}   
4                                              {'C': 0.1, 'degree': 2, 'gamma': 'scale', 'kernel': 'linear'}   

   accuracy  r2_score  \
0  0.879630  0.936111   
1  0.833333  0.786111   
2  0.814815  0.819444   
3  0.861111  0.876389   
4  0.796296  0.533333   

                         

In [158]:
max_r2_score = results_df['r2_score'].max()
print(max_r2_score)

0.9708333333333333


In [110]:
# Set display option to show more characters in each column
pd.set_option('display.max_colwidth', None)

In [164]:
results_df[results_df['r2_score'] > 0.90]

Unnamed: 0,model,best_params,accuracy,r2_score,classification_report,wavelet,level
0,RandomForest,"{'bootstrap': False, 'max_depth': 10, 'min_samples_leaf': 2, 'min_samples_split': 2, 'n_estimators': 150}",0.87963,0.936111,"{'0': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 12}, '1': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 12}, '2': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 12}, '3': {'precision': 0.9166666666666666, 'recall': 0.9166666666666666, 'f1-score': 0.9166666666666666, 'support': 12}, '4': {'precision': 0.8571428571428571, 'recall': 1.0, 'f1-score': 0.923076923076923, 'support': 12}, '5': {'precision': 1.0, 'recall': 0.9166666666666666, 'f1-score': 0.9565217391304348, 'support': 12}, '6': {'precision': 0.8, 'recall': 0.6666666666666666, 'f1-score': 0.7272727272727272, 'support': 12}, '7': {'precision': 0.5294117647058824, 'recall': 0.75, 'f1-score': 0.6206896551724139, 'support': 12}, '8': {'precision': 1.0, 'recall': 0.6666666666666666, 'f1-score': 0.8, 'support': 12}, 'accuracy': 0.8796296296296297, 'macro avg': {'precision': 0.9003579209461563, 'recall': 0.8796296296296297, 'f1-score': 0.8826919679243518, 'support': 108}, 'weighted avg': {'precision': 0.9003579209461563, 'recall': 0.8796296296296297, 'f1-score': 0.8826919679243517, 'support': 108}}",bior2.2,2
17,LogisticRegression,"{'C': 100, 'max_iter': 1000, 'solver': 'liblinear'}",0.861111,0.945833,"{'0': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 12}, '1': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 12}, '2': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 12}, '3': {'precision': 1.0, 'recall': 0.9166666666666666, 'f1-score': 0.9565217391304348, 'support': 12}, '4': {'precision': 0.9230769230769231, 'recall': 1.0, 'f1-score': 0.9600000000000001, 'support': 12}, '5': {'precision': 1.0, 'recall': 0.9166666666666666, 'f1-score': 0.9565217391304348, 'support': 12}, '6': {'precision': 0.6, 'recall': 0.75, 'f1-score': 0.6666666666666665, 'support': 12}, '7': {'precision': 0.46153846153846156, 'recall': 0.5, 'f1-score': 0.48000000000000004, 'support': 12}, '8': {'precision': 0.8888888888888888, 'recall': 0.6666666666666666, 'f1-score': 0.761904761904762, 'support': 12}, 'accuracy': 0.8611111111111112, 'macro avg': {'precision': 0.8748338081671415, 'recall': 0.8611111111111112, 'f1-score': 0.864623878536922, 'support': 108}, 'weighted avg': {'precision': 0.8748338081671415, 'recall': 0.8611111111111112, 'f1-score': 0.864623878536922, 'support': 108}}",bior3.3,2
45,RandomForest,"{'bootstrap': False, 'max_depth': 13, 'min_samples_leaf': 1, 'min_samples_split': 5, 'n_estimators': 150}",0.898148,0.906944,"{'0': {'precision': 1.0, 'recall': 0.9166666666666666, 'f1-score': 0.9565217391304348, 'support': 12}, '1': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 12}, '2': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 12}, '3': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 12}, '4': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 12}, '5': {'precision': 1.0, 'recall': 0.9166666666666666, 'f1-score': 0.9565217391304348, 'support': 12}, '6': {'precision': 0.75, 'recall': 0.75, 'f1-score': 0.75, 'support': 12}, '7': {'precision': 0.5625, 'recall': 0.75, 'f1-score': 0.6428571428571429, 'support': 12}, '8': {'precision': 0.9, 'recall': 0.75, 'f1-score': 0.8181818181818182, 'support': 12}, 'accuracy': 0.8981481481481481, 'macro avg': {'precision': 0.9125000000000001, 'recall': 0.898148148148148, 'f1-score': 0.90267582658887, 'support': 108}, 'weighted avg': {'precision': 0.9125, 'recall': 0.8981481481481481, 'f1-score': 0.9026758265888701, 'support': 108}}",coif3,2
50,LogisticRegression,"{'C': 100, 'max_iter': 1000, 'solver': 'liblinear'}",0.814815,0.902778,"{'0': {'precision': 1.0, 'recall': 0.9166666666666666, 'f1-score': 0.9565217391304348, 'support': 12}, '1': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 12}, '2': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 12}, '3': {'precision': 1.0, 'recall': 0.9166666666666666, 'f1-score': 0.9565217391304348, 'support': 12}, '4': {'precision': 0.8571428571428571, 'recall': 1.0, 'f1-score': 0.923076923076923, 'support': 12}, '5': {'precision': 0.9230769230769231, 'recall': 1.0, 'f1-score': 0.9600000000000001, 'support': 12}, '6': {'precision': 0.5, 'recall': 0.5833333333333334, 'f1-score': 0.5384615384615384, 'support': 12}, '7': {'precision': 0.3333333333333333, 'recall': 0.3333333333333333, 'f1-score': 0.3333333333333333, 'support': 12}, '8': {'precision': 0.7777777777777778, 'recall': 0.5833333333333334, 'f1-score': 0.6666666666666666, 'support': 12}, 'accuracy': 0.8148148148148148, 'macro avg': {'precision': 0.8212589879256545, 'recall': 0.8148148148148148, 'f1-score': 0.8149535488665923, 'support': 108}, 'weighted avg': {'precision': 0.8212589879256545, 'recall': 0.8148148148148148, 'f1-score': 0.8149535488665923, 'support': 108}}",coif3,3
51,RandomForest,"{'bootstrap': False, 'max_depth': 17, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 200}",0.87037,0.944444,"{'0': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 12}, '1': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 12}, '2': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 12}, '3': {'precision': 1.0, 'recall': 0.9166666666666666, 'f1-score': 0.9565217391304348, 'support': 12}, '4': {'precision': 0.9230769230769231, 'recall': 1.0, 'f1-score': 0.9600000000000001, 'support': 12}, '5': {'precision': 1.0, 'recall': 0.9166666666666666, 'f1-score': 0.9565217391304348, 'support': 12}, '6': {'precision': 0.6666666666666666, 'recall': 0.6666666666666666, 'f1-score': 0.6666666666666666, 'support': 12}, '7': {'precision': 0.5, 'recall': 0.6666666666666666, 'f1-score': 0.5714285714285715, 'support': 12}, '8': {'precision': 0.8888888888888888, 'recall': 0.6666666666666666, 'f1-score': 0.761904761904762, 'support': 12}, 'accuracy': 0.8703703703703703, 'macro avg': {'precision': 0.8865147198480532, 'recall': 0.8703703703703703, 'f1-score': 0.8747826086956522, 'support': 108}, 'weighted avg': {'precision': 0.8865147198480533, 'recall': 0.8703703703703703, 'f1-score': 0.8747826086956522, 'support': 108}}",coif3,4
60,RandomForest,"{'bootstrap': False, 'max_depth': 13, 'min_samples_leaf': 1, 'min_samples_split': 5, 'n_estimators': 100}",0.907407,0.970833,"{'0': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 12}, '1': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 12}, '2': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 12}, '3': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 12}, '4': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 12}, '5': {'precision': 1.0, 'recall': 0.9166666666666666, 'f1-score': 0.9565217391304348, 'support': 12}, '6': {'precision': 0.8181818181818182, 'recall': 0.75, 'f1-score': 0.7826086956521738, 'support': 12}, '7': {'precision': 0.625, 'recall': 0.8333333333333334, 'f1-score': 0.7142857142857143, 'support': 12}, '8': {'precision': 0.8, 'recall': 0.6666666666666666, 'f1-score': 0.7272727272727272, 'support': 12}, 'accuracy': 0.9074074074074074, 'macro avg': {'precision': 0.915909090909091, 'recall': 0.9074074074074073, 'f1-score': 0.908965430704561, 'support': 108}, 'weighted avg': {'precision': 0.9159090909090908, 'recall': 0.9074074074074074, 'f1-score': 0.908965430704561, 'support': 108}}",coif4,2
63,RandomForest,"{'bootstrap': False, 'max_depth': 17, 'min_samples_leaf': 1, 'min_samples_split': 3, 'n_estimators': 150}",0.87963,0.941667,"{'0': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 12}, '1': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 12}, '2': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 12}, '3': {'precision': 1.0, 'recall': 0.9166666666666666, 'f1-score': 0.9565217391304348, 'support': 12}, '4': {'precision': 1.0, 'recall': 0.9166666666666666, 'f1-score': 0.9565217391304348, 'support': 12}, '5': {'precision': 1.0, 'recall': 0.9166666666666666, 'f1-score': 0.9565217391304348, 'support': 12}, '6': {'precision': 0.6923076923076923, 'recall': 0.75, 'f1-score': 0.7199999999999999, 'support': 12}, '7': {'precision': 0.5625, 'recall': 0.75, 'f1-score': 0.6428571428571429, 'support': 12}, '8': {'precision': 0.8, 'recall': 0.6666666666666666, 'f1-score': 0.7272727272727272, 'support': 12}, 'accuracy': 0.8796296296296297, 'macro avg': {'precision': 0.8949786324786325, 'recall': 0.8796296296296297, 'f1-score': 0.8844105652801305, 'support': 108}, 'weighted avg': {'precision': 0.8949786324786325, 'recall': 0.8796296296296297, 'f1-score': 0.8844105652801304, 'support': 108}}",coif4,3
65,LogisticRegression,"{'C': 10, 'max_iter': 1000, 'solver': 'liblinear'}",0.805556,0.901389,"{'0': {'precision': 1.0, 'recall': 0.9166666666666666, 'f1-score': 0.9565217391304348, 'support': 12}, '1': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 12}, '2': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 12}, '3': {'precision': 1.0, 'recall': 0.9166666666666666, 'f1-score': 0.9565217391304348, 'support': 12}, '4': {'precision': 0.8461538461538461, 'recall': 0.9166666666666666, 'f1-score': 0.8799999999999999, 'support': 12}, '5': {'precision': 0.9166666666666666, 'recall': 0.9166666666666666, 'f1-score': 0.9166666666666666, 'support': 12}, '6': {'precision': 0.46153846153846156, 'recall': 0.5, 'f1-score': 0.48000000000000004, 'support': 12}, '7': {'precision': 0.375, 'recall': 0.5, 'f1-score': 0.42857142857142855, 'support': 12}, '8': {'precision': 0.875, 'recall': 0.5833333333333334, 'f1-score': 0.7000000000000001, 'support': 12}, 'accuracy': 0.8055555555555556, 'macro avg': {'precision': 0.8304843304843305, 'recall': 0.8055555555555555, 'f1-score': 0.8131423970554406, 'support': 108}, 'weighted avg': {'precision': 0.8304843304843305, 'recall': 0.8055555555555556, 'f1-score': 0.8131423970554407, 'support': 108}}",coif4,3
66,RandomForest,"{'bootstrap': False, 'max_depth': 13, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 50}",0.87963,0.913889,"{'0': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 12}, '1': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 12}, '2': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 12}, '3': {'precision': 1.0, 'recall': 0.9166666666666666, 'f1-score': 0.9565217391304348, 'support': 12}, '4': {'precision': 0.9090909090909091, 'recall': 0.8333333333333334, 'f1-score': 0.8695652173913043, 'support': 12}, '5': {'precision': 1.0, 'recall': 0.9166666666666666, 'f1-score': 0.9565217391304348, 'support': 12}, '6': {'precision': 0.75, 'recall': 0.75, 'f1-score': 0.75, 'support': 12}, '7': {'precision': 0.5555555555555556, 'recall': 0.8333333333333334, 'f1-score': 0.6666666666666667, 'support': 12}, '8': {'precision': 0.8888888888888888, 'recall': 0.6666666666666666, 'f1-score': 0.761904761904762, 'support': 12}, 'accuracy': 0.8796296296296297, 'macro avg': {'precision': 0.9003928170594837, 'recall': 0.8796296296296297, 'f1-score': 0.884575569358178, 'support': 108}, 'weighted avg': {'precision': 0.9003928170594838, 'recall': 0.8796296296296297, 'f1-score': 0.884575569358178, 'support': 108}}",coif4,4
69,RandomForest,"{'bootstrap': False, 'max_depth': 13, 'min_samples_leaf': 1, 'min_samples_split': 3, 'n_estimators': 200}",0.861111,0.913889,"{'0': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 12}, '1': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 12}, '2': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 12}, '3': {'precision': 1.0, 'recall': 0.9166666666666666, 'f1-score': 0.9565217391304348, 'support': 12}, '4': {'precision': 0.9090909090909091, 'recall': 0.8333333333333334, 'f1-score': 0.8695652173913043, 'support': 12}, '5': {'precision': 1.0, 'recall': 0.9166666666666666, 'f1-score': 0.9565217391304348, 'support': 12}, '6': {'precision': 0.6428571428571429, 'recall': 0.75, 'f1-score': 0.6923076923076924, 'support': 12}, '7': {'precision': 0.5625, 'recall': 0.75, 'f1-score': 0.6428571428571429, 'support': 12}, '8': {'precision': 0.7777777777777778, 'recall': 0.5833333333333334, 'f1-score': 0.6666666666666666, 'support': 12}, 'accuracy': 0.8611111111111112, 'macro avg': {'precision': 0.8769139810806478, 'recall': 0.861111111111111, 'f1-score': 0.8649377997204085, 'support': 108}, 'weighted avg': {'precision': 0.8769139810806478, 'recall': 0.8611111111111112, 'f1-score': 0.8649377997204084, 'support': 108}}",coif4,5


In [117]:
import json

# Extract and print the full classification report
for index, row in results_df[results_df['r2_score'] > 0.93].iterrows():
    print(f"Index {index}:")
    print(json.dumps(row['best_params'], indent=4))
    print("\n" + "="*50 + "\n")

Index 8:
{
    "bootstrap": false,
    "max_depth": 20,
    "min_samples_leaf": 2,
    "min_samples_split": 2,
    "n_estimators": 150
}


Index 80:
{
    "bootstrap": true,
    "max_depth": 15,
    "min_samples_leaf": 1,
    "min_samples_split": 5,
    "n_estimators": 100
}


Index 84:
{
    "bootstrap": true,
    "max_depth": 15,
    "min_samples_leaf": 1,
    "min_samples_split": 2,
    "n_estimators": 50
}


Index 88:
{
    "bootstrap": false,
    "max_depth": 15,
    "min_samples_leaf": 1,
    "min_samples_split": 2,
    "n_estimators": 200
}


Index 100:
{
    "bootstrap": true,
    "max_depth": 15,
    "min_samples_leaf": 1,
    "min_samples_split": 2,
    "n_estimators": 200
}


Index 106:
{
    "C": 10,
    "max_iter": 1000,
    "solver": "liblinear"
}


Index 160:
{
    "bootstrap": false,
    "max_depth": 10,
    "min_samples_leaf": 1,
    "min_samples_split": 2,
    "n_estimators": 50
}


Index 220:
{
    "bootstrap": true,
    "max_depth": 10,
    "min_samples_leaf": 2,
  

In [118]:
import json

# Extract and print the full classification report
for index, row in results_df[results_df['r2_score'] > 0.93].iterrows():
    print(f"Index {index}:")
    print(json.dumps(row['classification_report'], indent=4))
    print("\n" + "="*50 + "\n")

Index 8:
{
    "0": {
        "precision": 1.0,
        "recall": 1.0,
        "f1-score": 1.0,
        "support": 12
    },
    "1": {
        "precision": 1.0,
        "recall": 1.0,
        "f1-score": 1.0,
        "support": 12
    },
    "2": {
        "precision": 1.0,
        "recall": 1.0,
        "f1-score": 1.0,
        "support": 12
    },
    "3": {
        "precision": 1.0,
        "recall": 0.8333333333333334,
        "f1-score": 0.9090909090909091,
        "support": 12
    },
    "4": {
        "precision": 0.8571428571428571,
        "recall": 1.0,
        "f1-score": 0.923076923076923,
        "support": 12
    },
    "5": {
        "precision": 1.0,
        "recall": 0.9166666666666666,
        "f1-score": 0.9565217391304348,
        "support": 12
    },
    "6": {
        "precision": 0.6666666666666666,
        "recall": 0.6666666666666666,
        "f1-score": 0.6666666666666666,
        "support": 12
    },
    "7": {
        "precision": 0.5294117647058824,
     