### **This notebook attempts to remove outlier spectra from each surface (SurID) group**

In [1]:
import pandas as pd
import numpy as np
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.model_selection import cross_validate, GroupKFold
from sklearn.metrics import accuracy_score
from Spectra_Preparation_Functions import *

In [2]:
def prepare_wavelength_df(df, absorbance_col, status_col='Status'):
    """Reshape long-format spectra into one row per spectrum.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format table containing 'SpecID', 'WaveNumber', 'SurID', the
        column named by ``absorbance_col`` and the column named by
        ``status_col``.
    absorbance_col : str
        Name of the column whose values fill the wide table.
    status_col : str, default 'Status'
        Name of the label column carried over to the result.

    Returns
    -------
    pandas.DataFrame
        Indexed by 'SpecID'; one column per WaveNumber followed by 'SurID'
        and ``status_col``.
    """
    # Spread the absorbance values out so every WaveNumber becomes a column.
    spectra_wide = df.pivot(index='SpecID', columns='WaveNumber', values=absorbance_col)
    spectra_wide = spectra_wide.reset_index()
    spectra_wide.columns.name = None

    # One (SpecID, SurID, status) row per spectrum. SurID is kept so the
    # result can be used for GroupKFold cross-validation.
    spectrum_labels = df.loc[:, ['SpecID', 'SurID', status_col]].drop_duplicates()

    # Attach the labels to the wide table and index the result by SpecID.
    labelled = spectra_wide.merge(spectrum_labels, on='SpecID')
    return labelled.set_index('SpecID')

In [3]:
def evaluate_extra_trees(df):
    """Cross-validate an Extra Trees classifier and print summary metrics.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'Status' column (target) and a 'SurID' column
        (grouping key); every other column is used as a feature.

    Prints the mean and standard deviation across folds of accuracy,
    macro precision, macro recall and macro F1. Returns nothing.
    """
    # Separate the grouping key, the target and the feature matrix.
    surface_groups = df['SurID']
    labels = df['Status']
    features = df.drop(['Status', 'SurID'], axis=1)

    # Fixed seed for the classifier; 10 folds split by surface.
    et = ExtraTreesClassifier(random_state=1234)
    cv = GroupKFold(n_splits=10)

    scores = cross_validate(
        et,
        features,
        labels,
        groups=surface_groups,
        cv=cv,
        scoring=['accuracy', 'precision_macro', 'recall_macro', 'f1_macro'],
        n_jobs=-1,
    )

    # Report each metric as "mean +/- std" over the folds.
    model_name = et.__class__.__name__
    reported_metrics = (
        ('Accuracy', 'test_accuracy'),
        ('Precision', 'test_precision_macro'),
        ('Recall', 'test_recall_macro'),
        ('F1-Score', 'test_f1_macro'),
    )
    for metric_label, score_key in reported_metrics:
        fold_scores = scores[score_key]
        print(f"{model_name} Cross-Validation {metric_label}: {np.mean(fold_scores):.4f} +/- {np.std(fold_scores):.4f}")

In [4]:
# Load the spectra table; the path is relative to the notebook's working directory.
spectra_df = pd.read_csv("../../optuna_cleaning_spectra.csv")
# `df` is a second name for the same DataFrame object, not a copy.
df = spectra_df

In [5]:
#sns.lineplot(data=df, x='WaveNumber', y='Absorbance', hue='Status', errorbar=('ci', False))

In [6]:
# Preview the long-format table: one row per (SpecID, WaveNumber) reading.
df

Unnamed: 0,SpecID,Seq,WaveNumber,Absorbance,SurID,Status
0,201210-1-00,293,400.22778,0.004884,201210-1,Normal
1,201210-1-00,294,400.91116,0.008132,201210-1,Normal
2,201210-1-00,295,401.59454,0.006195,201210-1,Normal
3,201210-1-00,296,402.27789,0.001867,201210-1,Normal
4,201210-1-00,297,402.96127,0.002789,201210-1,Normal
...,...,...,...,...,...,...
6239200,210526-3-09,2337,1797.03870,-0.013474,210526-3,Hyperglycemia
6239201,210526-3-09,2338,1797.72200,-0.010910,210526-3,Hyperglycemia
6239202,210526-3-09,2339,1798.40550,-0.010800,210526-3,Hyperglycemia
6239203,210526-3-09,2340,1799.08890,-0.009367,210526-3,Hyperglycemia


In [7]:
# Baseline: pivot the full dataset to wide format and score the classifier
# before any spectra are removed.
wavelength_df = prepare_wavelength_df(df, "Absorbance")
evaluate_extra_trees(wavelength_df)

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


ExtraTreesClassifier Cross-Validation Accuracy: 0.5866 +/- 0.1315
ExtraTreesClassifier Cross-Validation Precision: 0.5893 +/- 0.1335
ExtraTreesClassifier Cross-Validation Recall: 0.6020 +/- 0.1675
ExtraTreesClassifier Cross-Validation F1-Score: 0.5613 +/- 0.1538


Test on one surface

In [8]:
#df = df[df['SurID'] == '201210-1']

##### **Function to calculate the absorbance quartiles for each SurID group at each WaveNumber**


In [9]:
# Correctly define the function to apply multiple aggregations

def get_surface_wavenumber_quartiles(df):
    """Compute absorbance quartiles for every (SurID, WaveNumber) pair.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'SurID', 'WaveNumber' and 'Absorbance' columns.

    Returns
    -------
    pandas.DataFrame
        One row per (SurID, WaveNumber) with the columns 'SurID',
        'WaveNumber', 'Lower_Quartile', 'Median' and 'Upper_Quartile'.
    """
    # Quantile level -> output column name, in the order the columns are returned.
    quantile_names = {0.25: 'Lower_Quartile', 0.5: 'Median', 0.75: 'Upper_Quartile'}

    # One vectorised groupby-quantile call produces all three statistics and
    # avoids running a Python lambda once per group per quartile.
    quartiles = (
        df.groupby(['SurID', 'WaveNumber'])['Absorbance']
        .quantile(list(quantile_names))
        .unstack()                      # quantile level -> columns
        .rename(columns=quantile_names)
    )

    # Fix the column order explicitly, then turn the group keys back into columns.
    return quartiles[list(quantile_names.values())].reset_index()

# Quartiles for every (SurID, WaveNumber) pair, with the inter-quartile range
# (upper quartile minus lower quartile) appended as an extra column.
quartile_data = get_surface_wavenumber_quartiles(df).assign(
    IQR=lambda q: q['Upper_Quartile'] - q['Lower_Quartile']
)

In [10]:
# Preview the per-(SurID, WaveNumber) quartiles and IQR.
quartile_data

Unnamed: 0,SurID,WaveNumber,Lower_Quartile,Median,Upper_Quartile,IQR
0,201210-1,400.22778,-0.022730,-0.006066,0.027397,0.050127
1,201210-1,400.91116,-0.018262,-0.009978,0.028784,0.047045
2,201210-1,401.59454,-0.019610,-0.013899,0.026280,0.045890
3,201210-1,402.27789,-0.020536,-0.008371,0.026773,0.047309
4,201210-1,402.96127,-0.020752,-0.012366,0.028321,0.049073
...,...,...,...,...,...,...
129082,210526-3,1797.03870,-0.014312,-0.013240,-0.011932,0.002381
129083,210526-3,1797.72200,-0.014217,-0.012470,-0.010963,0.003254
129084,210526-3,1798.40550,-0.014920,-0.012806,-0.011134,0.003787
129085,210526-3,1799.08890,-0.014802,-0.013706,-0.012457,0.002345


Now we shall identify outlier spectra: those with absorbance values more than 1.5 times the IQR below the lower quartile or above the upper quartile at a given WaveNumber within their SurID group.

In [11]:
# Attach each group's quartiles and IQR to every absorbance reading so the
# outlier test can be applied row by row.
merged_df = pd.merge(
    df,
    quartile_data,
    how='inner',
    on=['SurID', 'WaveNumber'],
)
merged_df

Unnamed: 0,SpecID,Seq,WaveNumber,Absorbance,SurID,Status,Lower_Quartile,Median,Upper_Quartile,IQR
0,201210-1-00,293,400.22778,0.004884,201210-1,Normal,-0.022730,-0.006066,0.027397,0.050127
1,201210-1-00,294,400.91116,0.008132,201210-1,Normal,-0.018262,-0.009978,0.028784,0.047045
2,201210-1-00,295,401.59454,0.006195,201210-1,Normal,-0.019610,-0.013899,0.026280,0.045890
3,201210-1-00,296,402.27789,0.001867,201210-1,Normal,-0.020536,-0.008371,0.026773,0.047309
4,201210-1-00,297,402.96127,0.002789,201210-1,Normal,-0.020752,-0.012366,0.028321,0.049073
...,...,...,...,...,...,...,...,...,...,...
6239200,210526-3-09,2337,1797.03870,-0.013474,210526-3,Hyperglycemia,-0.014312,-0.013240,-0.011932,0.002381
6239201,210526-3-09,2338,1797.72200,-0.010910,210526-3,Hyperglycemia,-0.014217,-0.012470,-0.010963,0.003254
6239202,210526-3-09,2339,1798.40550,-0.010800,210526-3,Hyperglycemia,-0.014920,-0.012806,-0.011134,0.003787
6239203,210526-3-09,2340,1799.08890,-0.009367,210526-3,Hyperglycemia,-0.014802,-0.013706,-0.012457,0.002345


In [12]:
# Filter rows where Absorbance is an outlier
outliers = merged_df[
    (merged_df['Absorbance'] < (merged_df['Lower_Quartile'] - 1.5 * merged_df['IQR'])) |
    (merged_df['Absorbance'] > (merged_df['Upper_Quartile'] + 1.5 * merged_df['IQR']))
]

outliers

Unnamed: 0,SpecID,Seq,WaveNumber,Absorbance,SurID,Status,Lower_Quartile,Median,Upper_Quartile,IQR
249,201210-1-00,542,570.38727,-0.010473,201210-1,Normal,0.009570,0.015201,0.022182,0.012612
276,201210-1-00,569,588.83826,-0.002877,201210-1,Normal,0.013145,0.015302,0.022899,0.009755
302,201210-1-00,595,606.60590,0.044527,201210-1,Normal,0.020733,0.024311,0.029903,0.009170
308,201210-1-00,601,610.70618,0.045821,201210-1,Normal,0.022459,0.027694,0.029968,0.007509
309,201210-1-00,602,611.38953,0.047297,201210-1,Normal,0.023423,0.025737,0.031811,0.008388
...,...,...,...,...,...,...,...,...,...,...
6238870,210526-3-09,2007,1571.52620,0.049499,210526-3,Hyperglycemia,0.027955,0.030754,0.035278,0.007323
6238871,210526-3-09,2008,1572.20960,0.047832,210526-3,Hyperglycemia,0.027665,0.030032,0.033470,0.005805
6238872,210526-3-09,2009,1572.89290,0.045638,210526-3,Hyperglycemia,0.027989,0.030845,0.034044,0.006056
6239096,210526-3-09,2233,1725.96810,-0.007726,210526-3,Hyperglycemia,-0.004818,-0.003969,-0.003209,0.001609


**If a defined proportion of a spectrum's points are outliers, drop that spectrum from the dataframe**

In [13]:
# Number of distinct WaveNumber values in the whole table, used below as the
# number of points per spectrum.
# NOTE(review): this is only the true length of each spectrum if every SpecID
# covers the same WaveNumber grid - not verified here.
spectra_length = df['WaveNumber'].nunique()  # This should be consistent for all spectra
spectra_length

2049

In [14]:
# Share of each spectrum's points that were flagged: the flagged-point count
# per SpecID divided by the number of points in a spectrum. Spectra with no
# flagged points do not appear in the result.
outlier_proportions = outliers['SpecID'].value_counts().div(spectra_length)
outlier_proportions

SpecID
210225-2-17    0.600781
210225-2-19    0.584675
210401-1-17    0.523670
210225-1-28    0.502684
210225-2-18    0.501220
                 ...   
210217-2-02    0.000488
210304-1-08    0.000488
210510-2-33    0.000488
210225-1-13    0.000488
210211-1-25    0.000488
Name: count, Length: 2984, dtype: float64

**Set the proportion of outlier points above which a spectrum is dropped.**

In [29]:
# Spectra whose share of outlier points is strictly greater than this cutoff
# are marked for removal.
proportion_cutoff = 0.1
outlier_SpecIDs = outlier_proportions[outlier_proportions > proportion_cutoff].index
# Show the selected SpecIDs and how many there are.
print(outlier_SpecIDs)
print(len(outlier_SpecIDs))

Index(['210225-2-17', '210225-2-19', '210401-1-17', '210225-1-28',
       '210225-2-18', '210421-1-00', '210225-2-16', '210324-2-34',
       '210225-1-11', '210414-1-05',
       ...
       '210331-1-22', '210315-1-16', '210519-1-24', '210217-2-45',
       '210310-1-27', '210310-1-06', '210331-1-34', '210114-1-38',
       '210324-2-35', '210407-2-12'],
      dtype='object', name='SpecID', length=217)
217


Drop the outlier spectra from the dataset

In [30]:
# Keep every row of the loaded table whose SpecID is not in the outlier list.
clean_df = spectra_df[~spectra_df['SpecID'].isin(outlier_SpecIDs)]

In [31]:
# Preview the long-format table after the outlier spectra were dropped.
clean_df

Unnamed: 0,SpecID,Seq,WaveNumber,Absorbance,SurID,Status
0,201210-1-00,293,400.22778,0.004884,201210-1,Normal
1,201210-1-00,294,400.91116,0.008132,201210-1,Normal
2,201210-1-00,295,401.59454,0.006195,201210-1,Normal
3,201210-1-00,296,402.27789,0.001867,201210-1,Normal
4,201210-1-00,297,402.96127,0.002789,201210-1,Normal
...,...,...,...,...,...,...
6239200,210526-3-09,2337,1797.03870,-0.013474,210526-3,Hyperglycemia
6239201,210526-3-09,2338,1797.72200,-0.010910,210526-3,Hyperglycemia
6239202,210526-3-09,2339,1798.40550,-0.010800,210526-3,Hyperglycemia
6239203,210526-3-09,2340,1799.08890,-0.009367,210526-3,Hyperglycemia


In [32]:
# clean_df.to_csv("../../data/outlier_spectra_removed.csv", index=False)

#### **Check model performance when outliers are removed**

In [33]:
# Show the cleaned table again before it is pivoted for modelling.
clean_df

Unnamed: 0,SpecID,Seq,WaveNumber,Absorbance,SurID,Status
0,201210-1-00,293,400.22778,0.004884,201210-1,Normal
1,201210-1-00,294,400.91116,0.008132,201210-1,Normal
2,201210-1-00,295,401.59454,0.006195,201210-1,Normal
3,201210-1-00,296,402.27789,0.001867,201210-1,Normal
4,201210-1-00,297,402.96127,0.002789,201210-1,Normal
...,...,...,...,...,...,...
6239200,210526-3-09,2337,1797.03870,-0.013474,210526-3,Hyperglycemia
6239201,210526-3-09,2338,1797.72200,-0.010910,210526-3,Hyperglycemia
6239202,210526-3-09,2339,1798.40550,-0.010800,210526-3,Hyperglycemia
6239203,210526-3-09,2340,1799.08890,-0.009367,210526-3,Hyperglycemia


In [34]:
# Pivot the cleaned spectra to wide format (one row per SpecID) for modelling.
wavelength_df = prepare_wavelength_df(clean_df, 'Absorbance')

In [35]:
# Preview the wide table: WaveNumber columns followed by SurID and Status.
wavelength_df

Unnamed: 0_level_0,400.22778,400.91116,401.59454,402.27789,402.96127,403.64465,404.32803,405.01138,405.69476,406.37814,...,1794.9886,1795.672,1796.3553,1797.0387,1797.722,1798.4055,1799.0889,1799.7722,SurID,Status
SpecID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
201210-1-00,0.004884,0.008132,0.006195,0.001867,0.002789,0.004378,-0.001831,0.008044,0.008588,0.001198,...,-0.010521,-0.009203,-0.005168,-0.010362,-0.010012,-0.015556,-0.015209,-0.016938,201210-1,Normal
201210-1-01,0.008024,0.006805,0.004663,0.004657,0.007175,0.010595,0.011843,0.007460,0.008923,0.007735,...,-0.003285,-0.008513,-0.009282,-0.011029,-0.011218,-0.010855,-0.008731,-0.006102,201210-1,Normal
201210-1-02,0.017957,0.014304,0.013898,0.015367,0.016241,0.015541,0.013842,0.012572,0.013522,0.013054,...,-0.007020,-0.005598,-0.004788,-0.006653,-0.006995,-0.004912,-0.007203,-0.007529,201210-1,Normal
201210-1-03,0.012863,0.011919,0.011419,0.012159,0.013483,0.013985,0.013457,0.014292,0.013115,0.011297,...,-0.009791,-0.007825,-0.009147,-0.008598,-0.008812,-0.008099,-0.006736,-0.009135,201210-1,Normal
201210-1-04,0.027397,0.027086,0.026489,0.026085,0.024826,0.023755,0.024947,0.023863,0.023239,0.020719,...,-0.010142,-0.019552,-0.017806,-0.017629,-0.016764,-0.013863,-0.016637,-0.010374,201210-1,Normal
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
210526-3-45,-0.012148,-0.007888,-0.007743,-0.010873,-0.011493,-0.008488,-0.006357,-0.007922,-0.009495,-0.008167,...,-0.014333,-0.014761,-0.009417,-0.013145,-0.011967,-0.009635,-0.013174,-0.015904,210526-3,Hyperglycemia
210526-3-46,-0.012786,-0.008874,-0.010482,-0.012470,-0.011828,-0.012800,-0.008957,-0.010703,-0.010377,-0.012326,...,-0.011278,-0.017077,-0.014021,-0.014313,-0.013098,-0.014203,-0.014531,-0.009325,210526-3,Hyperglycemia
210526-3-47,-0.010368,-0.009137,-0.006428,-0.006793,-0.008743,-0.006748,-0.005956,-0.006563,-0.009431,-0.009152,...,-0.008391,-0.010440,-0.011546,-0.012636,-0.012393,-0.018054,-0.009523,-0.012595,210526-3,Hyperglycemia
210526-3-48,-0.016023,-0.012578,-0.007327,-0.006224,-0.009936,-0.011675,-0.010872,-0.005398,-0.010617,-0.011278,...,-0.006257,-0.012827,-0.011455,-0.012318,-0.014246,-0.011085,-0.009457,-0.007821,210526-3,Hyperglycemia


In [36]:
# Number of distinct surfaces left after cleaning; evaluate_extra_trees splits
# by SurID with GroupKFold(n_splits=10), so at least 10 groups are needed.
wavelength_df['SurID'].nunique()

63

In [37]:
# Score the classifier on the cleaned data for comparison with the baseline run.
evaluate_extra_trees(wavelength_df)

ExtraTreesClassifier Cross-Validation Accuracy: 0.5990 +/- 0.1272
ExtraTreesClassifier Cross-Validation Precision: 0.5760 +/- 0.1396
ExtraTreesClassifier Cross-Validation Recall: 0.5520 +/- 0.1338
ExtraTreesClassifier Cross-Validation F1-Score: 0.5340 +/- 0.1309


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [24]:
# Sweep the drop threshold from 10% to 90% in 5-point steps and re-score the
# classifier at each value. The loop uses its own variable names so that it
# does not overwrite `proportion_cutoff`, `outlier_SpecIDs`, `clean_df` or
# `wavelength_df` set by the cells above; otherwise those names would be left
# holding the state of the last threshold tried.
for cutoff_percent in range(10, 91, 5):
    sweep_cutoff = cutoff_percent / 100  # Convert to proportion

    # Same selection / drop / pivot steps as above, applied at this threshold.
    sweep_outlier_ids = outlier_proportions[outlier_proportions > sweep_cutoff].index
    sweep_clean_df = spectra_df[~spectra_df['SpecID'].isin(sweep_outlier_ids)]
    sweep_wavelength_df = prepare_wavelength_df(sweep_clean_df, 'Absorbance')
    print(sweep_cutoff)
    evaluate_extra_trees(sweep_wavelength_df)

0.1


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


ExtraTreesClassifier Cross-Validation Accuracy: 0.5990 +/- 0.1272
ExtraTreesClassifier Cross-Validation Precision: 0.5760 +/- 0.1396
ExtraTreesClassifier Cross-Validation Recall: 0.5520 +/- 0.1338
ExtraTreesClassifier Cross-Validation F1-Score: 0.5340 +/- 0.1309
0.15


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


ExtraTreesClassifier Cross-Validation Accuracy: 0.5449 +/- 0.1441
ExtraTreesClassifier Cross-Validation Precision: 0.5045 +/- 0.1346
ExtraTreesClassifier Cross-Validation Recall: 0.5135 +/- 0.1446
ExtraTreesClassifier Cross-Validation F1-Score: 0.4738 +/- 0.1405
0.2


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


ExtraTreesClassifier Cross-Validation Accuracy: 0.5472 +/- 0.1348
ExtraTreesClassifier Cross-Validation Precision: 0.5268 +/- 0.1514
ExtraTreesClassifier Cross-Validation Recall: 0.5125 +/- 0.1443
ExtraTreesClassifier Cross-Validation F1-Score: 0.4941 +/- 0.1458
0.25


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


ExtraTreesClassifier Cross-Validation Accuracy: 0.5908 +/- 0.1402
ExtraTreesClassifier Cross-Validation Precision: 0.5846 +/- 0.1526
ExtraTreesClassifier Cross-Validation Recall: 0.6045 +/- 0.1507
ExtraTreesClassifier Cross-Validation F1-Score: 0.5565 +/- 0.1617
0.3


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


ExtraTreesClassifier Cross-Validation Accuracy: 0.5778 +/- 0.1534
ExtraTreesClassifier Cross-Validation Precision: 0.5430 +/- 0.1326
ExtraTreesClassifier Cross-Validation Recall: 0.5162 +/- 0.1692
ExtraTreesClassifier Cross-Validation F1-Score: 0.5011 +/- 0.1443
0.35


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


ExtraTreesClassifier Cross-Validation Accuracy: 0.5952 +/- 0.1372
ExtraTreesClassifier Cross-Validation Precision: 0.5522 +/- 0.1525
ExtraTreesClassifier Cross-Validation Recall: 0.5814 +/- 0.1722
ExtraTreesClassifier Cross-Validation F1-Score: 0.5364 +/- 0.1589
0.4
ExtraTreesClassifier Cross-Validation Accuracy: 0.6106 +/- 0.1368
ExtraTreesClassifier Cross-Validation Precision: 0.5940 +/- 0.1250
ExtraTreesClassifier Cross-Validation Recall: 0.6207 +/- 0.1260
ExtraTreesClassifier Cross-Validation F1-Score: 0.5843 +/- 0.1299
0.45


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


ExtraTreesClassifier Cross-Validation Accuracy: 0.5987 +/- 0.1451
ExtraTreesClassifier Cross-Validation Precision: 0.5930 +/- 0.1262
ExtraTreesClassifier Cross-Validation Recall: 0.5844 +/- 0.1615
ExtraTreesClassifier Cross-Validation F1-Score: 0.5585 +/- 0.1458
0.5
ExtraTreesClassifier Cross-Validation Accuracy: 0.6180 +/- 0.0768
ExtraTreesClassifier Cross-Validation Precision: 0.6293 +/- 0.0582
ExtraTreesClassifier Cross-Validation Recall: 0.6298 +/- 0.0613
ExtraTreesClassifier Cross-Validation F1-Score: 0.5993 +/- 0.0662
0.55


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


ExtraTreesClassifier Cross-Validation Accuracy: 0.5992 +/- 0.0996
ExtraTreesClassifier Cross-Validation Precision: 0.5673 +/- 0.1256
ExtraTreesClassifier Cross-Validation Recall: 0.5743 +/- 0.1265
ExtraTreesClassifier Cross-Validation F1-Score: 0.5431 +/- 0.1178
0.6
ExtraTreesClassifier Cross-Validation Accuracy: 0.5918 +/- 0.0930
ExtraTreesClassifier Cross-Validation Precision: 0.5761 +/- 0.1212
ExtraTreesClassifier Cross-Validation Recall: 0.5981 +/- 0.1173
ExtraTreesClassifier Cross-Validation F1-Score: 0.5560 +/- 0.1167
0.65


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


ExtraTreesClassifier Cross-Validation Accuracy: 0.5866 +/- 0.1315
ExtraTreesClassifier Cross-Validation Precision: 0.5893 +/- 0.1335
ExtraTreesClassifier Cross-Validation Recall: 0.6020 +/- 0.1675
ExtraTreesClassifier Cross-Validation F1-Score: 0.5613 +/- 0.1538
0.7


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


ExtraTreesClassifier Cross-Validation Accuracy: 0.5866 +/- 0.1315
ExtraTreesClassifier Cross-Validation Precision: 0.5893 +/- 0.1335
ExtraTreesClassifier Cross-Validation Recall: 0.6020 +/- 0.1675
ExtraTreesClassifier Cross-Validation F1-Score: 0.5613 +/- 0.1538
0.75


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


ExtraTreesClassifier Cross-Validation Accuracy: 0.5866 +/- 0.1315
ExtraTreesClassifier Cross-Validation Precision: 0.5893 +/- 0.1335
ExtraTreesClassifier Cross-Validation Recall: 0.6020 +/- 0.1675
ExtraTreesClassifier Cross-Validation F1-Score: 0.5613 +/- 0.1538
0.8


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


ExtraTreesClassifier Cross-Validation Accuracy: 0.5866 +/- 0.1315
ExtraTreesClassifier Cross-Validation Precision: 0.5893 +/- 0.1335
ExtraTreesClassifier Cross-Validation Recall: 0.6020 +/- 0.1675
ExtraTreesClassifier Cross-Validation F1-Score: 0.5613 +/- 0.1538
0.85


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


ExtraTreesClassifier Cross-Validation Accuracy: 0.5866 +/- 0.1315
ExtraTreesClassifier Cross-Validation Precision: 0.5893 +/- 0.1335
ExtraTreesClassifier Cross-Validation Recall: 0.6020 +/- 0.1675
ExtraTreesClassifier Cross-Validation F1-Score: 0.5613 +/- 0.1538
0.9


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


ExtraTreesClassifier Cross-Validation Accuracy: 0.5866 +/- 0.1315
ExtraTreesClassifier Cross-Validation Precision: 0.5893 +/- 0.1335
ExtraTreesClassifier Cross-Validation Recall: 0.6020 +/- 0.1675
ExtraTreesClassifier Cross-Validation F1-Score: 0.5613 +/- 0.1538
