### **This notebook attempts to remove outlier spectra from each surface (`SurID`) group**

In [2]:
import pandas as pd
import numpy as np
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.model_selection import cross_validate, GroupKFold
from sklearn.metrics import accuracy_score
from Spectra_Preparation_Functions import *

In [3]:
def prepare_wavelength_df(df, absorbance_col, status_col='Status'):
    """Reshape long-format spectra into one row per spectrum.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format table with at least the columns 'SpecID', 'WaveNumber',
        'SurID', `absorbance_col` and `status_col`.
    absorbance_col : str
        Name of the column whose values fill the wide table.
    status_col : str, default 'Status'
        Name of the class-label column carried over to the result.

    Returns
    -------
    pandas.DataFrame
        Indexed by 'SpecID'; one column per distinct WaveNumber, followed by
        'SurID' (kept so it can serve as the GroupKFold group) and `status_col`.
    """
    # Long -> wide: every distinct WaveNumber becomes its own column.
    spectra_wide = df.pivot(index='SpecID', columns='WaveNumber', values=absorbance_col)
    spectra_wide = spectra_wide.reset_index()
    spectra_wide.columns.name = None

    # One (SpecID, SurID, status) record per spectrum.
    spectrum_labels = df[['SpecID', 'SurID', status_col]].drop_duplicates()

    # Attach surface and status to each spectrum, then index by SpecID.
    return spectra_wide.merge(spectrum_labels, on='SpecID').set_index('SpecID')

In [4]:
def evaluate_extra_trees(df):
    """Cross-validate an ExtraTreesClassifier and print mean +/- std per metric.

    Parameters
    ----------
    df : pandas.DataFrame
        Wide table holding the feature columns plus 'Status' (the target) and
        'SurID' (the grouping key passed to GroupKFold).

    Returns
    -------
    None
        Results are printed, not returned.
    """
    # Surfaces are the CV groups; everything except the two label columns is a feature.
    surface_groups = df['SurID']
    target = df['Status']
    features = df.drop(['Status', 'SurID'], axis=1)

    classifier = ExtraTreesClassifier(random_state=1234)
    group_splitter = GroupKFold(n_splits=10)

    cv_results = cross_validate(
        classifier,
        features,
        target,
        groups=surface_groups,
        cv=group_splitter,
        scoring=['accuracy', 'precision_macro', 'recall_macro', 'f1_macro'],
        n_jobs=-1,
    )

    # Report each metric as "mean +/- std" across the folds.
    model_name = classifier.__class__.__name__
    reported_metrics = (
        ('Accuracy', 'test_accuracy'),
        ('Precision', 'test_precision_macro'),
        ('Recall', 'test_recall_macro'),
        ('F1-Score', 'test_f1_macro'),
    )
    for metric_label, result_key in reported_metrics:
        fold_scores = cv_results[result_key]
        print(f"{model_name} Cross-Validation {metric_label}: {np.mean(fold_scores):.4f} +/- {np.std(fold_scores):.4f}")

In [5]:
# Load the cleaned spectra in long format (one row per SpecID / WaveNumber pair).
spectra_df = pd.read_csv("../../data/current_clean_spectrum.csv")
# NOTE: `df` is a second name for the same object, not a copy; later cells use both names.
df = spectra_df

In [6]:
#sns.lineplot(data=df, x='WaveNumber', y='Absorbance', hue='Status', errorbar=('ci', False))

In [7]:
df

Unnamed: 0,SpecID,Seq,WaveNumber,SurID,Status,Absorbance
0,201210-1-00,293,400.22778,201210-1,Normal,0.016456
1,201210-1-00,294,400.91116,201210-1,Normal,0.015131
2,201210-1-00,295,401.59454,201210-1,Normal,0.014097
3,201210-1-00,296,402.27789,201210-1,Normal,0.013326
4,201210-1-00,297,402.96127,201210-1,Normal,0.012792
...,...,...,...,...,...,...
6239200,210526-3-09,2337,1797.03870,210526-3,Hyperglycemia,-0.004636
6239201,210526-3-09,2338,1797.72200,210526-3,Hyperglycemia,-0.004774
6239202,210526-3-09,2339,1798.40550,210526-3,Hyperglycemia,-0.005045
6239203,210526-3-09,2340,1799.08890,210526-3,Hyperglycemia,-0.005464


In [8]:
# Baseline: pivot the full, unfiltered dataset to wide format and cross-validate on it.
wavelength_df = prepare_wavelength_df(df, "Absorbance")
evaluate_extra_trees(wavelength_df)

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


ExtraTreesClassifier Cross-Validation Accuracy: 0.5844 +/- 0.1088
ExtraTreesClassifier Cross-Validation Precision: 0.5928 +/- 0.1054
ExtraTreesClassifier Cross-Validation Recall: 0.5963 +/- 0.1511
ExtraTreesClassifier Cross-Validation F1-Score: 0.5618 +/- 0.1263


Test on one surface

In [9]:
#df = df[df['SurID'] == '201210-1']

##### **Function to apply quartiles calculation for each SurID group, then by WaveNumber**


In [10]:
# Correctly define the function to apply multiple aggregations

def get_surface_wavenumber_quartiles(df, absorbance_col='Absorbance'):
    """Compute quartiles of absorbance for every (SurID, WaveNumber) group.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format spectra with 'SurID', 'WaveNumber' and `absorbance_col`.
    absorbance_col : str, default 'Absorbance'
        Column to summarise. Parameterised to match `prepare_wavelength_df`,
        so the same statistics can be computed for any absorbance-like column.

    Returns
    -------
    pandas.DataFrame
        One row per (SurID, WaveNumber) group with the columns
        'SurID', 'WaveNumber', 'Lower_Quartile', 'Median', 'Upper_Quartile'.
    """
    by_surface_wavenumber = df.groupby(['SurID', 'WaveNumber'])[absorbance_col]

    # Use the built-in group-wise reductions rather than a Python lambda per
    # group; both default to linear interpolation between data points.
    quartiles = pd.DataFrame({
        'Lower_Quartile': by_surface_wavenumber.quantile(0.25),
        'Median': by_surface_wavenumber.median(),
        'Upper_Quartile': by_surface_wavenumber.quantile(0.75),
    })

    # Turn the (SurID, WaveNumber) index back into ordinary columns.
    return quartiles.reset_index()

# Quartile statistics for every (SurID, WaveNumber) group of the loaded spectra
quartile_data = get_surface_wavenumber_quartiles(df)

# Interquartile range = upper quartile minus lower quartile
quartile_data = quartile_data.assign(
    IQR=quartile_data['Upper_Quartile'].sub(quartile_data['Lower_Quartile'])
)

In [11]:
quartile_data

Unnamed: 0,SurID,WaveNumber,Lower_Quartile,Median,Upper_Quartile,IQR
0,201210-1,400.22778,-0.014502,0.000618,0.017999,0.032501
1,201210-1,400.91116,-0.013605,-0.004298,0.018360,0.031965
2,201210-1,401.59454,-0.013950,-0.004433,0.018843,0.032793
3,201210-1,402.27789,-0.014132,-0.004606,0.018384,0.032516
4,201210-1,402.96127,-0.014043,-0.004860,0.018145,0.032188
...,...,...,...,...,...,...
129082,210526-3,1797.03870,-0.009490,-0.007596,-0.006189,0.003301
129083,210526-3,1797.72200,-0.009479,-0.007702,-0.006198,0.003281
129084,210526-3,1798.40550,-0.009272,-0.007801,-0.006312,0.002960
129085,210526-3,1799.08890,-0.009740,-0.007865,-0.006533,0.003207


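Next we flag outlier points: absorbance values that lie more than 1.5 × IQR below the lower quartile or above the upper quartile for their WaveNumber within their surface (`SurID`) group. Spectra containing too many such points are removed further below.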

In [12]:
# Attach each group's quartile statistics to every spectrum point of that
# (SurID, WaveNumber) group, so the outlier test can be evaluated row by row.
merged_df = df.merge(quartile_data, on=['SurID', 'WaveNumber'])
merged_df

Unnamed: 0,SpecID,Seq,WaveNumber,SurID,Status,Absorbance,Lower_Quartile,Median,Upper_Quartile,IQR
0,201210-1-00,293,400.22778,201210-1,Normal,0.016456,-0.014502,0.000618,0.017999,0.032501
1,201210-1-01,293,400.22778,201210-1,Normal,0.013911,-0.014502,0.000618,0.017999,0.032501
2,201210-1-02,293,400.22778,201210-1,Normal,0.018115,-0.014502,0.000618,0.017999,0.032501
3,201210-1-03,293,400.22778,201210-1,Normal,0.019180,-0.014502,0.000618,0.017999,0.032501
4,201210-1-04,293,400.22778,201210-1,Normal,0.033612,-0.014502,0.000618,0.017999,0.032501
...,...,...,...,...,...,...,...,...,...,...
6239200,210526-3-05,2341,1799.77220,210526-3,Hyperglycemia,-0.008584,-0.009345,-0.007969,-0.006200,0.003144
6239201,210526-3-06,2341,1799.77220,210526-3,Hyperglycemia,-0.005974,-0.009345,-0.007969,-0.006200,0.003144
6239202,210526-3-07,2341,1799.77220,210526-3,Hyperglycemia,-0.009211,-0.009345,-0.007969,-0.006200,0.003144
6239203,210526-3-08,2341,1799.77220,210526-3,Hyperglycemia,-0.007701,-0.009345,-0.007969,-0.006200,0.003144


In [13]:
# Keep the rows whose Absorbance falls outside the 1.5 * IQR fences of its group
iqr_margin = 1.5 * merged_df['IQR']
below_lower_fence = merged_df['Absorbance'] < (merged_df['Lower_Quartile'] - iqr_margin)
above_upper_fence = merged_df['Absorbance'] > (merged_df['Upper_Quartile'] + iqr_margin)
outliers = merged_df[below_lower_fence | above_upper_fence]

outliers

Unnamed: 0,SpecID,Seq,WaveNumber,SurID,Status,Absorbance,Lower_Quartile,Median,Upper_Quartile,IQR
10549,201210-1-15,508,547.15265,201210-1,Normal,0.047919,0.012860,0.015299,0.026835,0.013974
10593,201210-1-10,509,547.83600,201210-1,Normal,0.046174,0.012461,0.015920,0.025928,0.013467
10598,201210-1-15,509,547.83600,201210-1,Normal,0.048492,0.012461,0.015920,0.025928,0.013467
10599,201210-1-16,509,547.83600,201210-1,Normal,0.047270,0.012461,0.015920,0.025928,0.013467
10642,201210-1-10,510,548.51935,201210-1,Normal,0.046621,0.012076,0.016305,0.025564,0.013488
...,...,...,...,...,...,...,...,...,...,...
6237970,210526-3-22,2317,1783.37130,210526-3,Hyperglycemia,-0.003827,-0.009130,-0.008404,-0.007080,0.002049
6239148,210526-3-48,2340,1799.08890,210526-3,Hyperglycemia,-0.000998,-0.009740,-0.007865,-0.006533,0.003207
6239157,210526-3-10,2341,1799.77220,210526-3,Hyperglycemia,-0.014774,-0.009345,-0.007969,-0.006200,0.003144
6239190,210526-3-40,2341,1799.77220,210526-3,Hyperglycemia,-0.001471,-0.009345,-0.007969,-0.006200,0.003144


**If more than a defined proportion of a spectrum's points are outliers, drop that spectrum from the DataFrame**

In [14]:
# Number of distinct wavenumbers; used below as the denominator of each
# spectrum's outlier proportion.
spectra_length = df['WaveNumber'].nunique()

# That denominator is only valid if every spectrum is sampled at all of these
# wavenumbers, so check the assumption instead of relying on it silently.
points_per_spectrum = df.groupby('SpecID')['WaveNumber'].nunique()
assert (points_per_spectrum == spectra_length).all(), (
    "Not every spectrum has the same number of wavenumbers; "
    "outlier proportions based on spectra_length would be wrong."
)

spectra_length

2049

In [15]:
# Fraction of each spectrum's points that were flagged as outliers:
# (number of outlier rows for the SpecID) / (points per spectrum).
# Spectra with no flagged points have no rows in `outliers`, so they do not
# appear in this Series at all.
outlier_proportions = outliers['SpecID'].value_counts()/spectra_length
outlier_proportions

SpecID
210225-1-28    0.615422
210401-1-17    0.567594
210419-2-26    0.475842
210401-1-18    0.463641
210419-2-27    0.462665
                 ...   
201210-2-08    0.000488
210304-1-24    0.000488
210526-1-12    0.000488
210303-2-05    0.000488
210526-3-26    0.000488
Name: count, Length: 2827, dtype: float64

**Set the proportion of outlier points above which a spectrum is dropped.**

In [16]:
# A spectrum is dropped when strictly more than this fraction of its points are outliers
proportion_cutoff = 0.03
exceeds_cutoff = outlier_proportions > proportion_cutoff
outlier_SpecIDs = outlier_proportions.loc[exceeds_cutoff].index
print(outlier_SpecIDs)
print(len(outlier_SpecIDs))

Index(['210225-1-28', '210401-1-17', '210419-2-26', '210401-1-18',
       '210419-2-27', '210421-1-00', '210429-1-13', '210414-1-05',
       '210324-2-34', '210225-1-11',
       ...
       '210303-2-14', '210526-2-45', '210519-3-06', '210401-1-26',
       '210303-1-18', '210504-3-37', '210504-1-32', '210303-2-02',
       '210121-2-04', '210304-2-26'],
      dtype='object', name='SpecID', length=1289)
1289


Drop the outlier spectra from the dataset

In [17]:
# Filter the original long-format table by SpecID, so flagged spectra are
# removed in their entirety rather than only their outlying points.
clean_df = spectra_df[~spectra_df['SpecID'].isin(outlier_SpecIDs)]

In [18]:
clean_df

Unnamed: 0,SpecID,Seq,WaveNumber,SurID,Status,Absorbance
12294,201210-1-06,293,400.22778,201210-1,Normal,0.036439
12295,201210-1-06,294,400.91116,201210-1,Normal,0.037923
12296,201210-1-06,295,401.59454,201210-1,Normal,0.039202
12297,201210-1-06,296,402.27789,201210-1,Normal,0.040295
12298,201210-1-06,297,402.96127,201210-1,Normal,0.041221
...,...,...,...,...,...,...
6239200,210526-3-09,2337,1797.03870,210526-3,Hyperglycemia,-0.004636
6239201,210526-3-09,2338,1797.72200,210526-3,Hyperglycemia,-0.004774
6239202,210526-3-09,2339,1798.40550,210526-3,Hyperglycemia,-0.005045
6239203,210526-3-09,2340,1799.08890,210526-3,Hyperglycemia,-0.005464


In [19]:
# clean_df.to_csv("../../data/outlier_spectra_removed.csv", index=False)

#### **Check model performance when outliers are removed**

In [20]:
clean_df

Unnamed: 0,SpecID,Seq,WaveNumber,SurID,Status,Absorbance
12294,201210-1-06,293,400.22778,201210-1,Normal,0.036439
12295,201210-1-06,294,400.91116,201210-1,Normal,0.037923
12296,201210-1-06,295,401.59454,201210-1,Normal,0.039202
12297,201210-1-06,296,402.27789,201210-1,Normal,0.040295
12298,201210-1-06,297,402.96127,201210-1,Normal,0.041221
...,...,...,...,...,...,...
6239200,210526-3-09,2337,1797.03870,210526-3,Hyperglycemia,-0.004636
6239201,210526-3-09,2338,1797.72200,210526-3,Hyperglycemia,-0.004774
6239202,210526-3-09,2339,1798.40550,210526-3,Hyperglycemia,-0.005045
6239203,210526-3-09,2340,1799.08890,210526-3,Hyperglycemia,-0.005464


In [21]:
# Rebuild the wide feature table from the filtered spectra.
# NOTE: this rebinds `wavelength_df`, which previously held the unfiltered baseline table.
wavelength_df = prepare_wavelength_df(clean_df, 'Absorbance')

In [22]:
wavelength_df

Unnamed: 0_level_0,400.22778,400.91116,401.59454,402.27789,402.96127,403.64465,404.32803,405.01138,405.69476,406.37814,...,1794.9886,1795.672,1796.3553,1797.0387,1797.722,1798.4055,1799.0889,1799.7722,SurID,Status
SpecID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
201210-1-06,0.036439,0.037923,0.039202,0.040295,0.041221,0.041998,0.042643,0.043177,0.043617,0.043982,...,-0.004370,-0.004632,-0.005042,-0.005617,-0.006372,-0.007324,-0.008489,-0.009884,201210-1,Normal
201210-1-07,0.026772,0.028477,0.029868,0.030970,0.031808,0.032408,0.032793,0.032990,0.033023,0.032917,...,-0.005550,-0.005327,-0.005038,-0.004676,-0.004234,-0.003707,-0.003089,-0.002373,201210-1,Normal
201210-1-09,0.031068,0.031347,0.031547,0.031678,0.031750,0.031775,0.031763,0.031724,0.031669,0.031609,...,-0.002397,-0.002482,-0.002633,-0.002856,-0.003161,-0.003554,-0.004045,-0.004642,201210-1,Normal
201210-1-10,0.015936,0.015030,0.014311,0.013768,0.013389,0.013161,0.013073,0.013112,0.013267,0.013526,...,-0.004651,-0.004611,-0.004631,-0.004721,-0.004892,-0.005155,-0.005520,-0.005997,201210-1,Normal
201210-1-12,0.008295,0.009455,0.010462,0.011326,0.012054,0.012657,0.013144,0.013525,0.013808,0.014003,...,-0.007064,-0.007164,-0.007127,-0.006930,-0.006552,-0.005969,-0.005159,-0.004098,201210-1,Normal
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
210526-3-45,-0.001485,-0.000878,-0.000402,-0.000043,0.000207,0.000360,0.000427,0.000418,0.000344,0.000217,...,-0.006682,-0.006452,-0.006268,-0.006140,-0.006074,-0.006079,-0.006162,-0.006332,210526-3,Hyperglycemia
210526-3-46,-0.002341,-0.002507,-0.002601,-0.002634,-0.002616,-0.002558,-0.002469,-0.002360,-0.002242,-0.002125,...,-0.007714,-0.007414,-0.007165,-0.006984,-0.006882,-0.006875,-0.006976,-0.007200,210526-3,Hyperglycemia
210526-3-47,0.000508,0.001394,0.002019,0.002410,0.002595,0.002598,0.002449,0.002172,0.001796,0.001346,...,-0.007463,-0.006981,-0.006512,-0.006065,-0.005647,-0.005264,-0.004924,-0.004635,210526-3,Hyperglycemia
210526-3-48,-0.003978,-0.002882,-0.002103,-0.001602,-0.001346,-0.001297,-0.001420,-0.001680,-0.002040,-0.002465,...,-0.006052,-0.005594,-0.005001,-0.004258,-0.003353,-0.002271,-0.000998,0.000479,210526-3,Hyperglycemia


In [23]:
wavelength_df['SurID'].nunique()

63

In [24]:
# Same cross-validation as the baseline, now on the filtered spectra, for comparison.
evaluate_extra_trees(wavelength_df)

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


ExtraTreesClassifier Cross-Validation Accuracy: 0.6611 +/- 0.1276
ExtraTreesClassifier Cross-Validation Precision: 0.5889 +/- 0.1382
ExtraTreesClassifier Cross-Validation Recall: 0.5836 +/- 0.1306
ExtraTreesClassifier Cross-Validation F1-Score: 0.5585 +/- 0.1384


In [25]:
# Sweep the drop threshold from 0.10 to 0.90 in steps of 0.05 and re-run the
# cross-validation for each value.
#
# The loop uses its own `sweep_*` names on purpose: reusing `proportion_cutoff`,
# `outlier_SpecIDs`, `clean_df` and `wavelength_df` would leave those
# notebook-level variables holding the last sweep iteration (cutoff 0.9)
# instead of the selection made in the cells above.
#
# NOTE(review): in the recorded run the largest outlier proportion is about
# 0.62, so cutoffs of 0.65 and above drop nothing and reproduce the baseline.
for cutoff_percent in range(10, 91, 5):
    sweep_cutoff = cutoff_percent / 100  # Convert to proportion

    sweep_outlier_ids = outlier_proportions[outlier_proportions > sweep_cutoff].index
    sweep_clean_df = spectra_df[~spectra_df['SpecID'].isin(sweep_outlier_ids)]
    sweep_wavelength_df = prepare_wavelength_df(sweep_clean_df, 'Absorbance')
    print(sweep_cutoff)
    evaluate_extra_trees(sweep_wavelength_df)

0.1


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


ExtraTreesClassifier Cross-Validation Accuracy: 0.5613 +/- 0.1213
ExtraTreesClassifier Cross-Validation Precision: 0.5461 +/- 0.1093
ExtraTreesClassifier Cross-Validation Recall: 0.5216 +/- 0.1393
ExtraTreesClassifier Cross-Validation F1-Score: 0.4895 +/- 0.1177
0.15


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


ExtraTreesClassifier Cross-Validation Accuracy: 0.5667 +/- 0.1054
ExtraTreesClassifier Cross-Validation Precision: 0.5613 +/- 0.1159
ExtraTreesClassifier Cross-Validation Recall: 0.5016 +/- 0.1479
ExtraTreesClassifier Cross-Validation F1-Score: 0.4981 +/- 0.1272
0.2
ExtraTreesClassifier Cross-Validation Accuracy: 0.5967 +/- 0.1450
ExtraTreesClassifier Cross-Validation Precision: 0.5918 +/- 0.1403
ExtraTreesClassifier Cross-Validation Recall: 0.6196 +/- 0.1306
ExtraTreesClassifier Cross-Validation F1-Score: 0.5680 +/- 0.1364
0.25


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


ExtraTreesClassifier Cross-Validation Accuracy: 0.5680 +/- 0.1458
ExtraTreesClassifier Cross-Validation Precision: 0.5547 +/- 0.1411
ExtraTreesClassifier Cross-Validation Recall: 0.5114 +/- 0.1778
ExtraTreesClassifier Cross-Validation F1-Score: 0.5004 +/- 0.1655
0.3


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


ExtraTreesClassifier Cross-Validation Accuracy: 0.5904 +/- 0.1274
ExtraTreesClassifier Cross-Validation Precision: 0.5637 +/- 0.1285
ExtraTreesClassifier Cross-Validation Recall: 0.5423 +/- 0.1569
ExtraTreesClassifier Cross-Validation F1-Score: 0.5286 +/- 0.1335
0.35


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


ExtraTreesClassifier Cross-Validation Accuracy: 0.5724 +/- 0.1239
ExtraTreesClassifier Cross-Validation Precision: 0.5425 +/- 0.1153
ExtraTreesClassifier Cross-Validation Recall: 0.5279 +/- 0.1363
ExtraTreesClassifier Cross-Validation F1-Score: 0.5079 +/- 0.1275
0.4


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


ExtraTreesClassifier Cross-Validation Accuracy: 0.5882 +/- 0.1360
ExtraTreesClassifier Cross-Validation Precision: 0.5809 +/- 0.1375
ExtraTreesClassifier Cross-Validation Recall: 0.5961 +/- 0.1655
ExtraTreesClassifier Cross-Validation F1-Score: 0.5463 +/- 0.1597
0.45


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


ExtraTreesClassifier Cross-Validation Accuracy: 0.6042 +/- 0.1331
ExtraTreesClassifier Cross-Validation Precision: 0.5501 +/- 0.1415
ExtraTreesClassifier Cross-Validation Recall: 0.5544 +/- 0.1494
ExtraTreesClassifier Cross-Validation F1-Score: 0.5262 +/- 0.1484
0.5


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


ExtraTreesClassifier Cross-Validation Accuracy: 0.5944 +/- 0.0603
ExtraTreesClassifier Cross-Validation Precision: 0.5912 +/- 0.0431
ExtraTreesClassifier Cross-Validation Recall: 0.5682 +/- 0.0811
ExtraTreesClassifier Cross-Validation F1-Score: 0.5505 +/- 0.0588
0.55


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


ExtraTreesClassifier Cross-Validation Accuracy: 0.5944 +/- 0.0603
ExtraTreesClassifier Cross-Validation Precision: 0.5912 +/- 0.0431
ExtraTreesClassifier Cross-Validation Recall: 0.5682 +/- 0.0811
ExtraTreesClassifier Cross-Validation F1-Score: 0.5505 +/- 0.0588
0.6


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


ExtraTreesClassifier Cross-Validation Accuracy: 0.5827 +/- 0.0635
ExtraTreesClassifier Cross-Validation Precision: 0.5967 +/- 0.0714
ExtraTreesClassifier Cross-Validation Recall: 0.5904 +/- 0.1085
ExtraTreesClassifier Cross-Validation F1-Score: 0.5549 +/- 0.0851
0.65


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


ExtraTreesClassifier Cross-Validation Accuracy: 0.5844 +/- 0.1088
ExtraTreesClassifier Cross-Validation Precision: 0.5928 +/- 0.1054
ExtraTreesClassifier Cross-Validation Recall: 0.5963 +/- 0.1511
ExtraTreesClassifier Cross-Validation F1-Score: 0.5618 +/- 0.1263
0.7


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


ExtraTreesClassifier Cross-Validation Accuracy: 0.5844 +/- 0.1088
ExtraTreesClassifier Cross-Validation Precision: 0.5928 +/- 0.1054
ExtraTreesClassifier Cross-Validation Recall: 0.5963 +/- 0.1511
ExtraTreesClassifier Cross-Validation F1-Score: 0.5618 +/- 0.1263
0.75


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


ExtraTreesClassifier Cross-Validation Accuracy: 0.5844 +/- 0.1088
ExtraTreesClassifier Cross-Validation Precision: 0.5928 +/- 0.1054
ExtraTreesClassifier Cross-Validation Recall: 0.5963 +/- 0.1511
ExtraTreesClassifier Cross-Validation F1-Score: 0.5618 +/- 0.1263
0.8


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


ExtraTreesClassifier Cross-Validation Accuracy: 0.5844 +/- 0.1088
ExtraTreesClassifier Cross-Validation Precision: 0.5928 +/- 0.1054
ExtraTreesClassifier Cross-Validation Recall: 0.5963 +/- 0.1511
ExtraTreesClassifier Cross-Validation F1-Score: 0.5618 +/- 0.1263
0.85


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


ExtraTreesClassifier Cross-Validation Accuracy: 0.5844 +/- 0.1088
ExtraTreesClassifier Cross-Validation Precision: 0.5928 +/- 0.1054
ExtraTreesClassifier Cross-Validation Recall: 0.5963 +/- 0.1511
ExtraTreesClassifier Cross-Validation F1-Score: 0.5618 +/- 0.1263
0.9


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


ExtraTreesClassifier Cross-Validation Accuracy: 0.5844 +/- 0.1088
ExtraTreesClassifier Cross-Validation Precision: 0.5928 +/- 0.1054
ExtraTreesClassifier Cross-Validation Recall: 0.5963 +/- 0.1511
ExtraTreesClassifier Cross-Validation F1-Score: 0.5618 +/- 0.1263
