In [33]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score
import pandas as pd

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
import joblib
import os

from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier

In [34]:
feat_train = pd.read_csv("radiomics_features_training_original.csv")
brats_dir = "./BraTS2020" 
path_train = os.path.join(brats_dir, "BraTS2020_TrainingData", "MICCAI_BraTS2020_TrainingData")
map_train = pd.read_csv(os.path.join(path_train, "name_mapping.csv"))
feat_train_perturbed = pd.read_csv("radiomics_features_training_perturbed.csv")

In [35]:

# Merge features with map train to get labels

data_train = feat_train.merge(map_train, left_on="case", right_on="BraTS_2020_subject_ID")
data_train.head()

Unnamed: 0,case,modality,mask_type,original_shape_Elongation,original_shape_Flatness,original_shape_LeastAxisLength,original_shape_MajorAxisLength,original_shape_Maximum2DDiameterColumn,original_shape_Maximum2DDiameterRow,original_shape_Maximum2DDiameterSlice,...,original_ngtdm_Coarseness,original_ngtdm_Complexity,original_ngtdm_Contrast,original_ngtdm_Strength,Grade,BraTS_2017_subject_ID,BraTS_2018_subject_ID,TCGA_TCIA_subject_ID,BraTS_2019_subject_ID,BraTS_2020_subject_ID
0,BraTS20_Training_082,t1,original,0.615924,0.558094,13.056671,23.395129,19.416488,26.019224,25.317978,...,0.004038,19.403883,0.012255,0.145263,HGG,Brats17_CBICA_AZD_1,Brats18_CBICA_AZD_1,,BraTS19_CBICA_AZD_1,BraTS20_Training_082
1,BraTS20_Training_082,t1ce,original,0.615924,0.558094,13.056671,23.395129,19.416488,26.019224,25.317978,...,0.004579,171.683748,0.079231,0.489439,HGG,Brats17_CBICA_AZD_1,Brats18_CBICA_AZD_1,,BraTS19_CBICA_AZD_1,BraTS20_Training_082
2,BraTS20_Training_082,t2,original,0.615924,0.558094,13.056671,23.395129,19.416488,26.019224,25.317978,...,0.004393,24.992246,0.016806,0.190565,HGG,Brats17_CBICA_AZD_1,Brats18_CBICA_AZD_1,,BraTS19_CBICA_AZD_1,BraTS20_Training_082
3,BraTS20_Training_082,flair,original,0.615924,0.558094,13.056671,23.395129,19.416488,26.019224,25.317978,...,0.004867,79.792367,0.021403,0.483111,HGG,Brats17_CBICA_AZD_1,Brats18_CBICA_AZD_1,,BraTS19_CBICA_AZD_1,BraTS20_Training_082
4,BraTS20_Training_244,t1,original,0.73997,0.638121,33.263382,52.127075,55.605755,62.177166,61.188234,...,0.000508,262.315113,0.012017,0.353904,HGG,Brats17_TCIA_603_1,Brats18_TCIA06_603_1,TCGA-19-5958,BraTS19_TCIA06_603_1,BraTS20_Training_244


In [36]:
results = []

In [37]:
# Select features and target for training
# Use only original features, convert to NumPy arrays
X = data_train.filter(regex="original").values
y = data_train["Grade"].values

# Split data into training, validation and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
#X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=0)

# Scale features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [38]:
def evaluate_model(model, X_train, y_train, X_test, y_test):
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    return accuracy_score(y_test, y_pred), precision_score(y_test, y_pred, pos_label="LGG"), recall_score(y_test, y_pred, pos_label="LGG")

In [39]:
# Train a logistic regression model
param_grid = {"C": [0.1, 1, 10, 100]}
model = GridSearchCV(LogisticRegression(max_iter=1000), param_grid, cv=5)

# Evaluate the model
accuracy, precision, recall = evaluate_model(model, X_train_scaled, y_train, X_test_scaled, y_test)
print(f"Accuracy: {accuracy:.2f}, Precision: {precision:.2f}, Recall: {recall:.2f}")

# Save the model
joblib.dump(model, "model.pkl")
joblib.dump(scaler, "scaler.pkl")

results.append({"model": "LogisticRegression", "accuracy": accuracy, "precision": precision, "recall": recall, "params": model.best_params_, "n_components": None, "dataset": "original"})


Accuracy: 0.90, Precision: 0.85, Recall: 0.67


In [40]:
# Train a logistic regression model with PCA features

param_grid = {"logisticregression__C": [0.1, 1, 10, 100], "pca__n_components": [0.95]}

pipe = Pipeline([("pca", PCA()), ("logisticregression", LogisticRegression(max_iter=1000))])
model = GridSearchCV(pipe, param_grid, cv=5)

# Evaluate the model
accuracy, precision, recall = evaluate_model(model, X_train_scaled, y_train, X_test_scaled, y_test)
print(f"Accuracy: {accuracy:.2f}, Precision: {precision:.2f}, Recall: {recall:.2f}")

# Save the model
joblib.dump(model, "model_pca.pkl")

results.append({"model": "LogisticRegression", "accuracy": accuracy, "precision": precision, "recall": recall, "params": model.best_params_, "n_components": model.best_params_["pca__n_components"], "dataset": "original"})




Accuracy: 0.85, Precision: 0.80, Recall: 0.48


In [41]:
# Train a random forest model
param_grid = {"n_estimators": [100, 200, 300], "max_depth": [10, 20, 30]}
model = GridSearchCV(RandomForestClassifier(random_state=0), param_grid, cv=5)

# Evaluate the model
accuracy, precision, recall = evaluate_model(model, X_train_scaled, y_train, X_test_scaled, y_test)
print(f"Accuracy: {accuracy:.2f}, Precision: {precision:.2f}, Recall: {recall:.2f}")

# Save the model
joblib.dump(model, "model_rf.pkl")

results.append({"model": "RandomForest", "accuracy": accuracy, "precision": precision, "recall": recall, "params": model.best_params_, "n_components": None, "dataset": "original"})


Accuracy: 0.92, Precision: 0.92, Recall: 0.70


In [42]:
# Train a random forest model with PCA features
param_grid = {"randomforestclassifier__n_estimators": [100, 200, 300, 400, 500], "randomforestclassifier__max_depth": [10, 20, 30], "pca__n_components": [10, 20, 30, 40]}
pipe = Pipeline([("pca", PCA()), ("randomforestclassifier", RandomForestClassifier(random_state=0))])
model = GridSearchCV(pipe, param_grid, cv=5)

# Evaluate the model
accuracy, precision, recall = evaluate_model(model, X_train_scaled, y_train, X_test_scaled, y_test)
print(f"Accuracy: {accuracy:.2f}, Precision: {precision:.2f}, Recall: {recall:.2f}")

# Save the model
joblib.dump(model, "model_rf_pca.pkl")

results.append({"model": "RandomForest", "accuracy": accuracy, "precision": precision, "recall": recall, "params": model.best_params_, "n_components": model.best_params_["pca__n_components"], "dataset": "original"}) 


Accuracy: 0.87, Precision: 0.94, Recall: 0.48


In [43]:
# Train models with perturbed features
X_perturbed = feat_train_perturbed.filter(regex="original").values
y_perturbed = data_train["Grade"].values

X_train_perturbed, X_test_perturbed, y_train_perturbed, y_test_perturbed = train_test_split(X_perturbed, y_perturbed, test_size=0.2, random_state=0)

scaler = StandardScaler()
X_train_scaled_perturbed = scaler.fit_transform(X_train_perturbed)
X_test_scaled_perturbed = scaler.transform(X_test_perturbed)

In [44]:
# Train logistic regression model with perturbed features
param_grid = {"C": [0.1, 1, 10, 100]}
model = GridSearchCV(LogisticRegression(max_iter=1000), param_grid, cv=5)

# Evaluate the model
accuracy, precision, recall = evaluate_model(model, X_train_scaled_perturbed, y_train_perturbed, X_test_scaled_perturbed, y_test_perturbed)
print(f"Accuracy: {accuracy:.2f}, Precision: {precision:.2f}, Recall: {recall:.2f}")

# Save the model
joblib.dump(model, "model_perturbed.pkl")
joblib.dump(scaler, "scaler_perturbed.pkl")

results.append({"model": "LogisticRegression", "accuracy": accuracy, "precision": precision, "recall": recall, "params": model.best_params_, "n_components": None, "dataset": "perturbed"})

Accuracy: 0.89, Precision: 0.80, Recall: 0.70


In [45]:
# Train logistic regression model with PCA features and perturbed features
param_grid = {"logisticregression__C": [0.1, 1, 10, 100], "pca__n_components": [0.95]}
pipe = Pipeline([("pca", PCA()), ("logisticregression", LogisticRegression(max_iter=1000))])
model = GridSearchCV(pipe, param_grid, cv=5)

# Evaluate the model
accuracy, precision, recall = evaluate_model(model, X_train_scaled_perturbed, y_train_perturbed, X_test_scaled_perturbed, y_test_perturbed)
print(f"Accuracy: {accuracy:.2f}, Precision: {precision:.2f}, Recall: {recall:.2f}")

# Save the model
joblib.dump(model, "model_pca_perturbed.pkl")

results.append({"model": "LogisticRegression", "accuracy": accuracy, "precision": precision, "recall": recall, "params": model.best_params_, "n_components": model.best_params_["pca__n_components"], "dataset": "perturbed"})

Accuracy: 0.88, Precision: 0.86, Recall: 0.55


In [46]:
# Train random forest model with perturbed features
param_grid = {"n_estimators": [100, 200, 300], "max_depth": [10, 20, 30]}
model = GridSearchCV(RandomForestClassifier(random_state=0), param_grid, cv=5)

# Evaluate the model
accuracy, precision, recall = evaluate_model(model, X_train_scaled_perturbed, y_train_perturbed, X_test_scaled_perturbed, y_test_perturbed)
print(f"Accuracy: {accuracy:.2f}, Precision: {precision:.2f}, Recall: {recall:.2f}")

# Save the model
joblib.dump(model, "model_rf_perturbed.pkl")

results.append({"model": "RandomForest", "accuracy": accuracy, "precision": precision, "recall": recall, "params": model.best_params_, "n_components": None, "dataset": "perturbed"})

Accuracy: 0.91, Precision: 0.90, Recall: 0.70


In [47]:
# Train random forest model with PCA features and perturbed features
param_grid = {"randomforestclassifier__n_estimators": [100, 200, 300, 400, 500], "randomforestclassifier__max_depth": [10, 20, 30], "pca__n_components": [0.95]}
pipe = Pipeline([("pca", PCA()), ("randomforestclassifier", RandomForestClassifier(random_state=0))])
model = GridSearchCV(pipe, param_grid, cv=5)

# Evaluate the model
accuracy, precision, recall = evaluate_model(model, X_train_scaled_perturbed, y_train_perturbed, X_test_scaled_perturbed, y_test_perturbed)
print(f"Accuracy: {accuracy:.2f}, Precision: {precision:.2f}, Recall: {recall:.2f}")

# Save the model
joblib.dump(model, "model_rf_pca_perturbed.pkl")

results.append({"model": "RandomForest", "accuracy": accuracy, "precision": precision, "recall": recall, "params": model.best_params_, "n_components": model.best_params_["pca__n_components"], "dataset": "perturbed"})


Accuracy: 0.88, Precision: 0.92, Recall: 0.52


In [48]:
results_df = pd.DataFrame(results)
results_df

Unnamed: 0,model,accuracy,precision,recall,params,n_components,dataset
0,LogisticRegression,0.897959,0.849057,0.671642,{'C': 10},,original
1,LogisticRegression,0.853741,0.8,0.477612,"{'logisticregression__C': 0.1, 'pca__n_compone...",0.95,original
2,RandomForest,0.918367,0.921569,0.701493,"{'max_depth': 30, 'n_estimators': 100}",,original
3,RandomForest,0.87415,0.941176,0.477612,"{'pca__n_components': 40, 'randomforestclassif...",40.0,original
4,LogisticRegression,0.891156,0.79661,0.701493,{'C': 100},,perturbed
5,LogisticRegression,0.877551,0.860465,0.552239,"{'logisticregression__C': 1, 'pca__n_component...",0.95,perturbed
6,RandomForest,0.914966,0.903846,0.701493,"{'max_depth': 20, 'n_estimators': 100}",,perturbed
7,RandomForest,0.880952,0.921053,0.522388,"{'pca__n_components': 0.95, 'randomforestclass...",0.95,perturbed


In [49]:
# Get feature importances from the best random forest model
model = joblib.load("model_rf_perturbed.pkl")
importances = model.best_estimator_.feature_importances_
importances_df = pd.DataFrame({"feature": data_train.filter(regex="original").columns, "importance": importances})
importances_df = importances_df.sort_values("importance", ascending=False)
importances_df.head(20)

Unnamed: 0,feature,importance
12,original_shape_SurfaceVolumeRatio,0.086829
8,original_shape_MeshVolume,0.064858
13,original_shape_VoxelVolume,0.059586
10,original_shape_Sphericity,0.033392
91,original_glszm_LargeAreaHighGrayLevelEmphasis,0.030181
57,original_gldm_DependenceNonUniformity,0.026946
103,original_ngtdm_Coarseness,0.026722
79,original_glrlm_RunLengthNonUniformity,0.025311
2,original_shape_LeastAxisLength,0.02399
4,original_shape_Maximum2DDiameterColumn,0.019635


In [50]:
# Train a model without shape features
X = data_train.filter(regex="original").drop(columns=data_train.filter(regex="shape").columns).values
y = data_train["Grade"].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

X_train.shape

(1174, 93)

In [51]:

param_grid = {"C": [0.1, 1, 10, 100]}
model = GridSearchCV(LogisticRegression(max_iter=1000), param_grid, cv=5)

accuracy, precision, recall = evaluate_model(model, X_train_scaled, y_train, X_test_scaled, y_test)
print(f"Accuracy: {accuracy:.2f}, Precision: {precision:.2f}, Recall: {recall:.2f}")

results.append({"model": "LogisticRegression", "accuracy": accuracy, "precision": precision, "recall": recall, "params": model.best_params_, "n_components": None, "dataset": "original_no_shape"})

Accuracy: 0.89, Precision: 0.81, Recall: 0.66


In [52]:
# Train a model without shape features and with PCA
param_grid = {"logisticregression__C": [0.1, 1, 10, 100], "pca__n_components": [0.95]}
pipe = Pipeline([("pca", PCA()), ("logisticregression", LogisticRegression(max_iter=1000))])
model = GridSearchCV(pipe, param_grid, cv=5)

accuracy, precision, recall = evaluate_model(model, X_train_scaled, y_train, X_test_scaled, y_test)
print(f"Accuracy: {accuracy:.2f}, Precision: {precision:.2f}, Recall: {recall:.2f}")

results.append({"model": "LogisticRegression", "accuracy": accuracy, "precision": precision, "recall": recall, "params": model.best_params_, "n_components": model.best_params_["pca__n_components"], "dataset": "original_no_shape"})


Accuracy: 0.85, Precision: 0.82, Recall: 0.42


In [53]:
# Train a model without shape features and with perturbed features
X_perturbed = feat_train_perturbed.filter(regex="original").drop(columns=feat_train_perturbed.filter(regex="shape").columns).values
y_perturbed = data_train["Grade"].values

X_train_perturbed, X_test_perturbed, y_train_perturbed, y_test_perturbed = train_test_split(X_perturbed, y_perturbed, test_size=0.2, random_state=0)
X_train_scaled_perturbed = scaler.fit_transform(X_train_perturbed)
X_test_scaled_perturbed = scaler.transform(X_test_perturbed)

param_grid = {"C": [0.1, 1, 10, 100]}
model = GridSearchCV(LogisticRegression(max_iter=1000), param_grid, cv=5)

accuracy, precision, recall = evaluate_model(model, X_train_scaled_perturbed, y_train_perturbed, X_test_scaled_perturbed, y_test_perturbed)
print(f"Accuracy: {accuracy:.2f}, Precision: {precision:.2f}, Recall: {recall:.2f}")

results.append({"model": "LogisticRegression", "accuracy": accuracy, "precision": precision, "recall": recall, "params": model.best_params_, "n_components": None, "dataset": "perturbed_no_shape"})


Accuracy: 0.87, Precision: 0.74, Recall: 0.69


In [54]:
# Train a model without shape features and with PCA and perturbed features
param_grid = {"logisticregression__C": [0.1, 1, 10, 100], "pca__n_components": [0.95]}
pipe = Pipeline([("pca", PCA()), ("logisticregression", LogisticRegression(max_iter=1000))])
model = GridSearchCV(pipe, param_grid, cv=5)

accuracy, precision, recall = evaluate_model(model, X_train_scaled_perturbed, y_train_perturbed, X_test_scaled_perturbed, y_test_perturbed)
print(f"Accuracy: {accuracy:.2f}, Precision: {precision:.2f}, Recall: {recall:.2f}")

results.append({"model": "LogisticRegression", "accuracy": accuracy, "precision": precision, "recall": recall, "params": model.best_params_, "n_components": model.best_params_["pca__n_components"], "dataset": "perturbed_no_shape"})


Accuracy: 0.87, Precision: 0.87, Recall: 0.49


In [55]:
# Train a model without shape features and with random forest   
param_grid = {"n_estimators": [100, 200, 300], "max_depth": [10, 20, 30]}
model = GridSearchCV(RandomForestClassifier(random_state=0), param_grid, cv=5)

accuracy, precision, recall = evaluate_model(model, X_train_scaled, y_train, X_test_scaled, y_test)
print(f"Accuracy: {accuracy:.2f}, Precision: {precision:.2f}, Recall: {recall:.2f}")

results.append({"model": "RandomForest", "accuracy": accuracy, "precision": precision, "recall": recall, "params": model.best_params_, "n_components": None, "dataset": "original_no_shape"})


Accuracy: 0.86, Precision: 0.75, Recall: 0.57


In [None]:
# Train a model without shape features and with PCA and random forest
param_grid = {"randomforestclassifier__n_estimators": [100, 200, 300, 400, 500], "randomforestclassifier__max_depth": [10, 20, 30], "pca__n_components": [0.95]}
pipe = Pipeline([("pca", PCA()), ("randomforestclassifier", RandomForestClassifier(random_state=0))])
model = GridSearchCV(pipe, param_grid, cv=5)

accuracy, precision, recall = evaluate_model(model, X_train_scaled, y_train, X_test_scaled, y_test)
print(f"Accuracy: {accuracy:.2f}, Precision: {precision:.2f}, Recall: {recall:.2f}")

results.append({"model": "RandomForest", "accuracy": accuracy, "precision": precision, "recall": recall, "params": model.best_params_, "n_components": model.best_params_["pca__n_components"], "dataset": "original_no_shape"})

# Train a model without shape features and with perturbed features and random forest
param_grid = {"n_estimators": [100, 200, 300], "max_depth": [10, 20, 30]}
model = GridSearchCV(RandomForestClassifier(random_state=0), param_grid, cv=5)

accuracy, precision, recall = evaluate_model(model, X_train_scaled_perturbed, y_train_perturbed, X_test_scaled_perturbed, y_test_perturbed)
print(f"Accuracy: {accuracy:.2f}, Precision: {precision:.2f}, Recall: {recall:.2f}")

results.append({"model": "RandomForest", "accuracy": accuracy, "precision": precision, "recall": recall, "params": model.best_params_, "n_components": None, "dataset": "perturbed_no_shape"})


Accuracy: 0.86, Precision: 0.84, Recall: 0.46
Accuracy: 0.87, Precision: 0.89, Recall: 0.51


In [57]:
# Train a model without shape features and with PCA and perturbed features and random forest
param_grid = {"randomforestclassifier__n_estimators": [100, 200, 300, 400, 500], "randomforestclassifier__max_depth": [10, 20, 30], "pca__n_components": [0.95]}
pipe = Pipeline([("pca", PCA()), ("randomforestclassifier", RandomForestClassifier(random_state=0))])
model = GridSearchCV(pipe, param_grid, cv=5)

accuracy, precision, recall = evaluate_model(model, X_train_scaled_perturbed, y_train_perturbed, X_test_scaled_perturbed, y_test_perturbed)
print(f"Accuracy: {accuracy:.2f}, Precision: {precision:.2f}, Recall: {recall:.2f}")

results.append({"model": "RandomForest", "accuracy": accuracy, "precision": precision, "recall": recall, "params": model.best_params_, "n_components": model.best_params_["pca__n_components"], "dataset": "perturbed_no_shape"})


Accuracy: 0.87, Precision: 0.85, Recall: 0.51


In [67]:
results_df = pd.DataFrame(results)
results_df

Unnamed: 0,model,accuracy,precision,recall,params,n_components,dataset
0,LogisticRegression,0.897959,0.849057,0.671642,{'C': 10},,original
1,LogisticRegression,0.853741,0.8,0.477612,"{'logisticregression__C': 0.1, 'pca__n_compone...",0.95,original
2,RandomForest,0.918367,0.921569,0.701493,"{'max_depth': 30, 'n_estimators': 100}",,original
3,RandomForest,0.87415,0.941176,0.477612,"{'pca__n_components': 40, 'randomforestclassif...",40.0,original
4,LogisticRegression,0.891156,0.79661,0.701493,{'C': 100},,perturbed
5,LogisticRegression,0.877551,0.860465,0.552239,"{'logisticregression__C': 1, 'pca__n_component...",0.95,perturbed
6,RandomForest,0.914966,0.903846,0.701493,"{'max_depth': 20, 'n_estimators': 100}",,perturbed
7,RandomForest,0.880952,0.921053,0.522388,"{'pca__n_components': 0.95, 'randomforestclass...",0.95,perturbed
8,LogisticRegression,0.887755,0.814815,0.656716,{'C': 100},,original_no_shape
9,LogisticRegression,0.846939,0.823529,0.41791,"{'logisticregression__C': 0.1, 'pca__n_compone...",0.95,original_no_shape


In [59]:
results_df[results_df['model'] == 'LogisticRegression'].sort_values('accuracy', ascending=False)

Unnamed: 0,model,accuracy,precision,recall,params,n_components,dataset
0,LogisticRegression,0.897959,0.849057,0.671642,{'C': 10},,original
4,LogisticRegression,0.891156,0.79661,0.701493,{'C': 100},,perturbed
8,LogisticRegression,0.887755,0.814815,0.656716,{'C': 100},,original_no_shape
5,LogisticRegression,0.877551,0.860465,0.552239,"{'logisticregression__C': 1, 'pca__n_component...",0.95,perturbed
10,LogisticRegression,0.87415,0.741935,0.686567,{'C': 100},,perturbed_no_shape
11,LogisticRegression,0.867347,0.868421,0.492537,"{'logisticregression__C': 1, 'pca__n_component...",0.95,perturbed_no_shape
1,LogisticRegression,0.853741,0.8,0.477612,"{'logisticregression__C': 0.1, 'pca__n_compone...",0.95,original
9,LogisticRegression,0.846939,0.823529,0.41791,"{'logisticregression__C': 0.1, 'pca__n_compone...",0.95,original_no_shape


In [60]:
rf = results_df[results_df['model'] == 'RandomForest'].sort_values('accuracy', ascending=False)

In [61]:
rf.to_latex("rf_results.tex", index=False)

In [62]:
results_df[['model', 'n_components', 'accuracy', 'precision', 'recall', 'dataset']].where(results_df['dataset'].isin(['original','original_no_shape'])).sort_values('accuracy', ascending=False).dropna(how='all')

Unnamed: 0,model,n_components,accuracy,precision,recall,dataset
2,RandomForest,,0.918367,0.921569,0.701493,original
0,LogisticRegression,,0.897959,0.849057,0.671642,original
8,LogisticRegression,,0.887755,0.814815,0.656716,original_no_shape
3,RandomForest,40.0,0.87415,0.941176,0.477612,original
12,RandomForest,,0.857143,0.745098,0.567164,original_no_shape
13,RandomForest,0.95,0.857143,0.837838,0.462687,original_no_shape
1,LogisticRegression,0.95,0.853741,0.8,0.477612,original
9,LogisticRegression,0.95,0.846939,0.823529,0.41791,original_no_shape


In [65]:
results_df[['model', 'n_components', 'accuracy', 'precision', 'recall', 'dataset']].where(results_df['dataset'].isin(['perturbed','perturbed_no_shape'])).sort_values('accuracy', ascending=False).dropna(how='all')

Unnamed: 0,model,n_components,accuracy,precision,recall,dataset
6,RandomForest,,0.914966,0.903846,0.701493,perturbed
4,LogisticRegression,,0.891156,0.79661,0.701493,perturbed
7,RandomForest,0.95,0.880952,0.921053,0.522388,perturbed
5,LogisticRegression,0.95,0.877551,0.860465,0.552239,perturbed
10,LogisticRegression,,0.87415,0.741935,0.686567,perturbed_no_shape
11,LogisticRegression,0.95,0.867347,0.868421,0.492537,perturbed_no_shape
15,RandomForest,0.95,0.867347,0.85,0.507463,perturbed_no_shape
