In [1]:
# Show the kernel's working directory. The relative "../" paths used further
# down are resolved against it.
%pwd

'e:\\Github Projects\\Retinal_Blood_Vessels_Segmentation\\notebooks'

In [2]:
import sys
from pathlib import Path

# Make the project root importable so `scripts.prepare_data` can be found.
# The notebook runs from <project>/notebooks (the data paths below are written
# relative to that folder), so the root is the parent of the working directory.
# Deriving it this way avoids a machine-specific absolute path.
PROJECT_ROOT = str(Path.cwd().parent)

# Guard the append so re-running this cell does not stack duplicate entries.
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

In [3]:
import numpy as np

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    recall_score,
    accuracy_score,
    f1_score,
    precision_score,
    make_scorer
)
from sklearn.model_selection import GridSearchCV
from imblearn.under_sampling import RandomUnderSampler, NearMiss

from scripts.prepare_data import create_dataset_from_directory, crop_images

In [4]:
# Crop each raw image/mask folder into its mirror under ../cropped_images.
# The (source, destination) pairs are processed in the order listed.
crop_jobs = (
    ("../images/train/img", "../cropped_images/train/img"),
    ("../images/train/mask", "../cropped_images/train/mask"),
    ("../images/test/img", "../cropped_images/test/img"),
    ("../images/test/mask", "../cropped_images/test/mask"),
)
for raw_dir, cropped_dir in crop_jobs:
    crop_images(source_dir=raw_dir, destination_dir=cropped_dir)

# Options forwarded to the project helper that builds the training set from
# the cropped training images. Their exact semantics live in
# scripts.prepare_data; the values are collected here so they can be tuned
# in one place.
dataset_options = dict(
    channel=1,
    shape=None,
    window_shape=(5, 5),
    pad=True,
    padding=(2, 2),
    sample_size=10000,
    seed=42,
)
X_train, y_train = create_dataset_from_directory(dir="../cropped_images/train",
                                                 **dataset_options)

# Divide the labels by 255 and truncate to int.
# NOTE(review): presumably the masks are stored as 0/255 so this yields 0/1
# labels -- any intermediate value would be truncated to 0; confirm.
y_train = (y_train / 255).astype(int)

Image 1/21
Image 2/21
Image 3/21
Image 4/21
Image 5/21
Image 6/21
Image 7/21
Image 8/21
Image 9/21
Image 10/21
Image 11/21
Image 12/21
Image 13/21
Image 14/21
Image 15/21
Image 16/21
Image 17/21
Image 18/21
Image 19/21
Image 20/21
Image 21/21


In [5]:
# Sanity check: feature-matrix shape, label-vector shape, and the count of each label value.
X_train.shape, y_train.shape, np.unique(y_train, return_counts=True)

((210000, 58),
 (210000,),
 (array([0, 1]), array([194599,  15401], dtype=int64)))

In [6]:
# Resample the training set with imblearn's NearMiss under-sampler (default settings).
# Alternative sampler, also imported above: RandomUnderSampler(random_state=42).
undersampler = NearMiss()

X_undersampled, y_undersampled = undersampler.fit_resample(X_train, y_train)

# X_train and y_train are not deleted here, so they remain available to later cells.

In [7]:
# Sanity check after resampling: shapes and the count of each label value.
X_undersampled.shape, y_undersampled.shape, np.unique(y_undersampled, return_counts=True)

((30802, 58), (30802,), (array([0, 1]), array([15401, 15401], dtype=int64)))

In [8]:
def specificity(y_true, y_pred):
    """Return the recall of label 0 (the true-negative rate for 0/1 labels).

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels.
    y_pred : array-like
        Predicted labels.

    Returns
    -------
    float
        ``recall_score`` evaluated with ``pos_label=0``.
    """
    negative_class_recall = recall_score(y_true, y_pred, pos_label=0)
    return negative_class_recall


# Two-step pipeline: a scaler followed by a seeded random forest.
# The 'scaler' step is a placeholder that the grid below swaps out.
pipeline = Pipeline(
    steps=[
        ("scaler", StandardScaler()),
        ("clf", RandomForestClassifier(random_state=42)),
    ]
)

# Metrics recorded for every CV fold; each function is wrapped with make_scorer.
scoring = {
    metric_name: make_scorer(metric_fn)
    for metric_name, metric_fn in (
        ("f1", f1_score),
        ("accuracy", accuracy_score),
        ("precision", precision_score),
        ("recall", recall_score),
        ("specificity", specificity),
    )
}

# Search space: 3 depths x 5 forest sizes x 2 scalers = 30 candidates.
# NOTE(review): the logged folds score almost identically under both scalers;
# consider dropping the scaler from the grid to halve the number of fits.
params = [
    dict(
        clf__max_depth=np.arange(6, 11, 2),
        clf__n_estimators=np.arange(20, 61, 10),
        scaler=[MinMaxScaler(), StandardScaler()],
    )
]

# 4-fold search over the grid; the best candidate by F1 is refit on all data.
# NOTE(review): the data was resampled before cross-validation, so every
# validation fold is drawn from the resampled set -- confirm that is intended.
grid = GridSearchCV(
    estimator=pipeline,
    param_grid=params,
    scoring=scoring,
    refit="f1",
    cv=4,
    verbose=3,
)

grid.fit(X_undersampled, y_undersampled)

Fitting 4 folds for each of 30 candidates, totalling 120 fits
[CV 1/4] END clf__max_depth=6, clf__n_estimators=20, scaler=MinMaxScaler(); f1: (test=0.888) precision: (test=0.936) recall: (test=0.844) specificity: (test=0.942) total time=   1.0s
[CV 2/4] END clf__max_depth=6, clf__n_estimators=20, scaler=MinMaxScaler(); f1: (test=0.861) precision: (test=0.943) recall: (test=0.792) specificity: (test=0.952) total time=   0.9s
[CV 3/4] END clf__max_depth=6, clf__n_estimators=20, scaler=MinMaxScaler(); f1: (test=0.849) precision: (test=0.891) recall: (test=0.811) specificity: (test=0.901) total time=   1.0s
[CV 4/4] END clf__max_depth=6, clf__n_estimators=20, scaler=MinMaxScaler(); f1: (test=0.790) precision: (test=0.846) recall: (test=0.740) specificity: (test=0.865) total time=   1.0s
[CV 1/4] END clf__max_depth=6, clf__n_estimators=20, scaler=StandardScaler(); f1: (test=0.887) precision: (test=0.934) recall: (test=0.844) specificity: (test=0.941) total time=   1.0s
[CV 2/4] END clf__max

KeyboardInterrupt: 