In [135]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
from sklearn.metrics import accuracy_score, roc_auc_score, fbeta_score, precision_score, recall_score, roc_curve

In [136]:
import cv2
import numpy as np

def extract_morphological_features(image, max_contours=1):
    """Return area and perimeter of the first ``max_contours`` external contours.

    The image is converted from BGR to grayscale, binarised with a fixed
    threshold of 127, and its external contours are extracted. Contours are
    taken in the order ``cv2.findContours`` returns them.

    Args:
        image: Colour image in BGR channel order (as produced by ``cv2.imread``).
        max_contours: Number of contours to describe.

    Returns:
        1-D numpy array ``[area_0, perimeter_0, area_1, perimeter_1, ...]``,
        zero-padded to ``2 * max_contours`` entries when fewer contours exist.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, binary_mask = cv2.threshold(grayscale, 127, 255, cv2.THRESH_BINARY)
    contours, _ = cv2.findContours(binary_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # Flatten (area, closed-perimeter) pairs into a single list.
    measurements = [
        value
        for outline in contours[:max_contours]
        for value in (cv2.contourArea(outline), cv2.arcLength(outline, True))
    ]

    # Pad with zeros so the vector length does not depend on how many
    # contours were actually found.
    missing = max_contours * 2 - len(measurements)
    if missing > 0:
        measurements += [0] * missing

    return np.array(measurements)

In [137]:
from skimage.feature import *
def extract_texture_features(image):
    """Compute gray-level co-occurrence matrix (GLCM) texture descriptors.

    The image is converted from BGR to grayscale and a symmetric, normalised
    GLCM is built from pixel pairs at distance 1 and angle 0.

    Args:
        image: Colour image in BGR channel order (as produced by ``cv2.imread``).

    Returns:
        1-D numpy array
        ``[contrast, dissimilarity, homogeneity, energy, correlation]``.
    """
    # scikit-image 0.19 renamed grey* -> gray* and later releases dropped the
    # old spellings; import locally so the function works on both.
    try:
        from skimage.feature import graycomatrix, graycoprops
    except ImportError:  # older scikit-image that only ships the grey* names
        from skimage.feature import greycomatrix as graycomatrix
        from skimage.feature import greycoprops as graycoprops

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Pixel pairs at distance 1 and angle 0.
    glcm = graycomatrix(gray, [1], [0], symmetric=True, normed=True)

    property_names = ('contrast', 'dissimilarity', 'homogeneity', 'energy', 'correlation')
    # One distance and one angle were requested, so each property is a 1x1 array.
    texture_features = [graycoprops(glcm, name)[0, 0] for name in property_names]

    return np.array(texture_features)

In [138]:
def extract_color_features(image, hist_size=512):
    """Return a flattened, normalised 3-channel colour histogram.

    A joint histogram with 8 bins per channel (8 * 8 * 8 = 512 cells) is
    computed over the value range [0, 256) of each channel, normalised with
    ``cv2.normalize`` and flattened.

    Args:
        image: 3-channel image array.
        hist_size: Length of the returned vector; the flattened histogram is
            zero-padded at the end or truncated to this length.

    Returns:
        1-D numpy array of length ``hist_size``.
    """
    channels = [0, 1, 2]
    bins_per_channel = [8, 8, 8]
    value_ranges = [0, 256, 0, 256, 0, 256]

    histogram = cv2.calcHist([image], channels, None, bins_per_channel, value_ranges)
    flat = cv2.normalize(histogram, histogram).flatten()

    # Force a fixed-length vector: pad when too short, otherwise cut.
    shortfall = hist_size - flat.size
    if shortfall > 0:
        return np.pad(flat, (0, shortfall), 'constant')
    return flat[:hist_size]


In [139]:
def extract_features(image_path):
    """Load an image from disk and build its combined feature vector.

    Args:
        image_path: Path of the image file to read with ``cv2.imread``.

    Returns:
        1-D numpy array: morphological features followed by colour-histogram
        features.

    Raises:
        OSError: If ``cv2.imread`` cannot load an image from ``image_path``.
    """
    image = cv2.imread(image_path)
    # cv2.imread signals a missing or undecodable file by returning None
    # instead of raising; fail here with the offending path rather than with
    # an opaque error inside the first feature extractor.
    if image is None:
        raise OSError(f"Could not read image: {image_path!r}")

    morphological_features = extract_morphological_features(image)
    # Texture features (extract_texture_features) are currently not part of
    # the vector.
    color_features = extract_color_features(image)

    # Combine all features into a single feature vector
    return np.hstack([morphological_features, color_features])

In [140]:
# Read the annotation table (one row per isolated-cell image: filename and class)
annotations_csv = "isolated_cells/single_cell_dataset.csv"
df = pd.read_csv(annotations_csv)
df

Unnamed: 0,filename,class
0,img-0137_png_jpg.rf.0ea8b8dcfc295e58f709f8eadc...,neutrophil
1,img-0073_png_jpg.rf.1e42284b7c00c8e1e02b03c9f1...,neutrophil
2,img-0197_png_jpg.rf.3104549baeb172a227efa2ac87...,neutrophil
3,img-0059_png_jpg.rf.90762669b9486e21e6ce570efb...,neutrophil
4,img-0081_png_jpg.rf.2151947735c50695395cfc7847...,neutrophil
...,...,...
7102,img-0082_png_jpg.rf.9719f27bbe102f331e38eb039e...,artefatto
7103,img-0224_png_jpg.rf.b4aae7b06a2394a5bedde89880...,artefatto
7104,img-0134_png_jpg.rf.4099dc986ce0b3a39a40de4780...,artefatto
7105,img-0048_png_jpg.rf.80ebfae57f4023c2d8ef9ff0ae...,artefatto


In [141]:
features = []
labels = []

# Build one feature vector per annotated image; each image is stored under
# isolated_cells/<class>/<filename>.
for cell_class, file_name in zip(df['class'], df['filename']):
    image_path = f"isolated_cells/{cell_class}/{file_name}"
    features.append(extract_features(image_path))
    labels.append(cell_class)
        

In [142]:
# 'features' is the list of per-image feature arrays and 'labels' the list of
# class names; turn both into numpy arrays.
X, y = np.array(features), np.array(labels)

# Hold out 20% of the samples as the test set (seeded so the split is repeatable)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

In [143]:
# Model choice: a Random Forest classifier. The seed is pinned (same value as
# the train/test split) so that re-running the cell rebuilds the same forest
# and reports the same metrics.
model = RandomForestClassifier(random_state=42)

# Train the model
model.fit(X_train, y_train)

# Predict on the held-out test set
y_pred = model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy * 100:.2f}%')

Accuracy: 78.83%


In [144]:
# Report accuracy plus class-weighted precision and recall on the test split.
print("------ RandomForestClassifier ------")
for caption, scorer, options in (
    ("Accuracy:", accuracy_score, {}),
    ("Precision:", precision_score, {"average": "weighted"}),
    ("Recall:", recall_score, {"average": "weighted"}),
):
    print(caption, scorer(y_test, y_pred, **options))
print("---------------------------------------------------------")

------ RandomForestClassifier ------
Accuracy: 0.7883263009845288
Precision: 0.7748041499040983
Recall: 0.7883263009845288
---------------------------------------------------------


  _warn_prf(average, modifier, msg_start, len(result))


In [145]:
from sklearn.model_selection import cross_validate

# 10-fold cross-validation of the model on the full dataset, scored by accuracy
cv_results = cross_validate(
    model,
    X,
    y,
    cv=10,
    scoring=['accuracy'],
)
cv_results

{'fit_time': array([1.66333008, 1.49780583, 1.57111144, 1.51226044, 1.55753779,
        1.60874081, 1.60093594, 1.56717372, 1.59463573, 1.6185317 ]),
 'score_time': array([0.02557778, 0.02298546, 0.02479792, 0.02400088, 0.02493644,
        0.02365351, 0.02633929, 0.02492642, 0.0243144 , 0.02315688]),
 'test_accuracy': array([0.7862166 , 0.79465541, 0.79746835, 0.78481013, 0.76933896,
        0.78199719, 0.79324895, 0.78591549, 0.80140845, 0.81126761])}