In [1]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.models import Model
from tensorflow.keras.applications.efficientnet import preprocess_input
import lightgbm as lgb
import xgboost as xgb
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.ensemble import VotingClassifier
from sklearn.metrics import accuracy_score, classification_report, precision_score



In [2]:

# Define paths
data_dir = r"C:\Users\jakki\OneDrive\Desktop\project"
down_syndrome_dir = os.path.join(data_dir, "downSyndrome")
healthy_dir = os.path.join(data_dir, "healty")



In [3]:
# Load EfficientNet model without top layers
base_model = EfficientNetB0(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
x = base_model.output
x = tf.keras.layers.GlobalAveragePooling2D()(x)
model = Model(inputs=base_model.input, outputs=x)


In [4]:

# Function to extract features from images
def extract_features(image_path):
    img = load_img(image_path, target_size=(224, 224))
    x = img_to_array(img)
    x = np.expand_dims(x, axis=0)
    x = preprocess_input(x)
    features = model.predict(x)
    return features.flatten()



In [5]:
# Load images and extract features
image_paths = [] 
labels = []
for img in os.listdir(down_syndrome_dir):
    image_paths.append(os.path.join(down_syndrome_dir, img))
    labels.append(1)  # Down syndrome class
for img in os.listdir(healthy_dir):
    image_paths.append(os.path.join(healthy_dir, img))
    labels.append(0)  # Healthy class

features = []
for image_path in image_paths:
    features.append(extract_features(image_path))

features = np.array(features)
labels = np.array(labels)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 93ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 86ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 96ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 101ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 95ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 85ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 80ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 85ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 77ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 86ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 78ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 80ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 90m



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 92ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 98ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 138ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 128ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 101ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 85ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 77ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 89ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 75ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 86ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 71ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 74ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 72ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m

In [6]:
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

test_accuracies = []
train_accuracies = []

for train_index, test_index in skf.split(features, labels):
    X_train, X_test = features[train_index], features[test_index]
    y_train, y_test = labels[train_index], labels[test_index]

    # Create and train models within each fold
    lgb_model = lgb.LGBMClassifier()
    xgb_model = xgb.XGBClassifier()
    ensemble_model = VotingClassifier(estimators=[('lgbm', lgb_model), ('xgb', xgb_model)], voting='hard')
    ensemble_model.fit(X_train, y_train)

    # Evaluate and store test accuracy for the fold
    y_pred_test = ensemble_model.predict(X_test)
    accuracy_test = accuracy_score(y_test, y_pred_test)
    test_accuracies.append(accuracy_test)

    # Calculate and store train accuracy for the fold
    y_pred_train = ensemble_model.predict(X_train)
    accuracy_train = accuracy_score(y_train, y_pred_train)
    train_accuracies.append(accuracy_train)
    print(f"Train Accuracy for fold: {accuracy_train}")
    
    # Generate and print classification report and precision for test set
    report = classification_report(y_test, y_pred_test, output_dict=True)
    precision = precision_score(y_test, y_pred_test)
    print(f"Classification Report for fold:\n{report}")
    print(f"Precision: {precision}")

# Calculate and print average test and train accuracies across folds
average_test_accuracy = np.mean(test_accuracies)
average_train_accuracy = np.mean(train_accuracies)
print("Average Test Accuracy:", average_test_accuracy)
print("Average Train Accuracy:", average_train_accuracy)

[LightGBM] [Info] Number of positive: 1200, number of negative: 1199
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.115532 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 326400
[LightGBM] [Info] Number of data points in the train set: 2399, number of used features: 1280
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500208 -> initscore=0.000834
[LightGBM] [Info] Start training from score 0.000834
Train Accuracy for fold: 1.0
Classification Report for fold:
{'0': {'precision': 0.7955974842767296, 'recall': 0.8433333333333334, 'f1-score': 0.8187702265372169, 'support': 300.0}, '1': {'precision': 0.8333333333333334, 'recall': 0.7833333333333333, 'f1-score': 0.8075601374570447, 'support': 300.0}, 'accuracy': 0.8133333333333334, 'macro avg': {'precision': 0.8144654088050315, 'recall': 0.8133333333333334, 'f1-score': 0.8131651819971308, 'support': 600.0}, 'weighted avg': {'precision': 0.8144654088050

In [7]:
from sklearn.tree import DecisionTreeClassifier
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

test_accuracies = []
train_accuracies = []

for train_index, test_index in skf.split(features, labels):
    X_train, X_test = features[train_index], features[test_index]
    y_train, y_test = labels[train_index], labels[test_index]

    # Create and train Decision Tree model within each fold
    decision_tree_model = DecisionTreeClassifier(random_state=42)
    decision_tree_model.fit(X_train, y_train)

    # Calculate and store test accuracy for the fold
    y_pred_test = decision_tree_model.predict(X_test)
    accuracy_test = accuracy_score(y_test, y_pred_test)
    test_accuracies.append(accuracy_test)

    # Calculate and store train accuracy for the fold
    y_pred_train = decision_tree_model.predict(X_train)
    accuracy_train = accuracy_score(y_train, y_pred_train)
    train_accuracies.append(accuracy_train)
    print(f"Train Accuracy for fold: {accuracy_train}")
    
    # Generate and print classification report and precision
    report = classification_report(y_test, y_pred_test, output_dict=True)
    precision = precision_score(y_test, y_pred_test)
    print(f"Classification Report for fold:\n{report}")
    print(f"Precision: {precision}")

# Calculate and print average test and train accuracies across folds
average_test_accuracy = np.mean(test_accuracies)
average_train_accuracy = np.mean(train_accuracies)
print("Average Test Accuracy:", average_test_accuracy)
print("Average Train Accuracy:", average_train_accuracy)

Train Accuracy for fold: 1.0
Classification Report for fold:
{'0': {'precision': 0.6405228758169934, 'recall': 0.6533333333333333, 'f1-score': 0.6468646864686468, 'support': 300.0}, '1': {'precision': 0.6462585034013606, 'recall': 0.6333333333333333, 'f1-score': 0.6397306397306397, 'support': 300.0}, 'accuracy': 0.6433333333333333, 'macro avg': {'precision': 0.643390689609177, 'recall': 0.6433333333333333, 'f1-score': 0.6432976630996432, 'support': 600.0}, 'weighted avg': {'precision': 0.643390689609177, 'recall': 0.6433333333333333, 'f1-score': 0.6432976630996433, 'support': 600.0}}
Precision: 0.6462585034013606
Train Accuracy for fold: 1.0
Classification Report for fold:
{'0': {'precision': 0.6594427244582043, 'recall': 0.71, 'f1-score': 0.6837881219903692, 'support': 300.0}, '1': {'precision': 0.6859205776173285, 'recall': 0.6333333333333333, 'f1-score': 0.658578856152513, 'support': 300.0}, 'accuracy': 0.6716666666666666, 'macro avg': {'precision': 0.6726816510377664, 'recall': 0.6

In [8]:
from sklearn.ensemble import RandomForestClassifier
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

test_accuracies = []
train_accuracies = []

for train_index, test_index in skf.split(features, labels):
    X_train, X_test = features[train_index], features[test_index]
    y_train, y_test = labels[train_index], labels[test_index]

    # Create and train Random Forest model within each fold
    random_forest_model = RandomForestClassifier(n_estimators=100, random_state=42)
    random_forest_model.fit(X_train, y_train)

    # Evaluate and store test accuracy for the fold
    y_pred_test = random_forest_model.predict(X_test)
    accuracy_test = accuracy_score(y_test, y_pred_test)
    test_accuracies.append(accuracy_test)

    # Calculate and store train accuracy for the fold
    y_pred_train = random_forest_model.predict(X_train)
    accuracy_train = accuracy_score(y_train, y_pred_train)
    train_accuracies.append(accuracy_train)
    print(f"Train Accuracy for fold: {accuracy_train}")

    # Generate and print classification report and precision
    report = classification_report(y_test, y_pred_test, output_dict=True)
    precision = precision_score(y_test, y_pred_test)
    print(f"Classification Report for fold:\n{report}")
    print(f"Precision: {precision}")

# Calculate and print average test and train accuracies across folds
average_test_accuracy = np.mean(test_accuracies)
average_train_accuracy = np.mean(train_accuracies)
print("Average Test Accuracy:", average_test_accuracy)
print("Average Train Accuracy:", average_train_accuracy)

Train Accuracy for fold: 1.0
Classification Report for fold:
{'0': {'precision': 0.7854671280276817, 'recall': 0.7566666666666667, 'f1-score': 0.7707979626485568, 'support': 300.0}, '1': {'precision': 0.7652733118971061, 'recall': 0.7933333333333333, 'f1-score': 0.779050736497545, 'support': 300.0}, 'accuracy': 0.775, 'macro avg': {'precision': 0.775370219962394, 'recall': 0.775, 'f1-score': 0.7749243495730509, 'support': 600.0}, 'weighted avg': {'precision': 0.7753702199623939, 'recall': 0.775, 'f1-score': 0.7749243495730509, 'support': 600.0}}
Precision: 0.7652733118971061
Train Accuracy for fold: 1.0
Classification Report for fold:
{'0': {'precision': 0.8109090909090909, 'recall': 0.7433333333333333, 'f1-score': 0.7756521739130435, 'support': 300.0}, '1': {'precision': 0.7630769230769231, 'recall': 0.8266666666666667, 'f1-score': 0.7936, 'support': 300.0}, 'accuracy': 0.785, 'macro avg': {'precision': 0.786993006993007, 'recall': 0.7849999999999999, 'f1-score': 0.7846260869565218, '