In [71]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.multioutput import MultiOutputClassifier
from sklearn.metrics import classification_report, accuracy_score


from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from keras.models import Sequential
from keras.layers import Dense
from sklearn.utils.class_weight import compute_class_weight



In [92]:
labels = pd.read_csv("/content/image_labels_train.csv")

In [93]:
labels.head()

Unnamed: 0,image_id,rad_id,Aortic enlargement,Atelectasis,Calcification,Cardiomegaly,Clavicle fracture,Consolidation,Edema,Emphysema,...,Pneumothorax,Pulmonary fibrosis,Rib fracture,Other lesion,COPD,Lung tumor,Pneumonia,Tuberculosis,Other diseases,No finding
0,000434271f63a053c4128a0ba6352c7f,R2,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
1,000434271f63a053c4128a0ba6352c7f,R3,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
2,000434271f63a053c4128a0ba6352c7f,R6,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
3,00053190460d56c53cc3e57321387478,R11,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
4,00053190460d56c53cc3e57321387478,R2,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1


In [29]:
labels.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 45000 entries, 0 to 44999
Data columns (total 30 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   image_id            45000 non-null  object
 1   rad_id              45000 non-null  object
 2   Aortic enlargement  45000 non-null  int64 
 3   Atelectasis         45000 non-null  int64 
 4   Calcification       45000 non-null  int64 
 5   Cardiomegaly        45000 non-null  int64 
 6   Clavicle fracture   45000 non-null  int64 
 7   Consolidation       45000 non-null  int64 
 8   Edema               45000 non-null  int64 
 9   Emphysema           45000 non-null  int64 
 10  Enlarged PA         45000 non-null  int64 
 11  ILD                 45000 non-null  int64 
 12  Infiltration        45000 non-null  int64 
 13  Lung Opacity        45000 non-null  int64 
 14  Lung cavity         45000 non-null  int64 
 15  Lung cyst           45000 non-null  int64 
 16  Mediastinal shift   45

# **DATA ANALYSIS**

The following piece of code is showing that the different radiologists classify each x-ray to a different disease. Even in some cases one x ray is classified to multiple diseases by one radiologist.

In [30]:
# Define target columns
target_columns = ["COPD", "Lung tumor", "Pneumonia", "Tuberculosis", "Other diseases", "No finding"]

# Group by 'image_id' and sum the target columns
grouped = labels.groupby("image_id")[target_columns].sum()

# Identify rows where more than one target column has values greater than 0
rows_with_multiple_nonzero = grouped[grouped.gt(0).sum(axis=1) > 1]

# Reset index to make 'image_id' a separate column
rows_with_multiple_nonzero = rows_with_multiple_nonzero.reset_index()

# Display the resulting dataset
rows_with_multiple_nonzero.head()

Unnamed: 0,image_id,COPD,Lung tumor,Pneumonia,Tuberculosis,Other diseases,No finding
0,0005e8e3701dfb1dd93d53e2ff537b6e,0,2,3,2,0,0
1,001d127bad87592efe45a5c7678f8b8d,0,0,0,3,2,0
2,0046f681f078851293c4e710c4466058,0,0,0,3,1,0
3,009d4c31ebf87e51c5c8c160a4bd8006,0,0,2,0,3,0
4,00aca42a24e4ea6066cca2546150c36e,0,0,1,0,3,0


In [53]:
# Filter the original labels DataFrame using the image_ids from rows_with_multiple_nonzero
filtered_labels = labels[labels["image_id"].isin(rows_with_multiple_nonzero["image_id"])]
should_visualized = ["image_id", "rad_id"] + target_columns
filtered_labels[should_visualized].head()

Unnamed: 0,image_id,rad_id,COPD,Lung tumor,Pneumonia,Tuberculosis,Other diseases,No finding
6,0005e8e3701dfb1dd93d53e2ff537b6e,R10,0,0,1,1,0,0
7,0005e8e3701dfb1dd93d53e2ff537b6e,R8,0,1,1,0,0,0
8,0005e8e3701dfb1dd93d53e2ff537b6e,R9,0,1,1,1,0,0
27,001d127bad87592efe45a5c7678f8b8d,R10,0,0,0,1,0,0
28,001d127bad87592efe45a5c7678f8b8d,R8,0,0,0,1,1,0


Checking skewness in dataset

In [60]:
# Count the number of 1's in each target column
target_counts = {column: labels[column].sum() for column in target_columns}

# Display the counts
for column, count in target_counts.items():
    print(f"Count of 1's for {column}: {count}")

Count of 1's for COPD: 46
Count of 1's for Lung tumor: 478
Count of 1's for Pneumonia: 1570
Count of 1's for Tuberculosis: 1479
Count of 1's for Other diseases: 11794
Count of 1's for No finding: 31685


In [32]:
test_labels = pd.read_csv("/content/image_labels_test.csv")

In [34]:
test_labels.head()

Unnamed: 0,image_id,Aortic enlargement,Atelectasis,Calcification,Cardiomegaly,Clavicle fracture,Consolidation,Edema,Emphysema,Enlarged PA,...,Pneumothorax,Pulmonary fibrosis,Rib fracture,Other lesion,COPD,Lung tumor,Pneumonia,Tuberculosis,Other disease,No finding
0,e0dc2e79105ad93532484e956ef8a71a,0,1,1,1,0,0,0,0,0,...,1,0,0,0,0,0,1,0,1,0
1,0aed23e64ebdea798486056b4f174424,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,1,0,0,0
2,aa15cfcfca7605465ca0513902738b95,0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,1,0,0
3,665c4a6d2693dc0286d65ab479c9b169,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
4,42da2c134b53cb5594774d3d29faac59,1,0,1,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0


In [35]:
test_labels.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3000 entries, 0 to 2999
Data columns (total 29 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   image_id            3000 non-null   object
 1   Aortic enlargement  3000 non-null   int64 
 2   Atelectasis         3000 non-null   int64 
 3   Calcification       3000 non-null   int64 
 4   Cardiomegaly        3000 non-null   int64 
 5   Clavicle fracture   3000 non-null   int64 
 6   Consolidation       3000 non-null   int64 
 7   Edema               3000 non-null   int64 
 8   Emphysema           3000 non-null   int64 
 9   Enlarged PA         3000 non-null   int64 
 10  ILD                 3000 non-null   int64 
 11  Infiltration        3000 non-null   int64 
 12  Lung Opacity        3000 non-null   int64 
 13  Lung cavity         3000 non-null   int64 
 14  Lung cyst           3000 non-null   int64 
 15  Mediastinal shift   3000 non-null   int64 
 16  Nodule/Mass         3000

In [95]:

test_labels = test_labels.rename(columns={"Other disease": "Other diseases"})
X_test_labels = test_labels.drop(columns=target_columns + ["image_id"])
y_test_labels = test_labels[target_columns]


In [96]:

# Define target columns
target_columns = ["COPD", "Lung tumor", "Pneumonia", "Tuberculosis", "Other diseases", "No finding"]

# Separate features and target labels
X = labels.drop(columns=target_columns + ["image_id", "rad_id"])
y = labels[target_columns]


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Check for multi-label cases
multi_label_rows_train = y_train.sum(axis=1) > 1
multi_label_rows_test = y_test.sum(axis=1) > 1
print(f"Number of multi-label rows in training data: {multi_label_rows_train.sum()}")
print(f"Number of multi-label rows in testing data: {multi_label_rows_test.sum()}")


Number of multi-label rows in training data: 1580
Number of multi-label rows in testing data: 409


**TRAINING A RANDOM FOREST ON GIVEN DATA FOR MULTI LABEL CLASSIFICATION**

In [97]:

# Multi-Label Classification using RandomForest
model = RandomForestClassifier(random_state=42)
multi_label_model = MultiOutputClassifier(model)

# Train the model
multi_label_model.fit(X_train, y_train)

# Make predictions
y_pred = multi_label_model.predict(X_test)

# Evaluate the model
print("Classification Report (Multi-Label):")
print(classification_report(y_test, y_pred, target_names=target_columns))


Classification Report (Multi-Label):
                precision    recall  f1-score   support

          COPD       0.75      0.67      0.71         9
    Lung tumor       0.50      0.21      0.29        96
     Pneumonia       0.74      0.72      0.73       301
  Tuberculosis       0.66      0.37      0.47       314
Other diseases       0.93      0.98      0.95      2421
    No finding       1.00      1.00      1.00      6277

     micro avg       0.96      0.96      0.96      9418
     macro avg       0.76      0.66      0.69      9418
  weighted avg       0.96      0.96      0.95      9418
   samples avg       0.97      0.97      0.97      9418



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [47]:
print("Accuracy Score:", accuracy_score(y_test, y_pred))

Accuracy Score: 0.8793333333333333


In [98]:
# Make predictions
y_pred_labels = multi_label_model.predict(X_test_labels)

# Evaluate the model
print("Classification Report (Multi-Label):")
print(classification_report(y_test_labels, y_pred_labels, target_names=target_columns))


Classification Report (Multi-Label):
                precision    recall  f1-score   support

          COPD       0.50      0.50      0.50         2
    Lung tumor       0.42      0.10      0.16        80
     Pneumonia       0.83      0.47      0.60       246
  Tuberculosis       0.77      0.28      0.41       164
Other diseases       0.75      0.98      0.85       657
    No finding       1.00      1.00      1.00      2051

     micro avg       0.92      0.90      0.91      3200
     macro avg       0.71      0.55      0.59      3200
  weighted avg       0.91      0.90      0.89      3200
   samples avg       0.93      0.92      0.92      3200



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [99]:
# Calculate class weights based on the provided counts of 1's for each target column
class_counts = {
    "COPD": 46,
    "Lung tumor": 478,
    "Pneumonia": 1570,
    "Tuberculosis": 1479,
    "Other diseases": 11794,
    "No finding": 31685,
}

# Calculate class weights: {class: weight}
class_weights = {i: sum(class_counts.values()) / (len(class_counts) * count) for i, count in enumerate(class_counts.values())}

# Build the Neural Network model
model = Sequential()
model.add(Dense(64, activation='relu', input_dim=X_train.shape[1]))  # First hidden layer
model.add(Dense(32, activation='relu'))  # Second hidden layer
model.add(Dense(len(target_columns), activation='sigmoid'))  # Output layer with sigmoid activation

# Compile the model with binary crossentropy loss and AUC as a metric
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['AUC'])

# Train the model with class weights for each label
model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_test, y_test), class_weight=class_weights)


Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1125/1125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - AUC: 0.5710 - loss: 0.5782 - val_AUC: 0.9913 - val_loss: 0.1683
Epoch 2/50
[1m1125/1125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - AUC: 0.9927 - loss: 0.3203 - val_AUC: 0.9934 - val_loss: 0.1062
Epoch 3/50
[1m1125/1125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - AUC: 0.9942 - loss: 0.2714 - val_AUC: 0.9932 - val_loss: 0.1041
Epoch 4/50
[1m1125/1125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - AUC: 0.9944 - loss: 0.2582 - val_AUC: 0.9944 - val_loss: 0.0866
Epoch 5/50
[1m1125/1125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - AUC: 0.9949 - loss: 0.2548 - val_AUC: 0.9946 - val_loss: 0.0827
Epoch 6/50
[1m1125/1125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - AUC: 0.9946 - loss: 0.2733 - val_AUC: 0.9932 - val_loss: 0.0912
Epoch 7/50
[1m1125/1125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step

<keras.src.callbacks.history.History at 0x7b363f32eef0>

In [100]:
# Evaluate the model on the test set
loss, auc = model.evaluate(X_test, y_test)
print(f"Test Loss: {loss}, Test AUC: {auc}")

# Make predictions on the test set
y_pred_NN = model.predict(X_test)
y_pred_binary_NN = (y_pred > 0.5).astype(int)  # Convert probabilities to binary predictions

# Evaluate the model
print("Classification Report (Multi-Label):")
print(classification_report(y_test, y_pred_binary_NN, target_names=target_columns))



[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - AUC: 0.9949 - loss: 0.0720
Test Loss: 0.0712677612900734, Test AUC: 0.9949616193771362
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
Classification Report (Multi-Label):
                precision    recall  f1-score   support

          COPD       0.75      0.67      0.71         9
    Lung tumor       0.50      0.21      0.29        96
     Pneumonia       0.74      0.72      0.73       301
  Tuberculosis       0.66      0.37      0.47       314
Other diseases       0.93      0.98      0.95      2421
    No finding       1.00      1.00      1.00      6277

     micro avg       0.96      0.96      0.96      9418
     macro avg       0.76      0.66      0.69      9418
  weighted avg       0.96      0.96      0.95      9418
   samples avg       0.97      0.97      0.97      9418



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [102]:
# Evaluate the model on the test set
loss, auc = model.evaluate(X_test_labels, y_test_labels)
print(f"Test Loss: {loss}, Test AUC: {auc}")

# Make predictions on the test set
y_pred_NN_testlabels = model.predict(X_test_labels)
y_pred_binary_NN_testlabels = (y_pred_NN_testlabels > 0.5).astype(int)  # Convert probabilities to binary predictions

# Evaluate the model
print("Classification Report (Multi-Label):")
print(classification_report(y_test_labels, y_pred_binary_NN_testlabels, target_names=target_columns))


[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - AUC: 0.9544 - loss: 0.2156
Test Loss: 0.10471687465906143, Test AUC: 0.9874153733253479
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
Classification Report (Multi-Label):
                precision    recall  f1-score   support

          COPD       0.11      0.50      0.18         2
    Lung tumor       0.37      0.46      0.41        80
     Pneumonia       0.72      0.65      0.68       246
  Tuberculosis       0.54      0.50      0.52       164
Other diseases       0.65      0.40      0.49       657
    No finding       1.00      1.00      1.00      2051

     micro avg       0.88      0.81      0.85      3200
     macro avg       0.57      0.58      0.55      3200
  weighted avg       0.87      0.81      0.83      3200
   samples avg       0.82      0.83      0.82      3200



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [103]:
data = labels.copy()
# Verify that each image_id has exactly three rows
image_counts = data['image_id'].value_counts()
if not all(image_counts == 3):
    raise ValueError("Each image_id must have exactly three rows.")

# Create a unique index for each row within an image_id
data['row_index'] = data.groupby('image_id').cumcount()

# Split the dataset into three subsets based on row_index
dataset_0 = data[data['row_index'] == 0].reset_index(drop=True)  # First rad_id for each image
dataset_1 = data[data['row_index'] == 1].reset_index(drop=True)  # Second rad_id for each image
dataset_2 = data[data['row_index'] == 2].reset_index(drop=True)  # Third rad_id for each image

# Drop the row_index column if no longer needed
dataset_0 = dataset_0.drop(columns=['row_index'])
dataset_1 = dataset_1.drop(columns=['row_index'])
dataset_2 = dataset_2.drop(columns=['row_index'])

# Display results
print("Dataset 0 (First rad_id per image):")
print(dataset_0.info())

print("\nDataset 1 (Second rad_id per image):")
print(dataset_1.info())

print("\nDataset 2 (Third rad_id per image):")
print(dataset_2.info())

Dataset 0 (First rad_id per image):
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15000 entries, 0 to 14999
Data columns (total 30 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   image_id            15000 non-null  object
 1   rad_id              15000 non-null  object
 2   Aortic enlargement  15000 non-null  int64 
 3   Atelectasis         15000 non-null  int64 
 4   Calcification       15000 non-null  int64 
 5   Cardiomegaly        15000 non-null  int64 
 6   Clavicle fracture   15000 non-null  int64 
 7   Consolidation       15000 non-null  int64 
 8   Edema               15000 non-null  int64 
 9   Emphysema           15000 non-null  int64 
 10  Enlarged PA         15000 non-null  int64 
 11  ILD                 15000 non-null  int64 
 12  Infiltration        15000 non-null  int64 
 13  Lung Opacity        15000 non-null  int64 
 14  Lung cavity         15000 non-null  int64 
 15  Lung cyst           15000 non-null

In [104]:
# Function to train and evaluate a multi-label Random Forest on a dataset
def train_multilabel_rf(dataset, feature_columns, target_columns):
    # Split dataset into features (X) and targets (y)
    X = dataset[feature_columns]
    y = dataset[target_columns]

    # Split into train and test sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Initialize the Random Forest Classifier with MultiOutput support
    rf = RandomForestClassifier(random_state=42, n_jobs=-1)
    multi_rf = MultiOutputClassifier(rf)

    # Train the model
    multi_rf.fit(X_train, y_train)

    # Predict on the test set
    y_pred = multi_rf.predict(X_test)

    # Print classification report
    print("Classification Report:")
    print(classification_report(y_test, y_pred, target_names=target_columns))

    # Return the trained model
    return multi_rf

In [105]:

# Define target columns
target_columns = ["COPD", "Lung tumor", "Pneumonia", "Tuberculosis", "Other diseases", "No finding"]
columns_to_drop = target_columns + ["image_id", "rad_id", "row_index"]

feature_columns = [col for col in data.columns if col not in columns_to_drop]


print("Training on Dataset 0:")
rf_model_0 = train_multilabel_rf(dataset_0, feature_columns, target_columns)

print("\nTraining on Dataset 1:")
rf_model_1 = train_multilabel_rf(dataset_1, feature_columns, target_columns)

print("\nTraining on Dataset 2:")
rf_model_2 = train_multilabel_rf(dataset_2, feature_columns, target_columns)

Training on Dataset 0:
Classification Report:
                precision    recall  f1-score   support

          COPD       0.00      0.00      0.00         1
    Lung tumor       0.67      0.19      0.30        21
     Pneumonia       0.73      0.64      0.68        77
  Tuberculosis       0.57      0.29      0.38        83
Other diseases       0.95      0.99      0.97       830
    No finding       1.00      1.00      1.00      2109

     micro avg       0.97      0.96      0.97      3121
     macro avg       0.65      0.52      0.55      3121
  weighted avg       0.97      0.96      0.96      3121
   samples avg       0.98      0.97      0.97      3121


Training on Dataset 1:


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Classification Report:
                precision    recall  f1-score   support

          COPD       0.00      0.00      0.00         0
    Lung tumor       0.47      0.45      0.46        33
     Pneumonia       0.88      0.93      0.90       114
  Tuberculosis       0.64      0.42      0.51       102
Other diseases       0.93      0.97      0.95       774
    No finding       1.00      1.00      1.00      2107

     micro avg       0.96      0.97      0.97      3130
     macro avg       0.65      0.63      0.64      3130
  weighted avg       0.96      0.97      0.96      3130
   samples avg       0.97      0.97      0.97      3130


Training on Dataset 2:


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Classification Report:
                precision    recall  f1-score   support

          COPD       0.50      0.50      0.50         2
    Lung tumor       0.62      0.17      0.27        47
     Pneumonia       0.81      0.68      0.74       116
  Tuberculosis       0.67      0.43      0.52       103
Other diseases       0.91      0.96      0.93       770
    No finding       1.00      1.00      1.00      2105

     micro avg       0.96      0.95      0.95      3143
     macro avg       0.75      0.62      0.66      3143
  weighted avg       0.95      0.95      0.95      3143
   samples avg       0.97      0.96      0.96      3143



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [107]:
 # Predict on the test set
y_pred_rf_0 = rf_model_0.predict(X_test_labels)
y_pred_rf_1 = rf_model_1.predict(X_test_labels)
y_pred_rf_2 = rf_model_2.predict(X_test_labels)


accuracy_0 = accuracy_score(y_test_labels, y_pred_rf_0)
print(f"Overall Accuracy for dataset 0: {accuracy_0:.4f}")
# Print classification report
print("Classification Report for dataset 0:")
print(classification_report(y_test_labels, y_pred_rf_0, target_names=target_columns))


accuracy_1 = accuracy_score(y_test_labels, y_pred_rf_1)
print(f"Overall Accuracy for dataset 0: {accuracy_1:.4f}")
print("Classification Report:")
print(classification_report(y_test_labels, y_pred_rf_1, target_names=target_columns))


accuracy_2 = accuracy_score(y_test_labels, y_pred_rf_2)
print(f"Overall Accuracy for dataset 0: {accuracy_2:.4f}")
print("Classification Report:")
print(classification_report(y_test_labels, y_pred_rf_2, target_names=target_columns))


Overall Accuracy for dataset 0: 0.8657
Classification Report for dataset 0:
                precision    recall  f1-score   support

          COPD       1.00      0.50      0.67         2
    Lung tumor       1.00      0.01      0.02        80
     Pneumonia       0.83      0.32      0.46       246
  Tuberculosis       0.49      0.27      0.35       164
Other diseases       0.74      0.99      0.85       657
    No finding       1.00      1.00      1.00      2051

     micro avg       0.91      0.88      0.90      3200
     macro avg       0.84      0.51      0.56      3200
  weighted avg       0.91      0.88      0.87      3200
   samples avg       0.92      0.91      0.91      3200

Overall Accuracy for dataset 0: 0.8717
Classification Report:
                precision    recall  f1-score   support

          COPD       0.33      0.50      0.40         2
    Lung tumor       0.35      0.35      0.35        80
     Pneumonia       0.73      0.55      0.63       246
  Tuberculosis    

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
