In [3]:
import os
import pandas as pd
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import AUC
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import classification_report, confusion_matrix
from tensorflow.keras.models import load_model



In [4]:
train_dir = './chest_xray/train/'
test_dir = './chest_xray/test/'
categories = ['NORMAL', 'PNEUMONIA']


filepaths = []
labels = []

In [5]:
for category in categories:
    folder = os.path.join(train_dir, category)
    for fname in os.listdir(folder):
        filepaths.append(f"{category}/{fname}")
        labels.append(category)

In [6]:
df = pd.DataFrame({'Filename': filepaths, 'Label': labels})


In [7]:
train_datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=0.2,
    rotation_range=20,
    zoom_range=0.2,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True
)

In [8]:
valid_datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=0.2
)

In [9]:
train_generator = train_datagen.flow_from_dataframe(
    dataframe=df,
    directory=train_dir,
    x_col='Filename',
    y_col='Label',
    subset='training',
    batch_size=32,
    seed=42,
    shuffle=True,
    class_mode='binary',
    target_size=(150, 150)
)

Found 4173 validated image filenames belonging to 2 classes.


In [10]:
valid_generator = valid_datagen.flow_from_dataframe(
    dataframe=df,
    directory=train_dir,
    x_col='Filename',
    y_col='Label',
    subset='validation',
    batch_size=32,
    seed=42,
    shuffle=True,
    class_mode='binary',
    target_size=(150, 150)
)

Found 1043 validated image filenames belonging to 2 classes.


In [None]:
class_weights = compute_class_weight(
    class_weight='balanced',
    classes=np.unique(train_generator.classes),
    y=train_generator.classes
)
class_weights = dict(enumerate(class_weights))
print("Class weights:", class_weights)

Class weights: {0: np.float64(7.001677852348993), 1: np.float64(0.5384516129032259)}


In [12]:
model = Sequential([
    Conv2D(32, (3,3), activation='relu', input_shape=(150,150,3)),
    MaxPooling2D(2,2),
    Conv2D(64, (3,3), activation='relu'),
    MaxPooling2D(2,2),
    Conv2D(128, (3,3), activation='relu'),
    MaxPooling2D(2,2),
    Flatten(),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dense(1, activation='sigmoid')
])


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [13]:
model.compile(
    optimizer=Adam(),
    loss='binary_crossentropy',
    metrics=['accuracy', AUC(name='auc')]
)

model.summary()

In [14]:
history = model.fit(
    train_generator,
    steps_per_epoch=train_generator.samples // train_generator.batch_size,
    epochs=10,
    validation_data=valid_generator,
    validation_steps=valid_generator.samples // valid_generator.batch_size,
    class_weight=class_weights
)

  self._warn_if_super_not_called()


Epoch 1/10
[1m130/130[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m109s[0m 815ms/step - accuracy: 0.4914 - auc: 0.5863 - loss: 0.6771 - val_accuracy: 0.7939 - val_auc: 0.0000e+00 - val_loss: 0.5811
Epoch 2/10
[1m  1/130[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m31s[0m 248ms/step - accuracy: 0.7812 - auc: 1.0000 - loss: 0.3149



[1m130/130[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 170ms/step - accuracy: 0.7812 - auc: 1.0000 - loss: 0.3149 - val_accuracy: 0.7002 - val_auc: 0.0000e+00 - val_loss: 0.7834
Epoch 3/10
[1m130/130[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m94s[0m 720ms/step - accuracy: 0.7092 - auc: 0.8111 - loss: 0.5671 - val_accuracy: 0.9277 - val_auc: 0.0000e+00 - val_loss: 0.2382
Epoch 4/10
[1m130/130[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 160ms/step - accuracy: 0.8125 - auc: 0.9152 - loss: 0.4559 - val_accuracy: 0.9775 - val_auc: 0.0000e+00 - val_loss: 0.1020
Epoch 5/10
[1m130/130[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m91s[0m 703ms/step - accuracy: 0.8167 - auc: 0.9069 - loss: 0.3983 - val_accuracy: 0.9912 - val_auc: 0.0000e+00 - val_loss: 0.0653
Epoch 6/10
[1m130/130[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 155ms/step - accuracy: 0.7812 - auc: 1.0000 - loss: 0.2700 - val_accuracy: 0.9834 - val_auc: 0.0000e+00 - val_loss: 0.0912
Epoch 

In [15]:
test_filepaths = []
test_labels = []

In [16]:
for category in categories:
    folder = os.path.join(test_dir, category)
    for fname in os.listdir(folder):
        test_filepaths.append(f"{category}/{fname}")
        test_labels.append(category)

In [17]:
test_df = pd.DataFrame({'Filename': test_filepaths, 'Label': test_labels})

In [18]:
test_datagen = ImageDataGenerator(rescale=1./255)

In [19]:
test_generator = test_datagen.flow_from_dataframe(
    dataframe=test_df,
    directory=test_dir,
    x_col='Filename',
    y_col='Label',
    batch_size=32,
    shuffle=False,
    class_mode='binary',
    target_size=(150,150)
)   

Found 624 validated image filenames belonging to 2 classes.


In [20]:
preds_probs = model.predict(test_generator)
preds = (preds_probs > 0.5).astype(int).flatten()

true_labels = test_generator.classes
class_labels = list(test_generator.class_indices.keys())

# --- Метрики ---
print(classification_report(true_labels, preds, target_names=class_labels))
print(confusion_matrix(true_labels, preds))

  self._warn_if_super_not_called()


[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 359ms/step
              precision    recall  f1-score   support

      NORMAL       0.69      0.94      0.80       234
   PNEUMONIA       0.95      0.75      0.84       390

    accuracy                           0.82       624
   macro avg       0.82      0.85      0.82       624
weighted avg       0.86      0.82      0.82       624

[[220  14]
 [ 97 293]]


In [21]:
model.save("pneumonia_model.h5")


