In [1]:
import pandas as pd

# Load your CSV
df = pd.read_csv("xray.csv")  # Replace with actual path

# Create binary label column
df["label"] = df["diagnosis"].apply(lambda x: 1 if "lung cancer" in x.lower() else 0)


In [2]:
df


Unnamed: 0,study_id,subtlety,size,age,gender,x,y,state,position,diagnosis,label
0,JPCLN001.png,5.0,15.0,53,Male,1634.0,692.0,malignant,l.upper lobe,lung cancer,1
1,JPCLN002.png,5.0,30.0,78,Female,1614.0,1090.0,benign,l.upper lobe(S5),pulmonary hematoma,0
2,JPCLN003.png,5.0,30.0,63,Female,1303.0,447.0,malignant,l.upper lobe(S1+2),lung cancer,1
3,JPCLN004.png,5.0,14.0,68,Female,606.0,836.0,benign,r.upper lobe,Inflammatory mass,0
4,JPCLN005.png,5.0,35.0,73,Female,1438.0,692.0,malignant,l.upper lobe(S3),lung cancer,1
...,...,...,...,...,...,...,...,...,...,...,...
242,JPCNN089.png,,,50,Male,,,non-nodule,,0,0
243,JPCNN090.png,,,68,Female,,,non-nodule,,0,0
244,JPCNN091.png,,,81,Male,,,non-nodule,,0,0
245,JPCNN092.png,,,70,Female,,,non-nodule,,0,0


In [3]:
import tensorflow as tf
import pandas as pd
import os

# Constants
IMG_SIZE = (224, 224)
IMAGE_DIR = r"F:/Shakunthala/Hope/15. Lung Cancer detection + Mental health chatbot/lung_cancer_appl/detector/images"

# Load CSV
df = pd.read_csv("xray.csv")  # Replace with your actual CSV path

# ✅ Create full image path
df["filepath"] = df["study_id"].apply(lambda x: os.path.join(IMAGE_DIR, x))

# ✅ Filter only existing image paths
df = df[df["filepath"].apply(os.path.exists)].reset_index(drop=True)

# ✅ Label lung cancer as 1, others as 0
df["label"] = df["diagnosis"].apply(lambda x: 1 if "lung cancer" in str(x).lower() else 0)

# ✅ Define image loader
def load_image(image_path):
    img = tf.io.read_file(image_path)
    img = tf.image.decode_jpeg(img, channels=3)
    img = tf.image.resize(img, IMG_SIZE)
    return img / 255.0

# ✅ Create TensorFlow dataset
def make_dataset(df):
    paths = df["filepath"].values
    labels = df["label"].values
    ds = tf.data.Dataset.from_tensor_slices((paths, labels))
    ds = ds.map(lambda x, y: (load_image(x), tf.cast(y, tf.float32)))
    return ds

# ✅ Train/Validation Split
train_df = df.sample(frac=0.8, random_state=42)
val_df = df.drop(train_df.index)

# ✅ TF Datasets
train_ds = make_dataset(train_df).shuffle(100).repeat().batch(32).prefetch(tf.data.AUTOTUNE)
val_ds = make_dataset(val_df).batch(32).prefetch(tf.data.AUTOTUNE)

# ✅ Optional: Print to verify
print(f"Training samples: {len(train_df)}")
print(f"Validation samples: {len(val_df)}")


Training samples: 198
Validation samples: 49


In [4]:
# Example simple CNN model
model = tf.keras.Sequential([
    tf.keras.layers.InputLayer(input_shape=(224, 224, 3)),
    tf.keras.layers.Conv2D(32, 3, activation='relu'),
    tf.keras.layers.MaxPooling2D(),
    tf.keras.layers.Conv2D(64, 3, activation='relu'),
    tf.keras.layers.MaxPooling2D(),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# ✅ Train the model
steps_per_epoch = len(train_df) // 32
validation_steps = len(val_df) // 32

model.fit(train_ds,
          validation_data=val_ds,
          epochs=10,
          steps_per_epoch=steps_per_epoch,
          validation_steps=validation_steps)




Epoch 1/10
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 3s/step - accuracy: 0.4945 - loss: 4.7793 - val_accuracy: 0.4375 - val_loss: 0.7172
Epoch 2/10
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 3s/step - accuracy: 0.5100 - loss: 0.6910 - val_accuracy: 0.4375 - val_loss: 0.7401
Epoch 3/10
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 3s/step - accuracy: 0.6218 - loss: 0.6639 - val_accuracy: 0.4375 - val_loss: 0.7129
Epoch 4/10
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 3s/step - accuracy: 0.6690 - loss: 0.6570 - val_accuracy: 0.4375 - val_loss: 0.7789
Epoch 5/10
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 3s/step - accuracy: 0.6007 - loss: 0.6648 - val_accuracy: 0.4375 - val_loss: 0.8602
Epoch 6/10
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 2s/step - accuracy: 0.6183 - loss: 0.6485 - val_accuracy: 0.4375 - val_loss: 0.7308
Epoch 7/10
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m

<keras.src.callbacks.history.History at 0x22599146120>

In [5]:
# Save the trained model
model.save("xray_model.h5")
print("Model saved")



Model saved


In [5]:
model.summary()