In [1]:
import pandas as pd
import numpy as np
import os
import cv2
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import classification_report, roc_auc_score
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, Conv2D, MaxPooling2D, Flatten, Dropout, Concatenate, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import load_img, img_to_array

# Paths
# The data root can be overridden with the MAMMO_DATA_DIR environment variable
# so the notebook is not tied to one machine's directory layout; when the
# variable is unset the original location is used.
image_dir = os.environ.get('MAMMO_DATA_DIR', '/mnt/data/')
# The metadata CSV is looked up inside the data root, so changing image_dir
# moves both paths together. You may need to merge other CSVs into this file.
csv_file = os.path.join(image_dir, 'meta.csv')

# Parameters
IMG_SIZE = 224    # images are resized to IMG_SIZE x IMG_SIZE pixels when loaded
BATCH_SIZE = 16   # mini-batch size handed to model.fit
EPOCHS = 20       # number of training epochs handed to model.fit

# 1. Load and process CSV
df = pd.read_csv(csv_file)
# Rows without an image id or a pathology cannot be used for supervised training.
df = df.dropna(subset=['image_id', 'pathology'])

# Encode labels
# Normalise case and surrounding whitespace first so values such as 'benign'
# or 'MALIGNANT ' still map to a class.
label_map = {'BENIGN': 0, 'MALIGNANT': 1}
df['label'] = df['pathology'].astype(str).str.strip().str.upper().map(label_map)
# Any pathology outside label_map maps to NaN. Drop those rows so NaN never
# reaches the stratified split or the loss, then store the label as an integer.
n_unmapped = int(df['label'].isna().sum())
if n_unmapped:
    print(f"Dropping {n_unmapped} rows whose pathology is not one of {sorted(label_map)}")
df = df.dropna(subset=['label']).copy()
df['label'] = df['label'].astype(int)

# Normalize metadata features
meta_features = ['age', 'density', 'BI-RADS']  # Adjust based on available fields
for col in meta_features:
    if not pd.api.types.is_numeric_dtype(df[col]):
        # Non-numeric columns (object, string, categorical) are integer-coded;
        # astype(str) turns missing values into their own 'nan' category.
        df[col] = LabelEncoder().fit_transform(df[col].astype(str))
    else:
        # StandardScaler leaves NaN in place, which would feed NaN into the
        # network; fill numeric gaps with the column median instead.
        df[col] = df[col].fillna(df[col].median())

# NOTE(review): the scaler is fitted on every row before the train/validation
# split, so validation statistics influence the scaling. Consider fitting on
# the training rows only.
scaler = StandardScaler()
df[meta_features] = scaler.fit_transform(df[meta_features])

# 2. Load and preprocess images
def load_image(img_id):
    """Load one image as a grayscale array scaled by 1/255.

    Parameters
    ----------
    img_id : str
        Image file name (or path relative to ``image_dir``).

    Returns
    -------
    numpy.ndarray
        The image resized to ``IMG_SIZE`` x ``IMG_SIZE`` with pixel values
        divided by 255. If loading fails for any ordinary reason (missing
        file, unreadable data, invalid id) an all-zero float32 array of shape
        ``(IMG_SIZE, IMG_SIZE, 1)`` is returned instead, so a zero pixel sum
        marks a failed load.
    """
    try:
        img_path = os.path.join(image_dir, img_id)
        img = load_img(img_path, color_mode='grayscale', target_size=(IMG_SIZE, IMG_SIZE))
        return img_to_array(img) / 255.0
    except Exception:
        # Deliberately best-effort: a failed image becomes a zero placeholder.
        # Catching Exception (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate, so a long loading loop can still be cancelled.
        # float32 is used to match the success path (Keras' default float
        # type -- TODO confirm if the backend floatx was changed).
        return np.zeros((IMG_SIZE, IMG_SIZE, 1), dtype=np.float32)

# Keep only images that exist
df['image_path'] = df['image_id']
df['image'] = df['image_path'].apply(load_image)
# load_image returns an all-zero array when a file cannot be read, so a
# positive pixel sum marks a successful load. astype(bool) keeps the mask a
# proper boolean indexer even when the frame is empty.
loaded_mask = df['image'].apply(lambda arr: arr.sum() > 0).astype(bool)
df = df[loaded_mask]

if df.empty:
    # Without this check np.stack fails with an opaque
    # "need at least one array to stack" message.
    raise ValueError(
        f"No readable images found under {image_dir!r}; "
        "check image_dir and the 'image_id' column of the metadata CSV"
    )

# Model inputs and target as plain NumPy arrays, one row per retained image.
X_img = np.stack(df['image'].values)
X_meta = df[meta_features].values
y = df['label'].values

# 3. Train/test split
# A single call partitions the image array, the metadata array and the labels
# with the same row assignment: 20% held out, stratified on the label, and a
# fixed random_state so the partition is repeatable.
split_parts = train_test_split(
    X_img,
    X_meta,
    y,
    random_state=42,
    stratify=y,
    test_size=0.2,
)
# train_test_split returns (train, test) pairs in the order the arrays were given.
X_img_train, X_img_val = split_parts[0], split_parts[1]
X_meta_train, X_meta_val = split_parts[2], split_parts[3]
y_train, y_val = split_parts[4], split_parts[5]

# 4. Build model
# Seed NumPy and TensorFlow before any layer is created so weight
# initialisation and dropout start from the same random state on every run,
# matching the fixed random_state used for the data split. Some GPU kernels
# may still introduce small run-to-run differences.
np.random.seed(42)
tf.random.set_seed(42)

# Image branch: two conv/pool stages, then a dense embedding of the flattened
# feature maps.
img_input = Input(shape=(IMG_SIZE, IMG_SIZE, 1))
x = Conv2D(16, (3,3), activation='relu')(img_input)
x = MaxPooling2D()(x)
x = Conv2D(32, (3,3), activation='relu')(x)
x = MaxPooling2D()(x)
x = Flatten()(x)
x = Dense(64, activation='relu')(x)
x = Dropout(0.3)(x)

# Metadata branch: one dense layer over the scaled metadata columns.
meta_input = Input(shape=(X_meta.shape[1],))
m = Dense(32, activation='relu')(meta_input)
m = Dropout(0.3)(m)

# Combine: concatenate both embeddings and reduce to a single sigmoid unit
# (probability of the class encoded as 1).
combined = Concatenate()([x, m])
z = Dense(32, activation='relu')(combined)
z = Dropout(0.3)(z)
output = Dense(1, activation='sigmoid')(z)

# The model takes [image, metadata] and is trained as a binary classifier.
model = Model(inputs=[img_input, meta_input], outputs=output)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# 5. Train
# Both inputs are passed as a list in the same order as the model's inputs
# (image first, metadata second); the held-out split is evaluated each epoch.
train_inputs = [X_img_train, X_meta_train]
val_inputs = [X_img_val, X_meta_val]
history = model.fit(
    x=train_inputs,
    y=y_train,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=(val_inputs, y_val),
)

# 6. Evaluate
# Sigmoid scores for the validation split; scores above 0.5 become class 1.
y_pred = model.predict([X_img_val, X_meta_val])
y_pred_bin = np.greater(y_pred, 0.5).astype(int)

# Per-class precision/recall/F1 use the thresholded labels; ROC AUC uses the
# raw scores.
print(classification_report(y_true=y_val, y_pred=y_pred_bin))
val_auc = roc_auc_score(y_true=y_val, y_score=y_pred)
print("ROC AUC:", val_auc)


FileNotFoundError: [Errno 2] No such file or directory: '/mnt/data/meta.csv'

In [2]:
# Bare file name: works only if meta.csv is in the same folder as your script
# (resolved against the current working directory).
meta_df = pd.read_csv(filepath_or_buffer='meta.csv')


FileNotFoundError: [Errno 2] No such file or directory: 'meta.csv'

In [3]:
# Looks for meta.csv inside a 'data' subfolder of the working directory;
# adjust based on your actual path.
meta_df = pd.read_csv(filepath_or_buffer='data/meta.csv')


FileNotFoundError: [Errno 2] No such file or directory: 'data/meta.csv'

In [4]:
import os
# See what files are in the current directory ('.' is os.listdir's default path).
cwd_entries = os.listdir('.')
print(cwd_entries)


['.anaconda', '.arduinoIDE', '.conda', '.condarc', '.continuum', '.cursor', '.idlerc', '.ipynb_checkpoints', '.ipython', '.jupyter', '.keras', '.matplotlib', '.ms-ad', '.thumbnails', '.vscode', '9', 'anaconda3', 'anaconda_projects', 'AppData', 'Application Data', 'archive', 'bcc.ipynb', 'blenderkit_data', 'breast cancer', 'breast cancer classification', 'Contacts', 'Cookies', 'Documents', 'Downloads', 'extracted_data', 'Favorites', 'IntelGraphicsProfiles', 'Links', 'Local Settings', 'Music', 'My Documents', 'myenv', 'NetHood', 'NTUSER.DAT', 'ntuser.dat.LOG1', 'ntuser.dat.LOG2', 'NTUSER.DAT{a502dce5-18b3-11f0-8d58-dd200724ab76}.TM.blf', 'NTUSER.DAT{a502dce5-18b3-11f0-8d58-dd200724ab76}.TMContainer00000000000000000001.regtrans-ms', 'NTUSER.DAT{a502dce5-18b3-11f0-8d58-dd200724ab76}.TMContainer00000000000000000002.regtrans-ms', 'ntuser.ini', 'OneDrive', 'PrintHood', 'PyCharmMiscProject', 'Recent', 'Saved Games', 'Searches', 'SendTo', 'Start Menu', 'Templates', 'Untitled.ipynb', 'Untitled1.