In [20]:
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
import lightgbm as lgb
from tensorflow.keras.preprocessing.image import load_img, img_to_array

In [1]:
pip install lightgbm

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 23.2.1 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [21]:
train_dir = r"C:\Users\ADMIN\Downloads\lungcancer\Data\train"
val_dir = r"C:\Users\ADMIN\Downloads\lungcancer\Data\valid"
test_dir = r"C:\Users\ADMIN\Downloads\lungcancer\Data\test"

In [22]:
img_height, img_width = 128, 128

In [23]:
def load_images(data_dir):
    images, labels = [], []
    for label in os.listdir(data_dir):
        label_path = os.path.join(data_dir, label)
        if os.path.isdir(label_path):
            for img_file in os.listdir(label_path):
                img_path = os.path.join(label_path, img_file)
                try:
                    img = load_img(img_path, target_size=(img_height, img_width), color_mode='grayscale')
                    img_array = img_to_array(img) / 255.0
                    images.append(img_array.flatten())
                    labels.append(label)
                except Exception as e:
                    print(f"Error loading image {img_path}: {e}")
    return np.array(images), np.array(labels)

In [24]:
X_train, y_train = load_images(train_dir)
X_val, y_val = load_images(val_dir)
X_test, y_test = load_images(test_dir)

In [26]:
from sklearn.utils.class_weight import compute_class_weight
from sklearn.preprocessing import StandardScaler
import lightgbm as lgb
from sklearn.linear_model import LogisticRegression
from tensorflow.keras.preprocessing.image import load_img, img_to_array
import numpy as np

In [27]:
# Normalize labels
y_train = [label.lower().strip() for label in y_train]
y_val = [label.lower().strip() for label in y_val]
y_test = [label.lower().strip() for label in y_test]

# Fit LabelEncoder on all labels
le = LabelEncoder()
le.fit(y_train + y_val + y_test)

# Encode labels
y_train_encoded = le.transform(y_train)
y_val_encoded = le.transform(y_val)
y_test_encoded = le.transform(y_test)
class_weights = compute_class_weight('balanced', classes=np.unique(y_train_encoded), y=y_train_encoded)
class_weight_dict = dict(zip(np.unique(y_train_encoded), class_weights))


In [28]:
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

In [29]:
# LightGBM params
params = {
    'objective': 'multiclass',
    'num_class': len(le.classes_),
    'metric': 'multi_logloss',
    'learning_rate': 0.05,
    'verbose': -1,
    'max_depth': -1,
    'num_leaves': 31,
    'scale_pos_weight': class_weights.max()
}

# StandardScaler to scale the data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)
X_test_scaled = scaler.transform(X_test)

# Logistic Regression model
lr_model = LogisticRegression(max_iter=5000, class_weight='balanced', solver='liblinear')
lr_model.fit(X_train_scaled, y_train_encoded)
y_pred_lr = lr_model.predict(X_test_scaled)
print("Logistic Regression Results:")
print(classification_report(y_test_encoded, y_pred_lr, zero_division=1))

Logistic Regression Results:
              precision    recall  f1-score   support

           0       1.00      0.00      0.00       120
           1       0.00      1.00      0.00         0
           2       1.00      0.00      0.00        51
           3       0.00      1.00      0.00         0
           4       0.77      0.91      0.83        54
           5       1.00      0.00      0.00        90
           6       0.00      1.00      0.00         0

    accuracy                           0.16       315
   macro avg       0.54      0.56      0.12       315
weighted avg       0.96      0.16      0.14       315



In [None]:
# Corrected LightGBM model training with early stopping in callbacks
lgb_train = lgb.Dataset(X_train_scaled, label=y_train_encoded)
lgb_eval = lgb.Dataset(X_val_scaled, label=y_val_encoded, reference=lgb_train)

# Correct placement of early stopping
lgb_model = lgb.train(
    params,
    lgb_train,
    num_boost_round=1000,
    valid_sets=[lgb_train, lgb_eval],
    valid_names=['train', 'valid'],
    callbacks=[lgb.early_stopping(stopping_rounds=50)]  # Correctly placed in callbacks
)

y_pred_lgb = np.argmax(lgb_model.predict(X_test_scaled, num_iteration=lgb_model.best_iteration), axis=1)
print("LightGBM Results:")
print(classification_report(y_test_encoded, y_pred_lgb))


Training until validation scores don't improve for 50 rounds
