### Imports ###

In [None]:
!pip install -q imbalanced-learn lime

import os
import cv2
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings("ignore")
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, recall_score, f1_score
from sklearn.utils import class_weight
from imblearn.over_sampling import SMOTE

from tensorflow.keras.applications import EfficientNetB3
from tensorflow.keras.layers import Dense, Dropout, GlobalAveragePooling2D, BatchNormalization, Input
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

from lime import lime_image
from skimage.segmentation import mark_boundaries

# Sanity check that every import above succeeded; also records the TensorFlow
# version in the cell output.
print("Environment Ready. TensorFlow version:", tf.__version__)

### Data Loading & Initial Preprocessing ###

In [None]:
# Build a shuffled DataFrame with one row per image: class folder name, file path, integer label.
base_path = '/kaggle/input/cigarette-smoker-detection/data'

# Folder name -> integer target. Folders that are not listed here carry no label.
label_map = {'not_smoking': 0, 'smoking': 1}

# os.listdir returns entries in arbitrary, filesystem-dependent order. Sorting
# makes the row order deterministic so the seeded shuffle below is reproducible.
classes = sorted(os.listdir(base_path))
data = []

for cls in classes:
    if cls in ['READ.ME', 'data']:
        continue

    class_path = os.path.join(base_path, cls)
    if not os.path.isdir(class_path):
        continue

    # An unmapped folder would yield NaN labels, which turns `Label` into floats
    # and breaks np.bincount / SMOTE further down.
    if cls not in label_map:
        print(f"Skipping folder without a label mapping: {cls}")
        continue

    for img in sorted(os.listdir(class_path)):
        full_img_path = os.path.join(class_path, img)
        data.append([cls, full_img_path])

df = pd.DataFrame(data, columns=["Class", "ImagePath"])

df['Label'] = df['Class'].map(label_map).astype(int)

# Shuffle once with a fixed seed so later index-based lookups stay stable.
df = df.sample(frac=1, random_state=42).reset_index(drop=True)

print("Dataset Summary:")
print(df["Class"].value_counts())

sns.countplot(x='Class', data=df, palette='viridis')
plt.title("Class Distribution Before SMOTE")
plt.show()

### Feature Selection (Deep Feature Extraction) ###

In [None]:
IMG_SIZE = 300


def extract_deep_features(df):
    """Load every image listed in ``df`` and embed it with a pretrained EfficientNetB3.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain an ``ImagePath`` column with one readable image file per row.

    Returns
    -------
    features : numpy.ndarray
        Output of the headless, average-pooled EfficientNetB3; one feature
        vector per row of ``df``, in the same order.
    X_images : numpy.ndarray
        The resized RGB images that were fed to the network, in the same order.

    Raises
    ------
    ValueError
        If OpenCV cannot decode one of the files.
    """
    extractor = EfficientNetB3(weights='imagenet', include_top=False, input_shape=(IMG_SIZE, IMG_SIZE, 3), pooling='avg')

    X_list = []
    print("Loading and Preprocessing Images")
    for path in df['ImagePath']:
        img = cv2.imread(path)
        # cv2.imread signals failure by returning None instead of raising.
        if img is None:
            raise ValueError(f"Could not read image file: {path}")
        # OpenCV decodes to BGR; the ImageNet weights (and matplotlib's imshow)
        # expect RGB channel order.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))
        # Keras documents this as a pass-through for EfficientNet (scaling lives
        # inside the model); kept for API parity with other backbones.
        img = tf.keras.applications.efficientnet.preprocess_input(img)
        X_list.append(img)

    X_images = np.array(X_list)
    print("Extracting Deep Features (this involves feature selection)")
    features = extractor.predict(X_images, batch_size=32, verbose=1)
    return features, X_images


features, raw_images = extract_deep_features(df)
y = df['Label'].values

### Class Imbalance Handling ###

In [None]:
from imblearn.over_sampling import SMOTE

# Split BEFORE oversampling. SMOTE interpolates between neighbouring samples, so
# resampling the full dataset first would (a) leak validation points into the
# training data through synthetic neighbours and (b) put synthetic samples into
# the validation set, inflating every reported metric.
X_train_real, X_val, y_train_real, y_val = train_test_split(
    features, y, test_size=0.2, random_state=42, stratify=y
)

print(f"Before SMOTE: {np.bincount(y_train_real)}")

smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X_train_real, y_train_real)

print(f"After SMOTE: {np.bincount(y_resampled)}")

# Only the training fold is balanced; validation keeps the real class distribution.
X_train, y_train = X_resampled, y_resampled

### Model Training & Hyperparameter Tuning ###

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

# Fix the TensorFlow seed so weight initialisation and dropout masks repeat
# across Restart & Run All.
tf.random.set_seed(42)

# Take the input width from the extracted features rather than hard-coding the
# EfficientNetB3 value, so swapping the backbone does not break this cell.
feature_dim = X_train.shape[1]

# Small dense head on top of the frozen deep features, ending in one sigmoid
# unit for the binary target.
model = Sequential([
    layers.Input(shape=(feature_dim,)),
    Dense(512, activation='relu'),
    BatchNormalization(),
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.3),
    Dense(1, activation='sigmoid')
])

model.compile(
    optimizer=Adam(learning_rate=0.0001),
    loss='binary_crossentropy',
    metrics=['accuracy']
)

# Stop once validation loss stalls for 6 epochs (restoring the best weights) and
# cut the learning rate by 5x after 3 stalled epochs.
callbacks = [
    EarlyStopping(monitor='val_loss', patience=6, restore_best_weights=True, verbose=1),
    ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=1e-7, verbose=1)
]

print("Starting Training")
history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=50,
    batch_size=32,
    callbacks=callbacks
)

### Result Comparison & Metrics ###

In [None]:
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, recall_score, f1_score, precision_score

# Display names for labels 0 and 1, shared by the report and the heatmap.
label_names = ['Not Smoking', 'Smoking']

# Threshold the sigmoid outputs at 0.5 to obtain hard class predictions.
y_pred_probs = model.predict(X_val)
y_pred = (y_pred_probs > 0.5).astype(int)

print("\nClassification Report")
report_text = classification_report(y_val, y_pred, target_names=label_names)
print(report_text)

# One row per headline metric, evaluated on the validation predictions.
metric_names = ["Accuracy", "Recall", "F1 Score", "Precision"]
metric_funcs = [accuracy_score, recall_score, f1_score, precision_score]
results = pd.DataFrame({
    "Metric": metric_names,
    "Score": [score_fn(y_val, y_pred) for score_fn in metric_funcs]
})
print("\nSummary Table")
display(results)

# Confusion matrix: rows are actual labels, columns are predicted labels.
conf_mat = confusion_matrix(y_val, y_pred)
plt.figure(figsize=(8,6))
sns.heatmap(
    conf_mat,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=label_names,
    yticklabels=label_names,
)
plt.title("Confusion Matrix")
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.show()

### Explainable AI (XAI) using LIME ###

In [None]:
import random
from lime import lime_image
from skimage.segmentation import mark_boundaries

# Position i is the folder name encoded as Label i when the DataFrame was built.
class_names = ['not_smoking', 'smoking']

# raw_images and df share row order, so one index addresses both.
sample_idx = random.randint(0, len(raw_images) - 1)
sample_img = raw_images[sample_idx]
true_class = df.iloc[sample_idx]['Class']

# The extractor used during feature extraction was local to that function, so a
# fresh copy of the same backbone is built for LIME's perturbed images.
base_feat_extractor = EfficientNetB3(weights='imagenet', include_top=False, 
                                     input_shape=(IMG_SIZE, IMG_SIZE, 3), pooling='avg')

def lime_predict(images):
    """Return per-class probabilities for a batch of images.

    The classifier head has a single sigmoid output, P(smoking). LIME ranks
    labels by column, so a one-column result makes it always explain column 0.
    The output is therefore expanded to two columns:
    [P(not_smoking), P(smoking)].
    """
    proc = tf.keras.applications.efficientnet.preprocess_input(images)
    feats = base_feat_extractor.predict(proc, verbose=0)
    p_smoking = model.predict(feats, verbose=0).reshape(-1, 1)
    return np.hstack([1.0 - p_smoking, p_smoking])

print(f"Generating LIME Explanation for Index {sample_idx} ({true_class})")

explainer = lime_image.LimeImageExplainer()
explanation = explainer.explain_instance(
    sample_img.astype('double'), 
    lime_predict, 
    top_labels=1, 
    hide_color=0, 
    num_samples=500 
)

# With two probability columns, the top label is the class the model predicts.
top_label = explanation.top_labels[0]
predicted_class = class_names[top_label]
temp, mask = explanation.get_image_and_mask(top_label, positive_only=True, num_features=5, hide_rest=False)

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 6))

ax1.imshow(sample_img / 255.0 if sample_img.max() > 1 else sample_img)
ax1.set_title(f"Original: {true_class}")
ax1.axis('off')

# The highlighted superpixels support the explained (predicted) class, which may
# differ from the ground-truth class shown on the left.
ax2.imshow(mark_boundaries(temp / 255.0, mask))
ax2.set_title(f"LIME: Areas that indicate '{predicted_class}'")
ax2.axis('off')

plt.tight_layout()
plt.show()