In [None]:
# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.
import kagglehub
# Fetch the 'sujaykapadnis/smoking' dataset through kagglehub and keep the
# value the call returns.
# NOTE(review): the cells visible in this notebook read images from hard-coded
# /kaggle/input/smoking/... paths and never use this variable; confirm both
# locations agree when running outside Kaggle.
sujaykapadnis_smoking_path = kagglehub.dataset_download('sujaykapadnis/smoking')

print('Data source import complete.')


In [None]:
import pandas as pd
import numpy as np
import joblib
import shutil
import os
import sys
import zipfile

import random


import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap
import seaborn as sns


import plotly.express as px
import plotly.graph_objs as go
from plotly.subplots import make_subplots


from PIL import Image


import copy
from tensorflow.keras.optimizers import Adamax
from tensorflow.keras.layers import BatchNormalization, GlobalAveragePooling2D, Dropout, Dense
from tensorflow.keras import regularizers
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras import layers, models
from tensorflow.keras.optimizers import Adam
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing import image
from sklearn.model_selection import train_test_split
from keras.callbacks import EarlyStopping
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from tensorflow.keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.optimizers.schedules import CosineDecay

In [None]:
# Dataset locations: every split lives in a doubly nested folder
# (e.g. Training/Training) below the same dataset root.
DATASET_ROOT = "/kaggle/input/smoking"
Training_path = DATASET_ROOT + "/Training/Training"
Validation_path = DATASET_ROOT + "/Validation/Validation"
Test_path = DATASET_ROOT + "/Testing/Testing"

In [None]:
# Load the labelled image paths found in a folder.
def loaddata(folder, extensions=(".jpg", ".png"), valid_labels=("smoking", "notsmoking")):
    """Collect labelled image paths from ``folder`` into a DataFrame.

    The label of a file is the part of its name before the first underscore
    (``smoking_0001.jpg`` -> ``smoking``). Files whose extension or label is
    not accepted are skipped.

    Args:
        folder: Directory whose direct entries are scanned (not recursive).
        extensions: Accepted file-name suffixes, matched case-insensitively.
        valid_labels: Accepted label prefixes.

    Returns:
        pandas.DataFrame with the columns ``path`` (folder joined with the
        file name) and ``label``, one row per accepted file, ordered by file
        name.

    Raises:
        OSError: Propagated from ``os.listdir`` when ``folder`` cannot be read.
    """
    allowed_suffixes = tuple(suffix.lower() for suffix in extensions)
    records = []
    # os.listdir() returns entries in arbitrary order; sorting keeps the frame
    # (and any seeded split made from it) identical from run to run.
    for filename in sorted(os.listdir(folder)):
        if not filename.lower().endswith(allowed_suffixes):
            continue
        # The class name is encoded as the prefix before the first underscore.
        label = filename.split("_")[0]
        if label in valid_labels:
            records.append((os.path.join(folder, filename), label))
    # Explicit columns so an empty folder still yields a path/label frame.
    return pd.DataFrame(records, columns=['path', 'label'])

In [None]:
# Build one path/label frame per original split folder, in the order
# training -> validation -> testing.
train_data, val_data, test_data = (
    loaddata(split_dir)
    for split_dir in (Training_path, Validation_path, Test_path)
)

In [None]:
train_data.head() # peek at the first rows of the training frame to see what it contains

In [None]:
train_data.tail() # peek at the last rows of the training frame to see what it contains

In [None]:
# Preview the first rows of the validation frame.
val_data.head()

In [None]:
# Preview the first rows of the test frame.
test_data.head()

In [None]:
# Report how many rows and columns the training frame has.
row = train_data.shape[0]
col = train_data.shape[1]
print("ROW : ", row)
print("COL : ", col)

In [None]:
# Report how many rows and columns the validation frame has.
row = val_data.shape[0]
col = val_data.shape[1]
print("ROW : ", row)
print("COL : ", col)

In [None]:
# Report how many rows and columns the test frame has.
row = test_data.shape[0]
col = test_data.shape[1]
print("ROW : ", row)
print("COL : ", col)

In [None]:
# Stack the three split frames into one frame with a fresh 0..n-1 index.
combined_data = pd.concat(
    objs=[train_data, val_data, test_data],
    axis=0,
    ignore_index=True,
)

In [None]:
# Preview the first rows of the combined frame.
combined_data.head()

In [None]:
# Row and column counts of the combined frame.
rows = combined_data.shape[0]
columns = combined_data.shape[1]
print("Number of rows:", rows)
print("Number of columns:", columns)

In [None]:
# Destination of the CSV snapshot of the combined frame.
file_path = "/kaggle/working/combined_data.csv"

# Per the original author's note, info() shows the columns are stored with
# the object dtype.
combined_data.info()

# Write the snapshot; index=False leaves the row index out of the file.
combined_data.to_csv(path_or_buf=file_path, index=False)

In [None]:
# Cast both text columns to str, one column at a time.
for column_name in ('path', 'label'):
    combined_data[column_name] = combined_data[column_name].astype(str)

In [None]:
# Count the null values in each column.
combined_data.isnull().sum()

In [None]:
# Explicit bar colour for each class.
custom_palette = dict(smoking='#00040D', notsmoking='#D24A56')

# Number of images per label, as a two-column frame (label, count).
label_counts = combined_data['label'].value_counts().reset_index()
label_counts.columns = ['label', 'count']

# Bar chart of the class distribution.
fig = px.bar(
    label_counts,
    x='label',
    y='count',
    color='label',
    color_discrete_map=custom_palette,
    title='Distribution of Labels',
    labels={'label': 'Label', 'count': 'Count'},
)
fig.show()

In [None]:
# Encode the two classes as '0' (notsmoking) and '1' (smoking) in a new
# column. The codes are strings, not integers.
# NOTE(review): presumably strings because this column is later fed to
# flow_from_dataframe with class_mode='binary' — confirm.
label_to_code = {'notsmoking': '0', 'smoking': '1'}
combined_data['label_encoded'] = combined_data['label'].map(label_to_code)

In [None]:
# Preview the combined frame again, now including the label_encoded column.
combined_data.head()

In [None]:
# First split: 70 % of the combined frame for training and 30 % held out,
# stratified on label_encoded so both parts keep the class ratio.
train_data, temp_data = train_test_split(
    combined_data,
    stratify=combined_data['label_encoded'],
    test_size=0.3,
    random_state=42,
)

# Second split: halve the held-out part into validation and test
# (about 15 % of the data each), again stratified on label_encoded.
val_data, test_data = train_test_split(
    temp_data,
    stratify=temp_data['label_encoded'],
    test_size=0.5,
    random_state=42,
)

In [None]:
# Training images: multiply pixel values by 1/255 and apply random geometric
# augmentation (rotation, shifts, shear, zoom, horizontal flip).
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)
# Validation and test images are only rescaled, never augmented.
test_datagen = ImageDataGenerator(rescale=1./255)
val_datagen = ImageDataGenerator(rescale=1./255)

# Settings shared by all three generators. target_size matches the
# (224, 224, 3) input shape the MobileNetV2 backbone is built with.
flow_kwargs = dict(
    x_col='path',
    y_col='label_encoded',
    target_size=(224, 224),
    batch_size=32,
    class_mode='binary',
)

# Training batches keep the default shuffling.
train_generator = train_datagen.flow_from_dataframe(train_data, **flow_kwargs)

# shuffle=False keeps the batches in dataframe order, so predictions made from
# these generators line up with their .classes / .filenames attributes.
val_generator = val_datagen.flow_from_dataframe(val_data, shuffle=False, **flow_kwargs)

test_generator = test_datagen.flow_from_dataframe(test_data, shuffle=False, **flow_kwargs)

In [None]:
# Load MobileNetV2 with ImageNet weights and without its classification head.
base_model = MobileNetV2(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
# Freeze the MobileNetV2 layers to reduce the compute needed in this phase;
# only the new head below is trained.
base_model.trainable = False

# Binary classifier: backbone -> global average pooling -> one sigmoid unit.
model = models.Sequential([
    base_model,
    layers.GlobalAveragePooling2D(),
    layers.Dense(1, activation='sigmoid')
])

model.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])

# Stop after 5 epochs without val_loss improvement and restore the best weights.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Train the head. The batch size comes from the generators (32), so it is not
# passed to fit(): Keras documents that batch_size must not be specified for
# generator inputs.
history = model.fit(
    train_generator,
    epochs=20,
    validation_data=val_generator,
    callbacks=[early_stopping]
)

In [None]:
# Fine-tuning phase: make the backbone trainable again, then re-freeze its
# first `fine_tune_at` layers so that only the layers from index 100 onward
# (plus the classification head) are updated.
base_model.trainable = True
fine_tune_at = 100  # number of leading backbone layers that stay frozen
for layer in base_model.layers[:fine_tune_at]:
    layer.trainable = False

# Recompile after changing `trainable`, with a much smaller learning rate
# (1e-5 instead of 1e-3) for the pre-trained layers.
model.compile(optimizer=Adam(learning_rate=0.00001), loss='binary_crossentropy', metrics=['accuracy'])


# Continue training. The batch size comes from the generators, so it is not
# passed to fit().
history_fine_tune = model.fit(
    train_generator,
    epochs=20,
    validation_data=val_generator,
    callbacks=[early_stopping]
)
# The HDF5 format is inferred from the .h5 suffix; the explicit `save_format`
# argument is deprecated in Keras 3 and therefore omitted.
model.save('/kaggle/working/smoking_detection_model.h5')

In [None]:
# Disabled cell: the TFLite quantization code below is wrapped in a string
# literal, so none of it is executed.
'''
def apply_quantization(model):
    converter = tf.lite.TFLiteConverter.from_keras_model(model)
    converter.optimizations = [tf.lite.Optimize.DEFAULT]
    tflite_model = converter.convert()
    return tflite_model

quantized_model = apply_quantization(model)
quantized_model_path = '/kaggle/working/smoking_detection_model_quantized.tflite'
with open(quantized_model_path, 'wb') as f:
    f.write(quantized_model)
'''

In [None]:
# Disabled cell: the zip-compression code for the saved .h5 model below is
# wrapped in a string literal, so none of it is executed.
'''
def compress_model(model_path):
    zip_filename = model_path.replace('.h5', '.zip')
    with zipfile.ZipFile(zip_filename, 'w', zipfile.ZIP_DEFLATED) as zipf:
        zipf.write(model_path, os.path.basename(model_path))
    return zip_filename
compressed_model_path = compress_model('/kaggle/working/smoking_detection_model.h5')
print(f"Compressed model saved as: {compressed_model_path}")
print(os.listdir('/kaggle/working/'))
'''

In [None]:
file_path_acc_loss = '/kaggle/working/accuracy_loss_plot.png'

# Per-epoch metrics of the first training run (frozen backbone).
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

epochs_range = range(len(acc))  # epoch indices of the first run

# Per-epoch metrics of the fine-tuning run, placed right after the first run
# on the epoch axis so the figure also covers the model that gets saved.
fine_tune_metrics = history_fine_tune.history
fine_tune_epochs = range(len(acc), len(acc) + len(fine_tune_metrics['accuracy']))

plt.figure(figsize=(12, 5))

# Plot Accuracy
plt.subplot(1, 2, 1)
plt.plot(epochs_range, acc, label='Training Accuracy', marker='o')
plt.plot(epochs_range, val_acc, label='Validation Accuracy', marker='o')
plt.plot(fine_tune_epochs, fine_tune_metrics['accuracy'],
         label='Training Accuracy (fine-tuning)', marker='o', linestyle='--')
plt.plot(fine_tune_epochs, fine_tune_metrics['val_accuracy'],
         label='Validation Accuracy (fine-tuning)', marker='o', linestyle='--')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend(loc='lower right')
plt.title('Training and Validation Accuracy')

# Plot Loss
plt.subplot(1, 2, 2)
plt.plot(epochs_range, loss, label='Training Loss', marker='o')
plt.plot(epochs_range, val_loss, label='Validation Loss', marker='o')
plt.plot(fine_tune_epochs, fine_tune_metrics['loss'],
         label='Training Loss (fine-tuning)', marker='o', linestyle='--')
plt.plot(fine_tune_epochs, fine_tune_metrics['val_loss'],
         label='Validation Loss (fine-tuning)', marker='o', linestyle='--')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss')

# Write the figure to disk and close it; it is not displayed inline.
plt.savefig(file_path_acc_loss)
plt.close()