In [2]:
!pip install scikit-learn

Defaulting to user installation because normal site-packages is not writeable
Collecting scikit-learn
  Downloading scikit_learn-1.6.1-cp312-cp312-win_amd64.whl.metadata (15 kB)
Collecting joblib>=1.2.0 (from scikit-learn)
  Using cached joblib-1.4.2-py3-none-any.whl.metadata (5.4 kB)
Collecting threadpoolctl>=3.1.0 (from scikit-learn)
  Using cached threadpoolctl-3.5.0-py3-none-any.whl.metadata (13 kB)
Downloading scikit_learn-1.6.1-cp312-cp312-win_amd64.whl (11.1 MB)
   ---------------------------------------- 0.0/11.1 MB ? eta -:--:--
   --- ------------------------------------ 1.0/11.1 MB 6.3 MB/s eta 0:00:02
   --------- ------------------------------ 2.6/11.1 MB 6.0 MB/s eta 0:00:02
   -------------- ------------------------- 3.9/11.1 MB 6.3 MB/s eta 0:00:02
   ------------------ --------------------- 5.2/11.1 MB 6.4 MB/s eta 0:00:01
   ----------------------- ---------------- 6.6/11.1 MB 6.4 MB/s eta 0:00:01
   ----------------------------- ---------- 8.1/11.1 MB 6.4 MB/s eta 0:


[notice] A new release of pip is available: 24.3.1 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [1]:
import tensorflow as tf
import pandas as pd
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model

# ---------------------------------------------------------------------------
# Configuration and dataset loading
#
# Reads the augmentation manifest (CSV), loads every image it references,
# resizes and scales the pixels, and builds two parallel arrays:
#   images -> float32 array of shape (n_loaded, 224, 224, channels)
#   labels -> integer class ids in [0, NUM_CLASSES)
# ---------------------------------------------------------------------------

# Paths (edit these to point at your copy of the data)
CSV_PATH = "augmented.csv"
IMAGE_DIR = "D:/rximage/image/images/split_padded_rotated"

# Define image parameters
IMG_SIZE = (224, 224)  # Target size passed to cv2.resize
BATCH_SIZE = 8

# Load dataset manifest; the code below reads the 'new_filename' and 'name' columns
df = pd.read_csv(CSV_PATH)

# Encode the class names in the 'name' column as integer ids.
# sparse_categorical_crossentropy needs integer targets in [0, NUM_CLASSES),
# so the raw names cannot be used as labels directly.
# sort=True makes the id assignment deterministic (sorted unique names), so the
# id -> name mapping can be rebuilt from the CSV at inference time.
# Rows with a missing name get code -1 and are skipped below.
class_ids, class_names = pd.factorize(df["name"], sort=True)
NUM_CLASSES = len(class_names)  # same count as df["name"].nunique()

# Load images and labels
images = []
labels = []
skipped = 0  # rows dropped because the image was unreadable or the name was missing

for filename, class_id in zip(df["new_filename"], class_ids):
    if class_id < 0:
        skipped += 1
        continue

    img = cv2.imread(os.path.join(IMAGE_DIR, filename))
    if img is None:  # cv2.imread returns None instead of raising on failure
        skipped += 1
        continue

    img = cv2.resize(img, IMG_SIZE)  # Resize
    # Scale to [0, 1] in float32; plain `img / 255.0` would promote to float64
    # and double the memory needed to hold the whole dataset.
    # NOTE(review): the ImageNet ResNet50 weights were trained with
    # tf.keras.applications.resnet50.preprocess_input rather than 0-1 scaling;
    # consider switching (together with any inference code) for better transfer.
    images.append(img.astype(np.float32) / 255.0)
    labels.append(class_id)

if skipped:
    print(f"Skipped {skipped} of {len(df)} rows (unreadable image or missing name).")

if not images:
    raise RuntimeError(
        f"No images could be loaded from {IMAGE_DIR!r}; check IMAGE_DIR and the "
        "'new_filename' column of the CSV."
    )

# Convert to NumPy arrays
images = np.array(images, dtype=np.float32)
labels = np.array(labels, dtype=np.int64)

# Hold out 20% of the samples for validation; the fixed random_state makes
# the split repeatable across runs.
X_train, X_val, y_train, y_val = train_test_split(
    images,
    labels,
    test_size=0.2,
    random_state=42,
)

# ResNet50 backbone with pre-trained ImageNet weights and without its
# original top classification layer.
base_model = ResNet50(
    include_top=False,
    weights="imagenet",
    input_shape=(*IMG_SIZE, 3),
)

# Keep the backbone weights fixed (can be set back to True later for fine-tuning).
base_model.trainable = False

# Custom classification head:
# global average pooling -> 128-unit ReLU layer -> softmax over NUM_CLASSES.
pooled = GlobalAveragePooling2D()(base_model.output)
hidden = Dense(units=128, activation="relu")(pooled)
output = Dense(units=NUM_CLASSES, activation="softmax")(hidden)

# Assemble backbone + head into a single model.
model = Model(inputs=base_model.input, outputs=output)

# Compile with Adam and sparse categorical cross-entropy; track accuracy.
model.compile(
    optimizer="adam",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)

# Train for 10 epochs, evaluating on the validation split after each epoch.
model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=10,
    batch_size=BATCH_SIZE,
)

# Persist the trained model to disk.
model.save("pill_classifier_resnet.h5")

print("Training complete and model saved.")

: 