In [52]:
import os
import cv2
import pytesseract
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import OneClassSVM
from sklearn.preprocessing import StandardScaler


In [53]:
# Path to cosmetic ingredient images
image_dir = "C:\\Users\\LENOVO\\Desktop\\DSGP data\\only labels"
cosmetic_texts = []

# Extract text from each image
for img_name in os.listdir(image_dir):
    img_path = os.path.join(image_dir, img_name)
    image = cv2.imread(img_path)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, thresh = cv2.threshold(gray, 128, 255, cv2.THRESH_BINARY)
    text = pytesseract.image_to_string(thresh)
    cosmetic_texts.append(text)

In [54]:
# Vectorize the text data using TF-IDF
vectorizer = TfidfVectorizer(max_features=500)
X = vectorizer.fit_transform(cosmetic_texts).toarray()

In [55]:
# Scale the features for One-Class SVM
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [56]:
# Train a One-Class SVM
oc_svm = OneClassSVM(kernel='rbf', gamma='auto', nu=0.1)  # Adjust nu for sensitivity
oc_svm.fit(X_scaled)

In [68]:
# Function to predict if a new image is cosmetic or not
def predict_cosmetic(image_path):
    # Preprocess the input image
    image = cv2.imread(image_path)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, thresh = cv2.threshold(gray, 128, 255, cv2.THRESH_BINARY)
    text = pytesseract.image_to_string(thresh)
    
    # Vectorize and scale the text
    text_vector = vectorizer.transform([text]).toarray()
    text_scaled = scaler.transform(text_vector)
    
    # Predict using the trained One-Class SVM
    prediction = oc_svm.predict(text_scaled)
    return "Cosmetic" if prediction != 1 else "Not Cosmetic"


In [89]:
# Test the model
test_image = "C:\\Users\\LENOVO\\Desktop\\New folder\\Untitled11.jpg"
result = predict_cosmetic(test_image)
print(f"The image is predicted as: {result}")

The image is predicted as: Not Cosmetic
