In [2]:
! pip install opencv-python

Collecting opencv-python
  Using cached opencv_python-4.11.0.86-cp37-abi3-win_amd64.whl.metadata (20 kB)
Using cached opencv_python-4.11.0.86-cp37-abi3-win_amd64.whl (39.5 MB)
Installing collected packages: opencv-python
Successfully installed opencv-python-4.11.0.86


In [92]:
import cv2
import pymysql 
import os 
from dotenv import load_dotenv

# Load variables from a .env file into the process environment before reading them.
load_dotenv()

# Open the MySQL connection; DictCursor makes every fetched row a dict keyed by column name.
conn = pymysql.connect(
    host=os.getenv("MYSQL_HOST"),
    user=os.getenv("MYSQL_USER_NAME"),
    password=os.getenv("MYSQL_USER_PASSWORD"),
    # `database` is the supported keyword; `db` is only a deprecated alias of it.
    database=os.getenv("MYSQL_DATABASE"),
    # int(None) raises TypeError when MYSQL_PORT is unset, so fall back to MySQL's default port.
    port=int(os.getenv("MYSQL_PORT", "3306")),
    charset='utf8mb4',
    cursorclass=pymysql.cursors.DictCursor,
)

In [103]:
# Fetch every row and column of menstable_test2 into `result`.
query = """
SELECT * FROM menstable_test2;
"""

# The cursor is closed automatically when the `with` block exits.
with conn.cursor() as cursor:
    cursor.execute(query)
    result = cursor.fetchall()


In [72]:
import requests
from io import BytesIO
from PIL import Image
from collections import Counter

# Number of rows fetched by the query.
len(result)

31302

In [None]:
# 이미지 로딩 
import numpy as np

def load_image_from_url(url: str, timeout: float = 5) -> np.ndarray:
    """Download an image and return it as a BGR pixel array.

    Args:
        url: Address of the image to download.
        timeout: Timeout in seconds handed to ``requests.get``. Defaults to 5,
            the value that used to be hard-coded.

    Returns:
        The image decoded by Pillow, converted to RGB, and then reordered to
        BGR channel order with ``cv2.cvtColor``.

    Raises:
        requests.HTTPError: If the server answers with an error status
            (raised by ``raise_for_status``).
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    image = Image.open(BytesIO(response.content)).convert("RGB")
    return cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)


In [46]:
def is_white(rgb, threshold=235, max_spread=10):
    """Decide whether a colour is close enough to pure white.

    A colour counts as white when every channel is at least ``threshold`` and
    the gap between its brightest and darkest channel is at most
    ``max_spread`` (i.e. it is bright and nearly neutral).

    Args:
        rgb: Sequence of channel values, e.g. ``(r, g, b)``.
        threshold: Minimum value each channel must reach.
        max_spread: Largest allowed difference between the highest and lowest
            channel. Defaults to 10, the value that used to be hard-coded.

    Returns:
        ``True`` if the colour passes both checks, otherwise ``False``.
    """
    if not all(channel >= threshold for channel in rgb):
        return False
    # bool() so NumPy scalar inputs still yield a plain Python bool.
    return bool(max(rgb) - min(rgb) <= max_spread)

# 같은 색상 판단 기준

In [104]:
import pandas as pd

# Build a DataFrame from the fetched rows.
df = pd.DataFrame(result)

In [16]:
# NOTE(review): if this bare `df` shares a cell with the function defined below, it is
# not the cell's last expression and therefore displays nothing — confirm and consider removing.
df

def validate_white_color(row):
    """Check whether the top dominant colour of a row's thumbnail is white.

    Downloads the image at ``row["thumbnail_url"]``, takes the first entry
    returned by ``extract_dominant_colors_cv``, prints it, and passes its
    ``"rgb"`` value to ``is_white``.

    Returns:
        The result of ``is_white`` for that colour, or ``False`` when any step
        raises (the error is printed together with ``row["goods_name"]``).
    """
    try:
        thumbnail = load_image_from_url(row["thumbnail_url"])
        top_color = extract_dominant_colors_cv(thumbnail)[0]
        print(f"[{row['goods_name']}] Dominant: {top_color}")
        verdict = is_white(top_color["rgb"])
    except Exception as err:
        # Any failure is reported and treated as "not white".
        print(f"⚠️ 오류 ({row['goods_name']}): {err}")
        verdict = False
    return verdict

In [2]:
# Distinct values of the `color` column, as a plain Python list.
df["color"].unique().tolist()

NameError: name 'df' is not defined

In [1]:
# Runs validate_white_color once per row (each call downloads that row's thumbnail)
# and stores the verdict in a new `is_true_white` column on df.
df["is_true_white"] = df.apply(validate_white_color, axis=1)

NameError: name 'df' is not defined

In [3]:
# Preview the first rows flagged as true white, renumbered from zero.
df[df["is_true_white"].eq(True)].head().reset_index(drop=True)

NameError: name 'df' is not defined

In [None]:
def extract_dominant_rgb(row):
    """Return the ``"rgb"`` value of the top dominant colour of a row's thumbnail.

    Downloads the image at ``row["thumbnail_url"]`` and asks
    ``extract_dominant_colors_cv`` for ``k=3`` colours; only the first entry
    is used.

    Returns:
        The ``"rgb"`` value of the first entry, or ``None`` when any step
        raises (the error is printed together with ``row["goods_name"]``).
    """
    try:
        # Request the image behind the thumbnail URL.
        thumbnail = load_image_from_url(row["thumbnail_url"])
        # k may need tuning; only the first returned colour is kept.
        top_entry = extract_dominant_colors_cv(thumbnail, k=3)[0]
        rgb = top_entry["rgb"]
    except Exception as err:
        print(f"⚠️ {row['goods_name']} - 이미지 로드 실패: {err}")
        rgb = None
    return rgb


In [4]:
from collections import defaultdict

# Collect the dominant colour of every product, grouped by its colour label.
color_pool = defaultdict(list)

for _, row in df.iterrows():
    # One broken thumbnail must not abort the whole pass, so a failing row is
    # reported and skipped, matching how the per-row helper functions handle errors.
    try:
        img = load_image_from_url(row["thumbnail_url"])
        dominant = extract_dominant_colors_cv(img, k=1)[0]["rgb"]
    except Exception as e:
        print(f"⚠️ {row['goods_name']} - skipped: {e}")
        continue
    color_pool[row["color"]].append(dominant)

# Mean RGB per colour label; each channel mean is truncated to an int.
for color_name, rgbs in color_pool.items():
    avg_rgb = tuple(int(np.mean([rgb[i] for rgb in rgbs])) for i in range(3))
    print(f"{color_name}: {avg_rgb}")


NameError: name 'df' is not defined

In [81]:
from sklearn.cluster import KMeans
import numpy as np

def cluster_and_flag(df, color_label, k=2):
    """Flag the rows of one colour label that fall in its largest RGB cluster.

    Rows whose ``color`` equals ``color_label`` are clustered on their
    ``dominant_rgb`` value with KMeans. Rows in the most populated cluster get
    ``True`` in the ``is_true_<color_label>`` column; every other row of
    ``df`` gets ``False``.

    Args:
        df: DataFrame with ``color`` and ``dominant_rgb`` columns. It is
            modified in place (the flag column is added or overwritten).
        color_label: Value of ``color`` to process.
        k: Number of clusters requested. It is capped at the number of usable
            rows, because KMeans cannot fit more clusters than samples.

    Returns:
        The same ``df`` object. If no row carries ``color_label`` it is
        returned untouched, without the flag column.
    """
    flag_col = f"is_true_{color_label}"

    # Keep only the rows carrying the requested colour label.
    sub = df[df["color"] == color_label]
    if sub.empty:
        return df  # nothing to do for a label that is not present

    df[flag_col] = False

    # Rows with a missing colour (presumably a failed image extraction stored
    # as None — verify against how `dominant_rgb` is filled) cannot be
    # clustered; they simply keep the False flag.
    sub = sub[sub["dominant_rgb"].notna()]
    if sub.empty:
        return df

    rgb_array = np.array(sub["dominant_rgb"].tolist())

    # Cap the cluster count so a label with fewer rows than k does not raise.
    n_clusters = min(k, len(sub))
    kmeans = KMeans(n_clusters=n_clusters, random_state=42)
    labels = kmeans.fit_predict(rgb_array)

    # The "main" cluster is the one holding the most rows.
    main_cluster = np.argmax(np.bincount(labels))

    df.loc[sub.index, flag_col] = labels == main_cluster
    return df
