In [None]:
import os
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
import utils

In [None]:
# Folder (relative path) that is listed for input images further down.
folder = 'data'
# Destination handed to DataFrame.to_csv after feature extraction.
# NOTE(review): this value has no ".csv" extension and looks like a directory;
# confirm it is really meant to be the output file. It is also an absolute,
# machine-specific path, so the notebook will not run unchanged elsewhere.
output_csv = r'C:\Users\KDT-37\Desktop\KDT_7\09_ML_CV\Project'

In [None]:
# Number of dominant colors requested per image. The same value fixes the CSV
# layout: filename + n_colors * (R, G, B, Ratio).
n_colors = 5

# os.listdir returns entries in arbitrary order, so sort to keep the row order
# (and therefore any later seeded train/test split) reproducible.
# The extension check is case-insensitive so files such as "IMG.JPG" are kept.
image_files = sorted(
    f for f in os.listdir(folder) if f.lower().endswith(('.jpg', '.png'))
)
rows = []

for file in image_files:
    path = os.path.join(folder, file)
    img = cv2.imread(path)
    if img is None:
        # cv2.imread signals an unreadable file by returning None, not by raising;
        # report it instead of dropping the file silently.
        print(f"읽기 실패: {file}")
        continue

    fg_img = utils.remove_background(img)
    dominant_colors = utils.extract_dominant_colors(fg_img, k=n_colors)
    if not dominant_colors:
        print(f"분석 불가: {file}")
        continue

    # One row per image: filename, then (R, G, B, ratio) for each dominant color.
    # To eyeball a palette while debugging, the colors and ratios can be passed
    # to utils.plot_colors(colors, ratios, file).
    row = [file]
    for color, ratio in dominant_colors[:n_colors]:
        row += list(color) + [ratio]

    # Defensive: if the helper returned fewer than n_colors entries, pad the
    # missing slots with zeros (a color with ratio 0) so every row matches the
    # column list below and pd.DataFrame does not raise on ragged rows.
    row += [0] * (1 + 4 * n_colors - len(row))

    rows.append(row)

# Save to CSV
columns = ['filename']
for i in range(1, n_colors + 1):
    columns += [f'R{i}', f'G{i}', f'B{i}', f'Ratio{i}']

df = pd.DataFrame(rows, columns=columns)

# to_csv cannot write to a directory. If output_csv points at an existing
# directory, write a file inside it; otherwise use the path exactly as given.
if os.path.isdir(output_csv):
    csv_path = os.path.join(output_csv, 'dominant_colors.csv')
else:
    csv_path = output_csv

df.to_csv(csv_path, index=False)
print(f"\n CSV 저장 완료: {csv_path}")

In [None]:
# Preview the first rows of the extracted color table.
df.head()

In [None]:
def _label_from_filename(name):
    """Return the lower-cased text that precedes the first underscore in `name`."""
    return name.split('_')[0].lower()


# The class label is encoded as the filename prefix before the first "_".
df['label'] = df['filename'].map(_label_from_filename)

[ 랜덤포레스트 ]<hr>

In [None]:
%pip install seaborn

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# Features are every color/ratio column; the target is the filename-derived label.
X = df.drop(['filename', 'label'], axis=1)
y = df['label']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

acc = accuracy_score(y_test, y_pred)
print(f" 분류 정확도: {acc:.2f}")

print("\nClassification Report:")
print(classification_report(y_test, y_pred))

# Confusion matrix visualisation.
# Pass labels=model.classes_ so the matrix has one row/column per class the
# model knows, in the same order as the tick labels. Without it the matrix only
# covers classes present in y_test/y_pred and can disagree with the ticks when
# a class is missing from the test split.
class_labels = model.classes_
cm = confusion_matrix(y_test, y_pred, labels=class_labels)

fig, ax = plt.subplots(figsize=(6, 4))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_labels,
    yticklabels=class_labels,
    ax=ax,
)
ax.set_xlabel('Predicted')
ax.set_ylabel('Actual')
ax.set_title('Confusion Matrix')
fig.tight_layout()
plt.show()

RGB → HSV로 변환

<b>가장 지배적인 색상(Ratio1 기준)</b>만 사용

색상 계열(color_group) → One-Hot 인코딩

In [None]:
# Work on a reduced copy: only the first color slot (R1/G1/B1/Ratio1) per image,
# plus a label taken from the filename prefix before the first underscore.
df_simple = df[['filename', 'R1', 'G1', 'B1', 'Ratio1']].assign(
    label=df['filename'].map(lambda fname: fname.split('_')[0].lower())
)

In [None]:
def rgb_to_hsv(r, g, b):
    """Convert a single RGB color to OpenCV's 8-bit HSV representation.

    Args:
        r, g, b: Channel values on a 0-255 scale. Non-integer values are
            rounded to the nearest integer and values outside 0-255 are
            clipped, rather than being truncated or wrapped by a bare uint8 cast.

    Returns:
        Tuple ``(h, s, v)`` of Python ints. For 8-bit input OpenCV stores hue
        as degrees / 2, so ``h`` lies in 0-179 while ``s`` and ``v`` lie in
        0-255. Plain ints are returned (not ``np.uint8``) so later arithmetic
        on the values cannot silently wrap around.

    NOTE(review): assumes the inputs really are in R, G, B order; cv2.imread
    yields BGR, so confirm the upstream helper converts before storing R1/G1/B1.
    """
    rgb = np.clip(np.rint(np.asarray([r, g, b], dtype=np.float64)), 0, 255)
    # cvtColor needs an image-shaped array, so wrap the color as a 1x1 pixel.
    pixel = rgb.astype(np.uint8).reshape(1, 1, 3)
    h, s, v = cv2.cvtColor(pixel, cv2.COLOR_RGB2HSV)[0, 0]
    return int(h), int(s), int(v)

# Add the H, S, V columns computed from each row's R1/G1/B1.
df_simple[['H', 'S', 'V']] = df_simple.apply(lambda row: pd.Series(rgb_to_hsv(row['R1'], row['G1'], row['B1'])), axis=1)

In [None]:
# Remove the raw RGB columns from the feature table.
# errors='ignore' keeps this cell safe to re-run: without it a second run raises
# KeyError because the columns have already been dropped.
df_simple = df_simple.drop(columns=['R1', 'G1', 'B1'], errors='ignore')

In [None]:
# Map each hue value to a named color group via the project helper.
# NOTE(review): H was produced by cv2.COLOR_RGB2HSV on uint8 data, which OpenCV
# scales to 0-179 (degrees / 2); confirm classify_color_group_from_h expects
# that range rather than 0-360.
df_simple['color_group'] = df_simple['H'].apply(utils.classify_color_group_from_h)

In [None]:
from sklearn.preprocessing import OneHotEncoder

# One-hot encode the color group into dense indicator columns.
# scikit-learn renamed `sparse` to `sparse_output` in 1.2 and removed the old
# name in 1.4, so try the new keyword first and fall back for older releases.
try:
    encoder = OneHotEncoder(sparse_output=False)
except TypeError:
    encoder = OneHotEncoder(sparse=False)

encoded = encoder.fit_transform(df_simple[['color_group']])

# Reuse df_simple's index so a later column-wise concat aligns row for row even
# if df_simple no longer has a default 0..n-1 index.
encoded_df = pd.DataFrame(
    encoded,
    columns=encoder.get_feature_names_out(['color_group']),
    index=df_simple.index,
)

In [None]:
# Final modelling table: df_simple without the raw category and filename
# columns, with the one-hot indicator columns appended side by side.
base_features = df_simple.drop(columns=['color_group', 'filename'])
df_final = pd.concat([base_features, encoded_df], axis=1)

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Target is the label column; every other column of df_final is a feature.
y = df_final['label']
X = df_final.drop(columns=['label'])

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# fit() returns the estimator itself, so construction and training can be chained.
clf = RandomForestClassifier(random_state=42).fit(X_train, y_train)
y_pred = clf.predict(X_test)

acc = accuracy_score(y_test, y_pred)
print(f"\n 개선된 모델 정확도: {acc:.2f}")

print("\n Classification Report:")
print(classification_report(y_test, y_pred))