In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os
from PIL import Image

In [2]:
label_index = {"dry": 0, "normal": 1, "oily": 2}

def create_df(base):
    dd = {"images": [], "labels": []}
    for label_name in os.listdir(base):
        label_path = os.path.join(base, label_name)
        if os.path.isdir(label_path):  
            for img_name in os.listdir(label_path):
                img_path = os.path.join(label_path, img_name)
                dd["images"].append(img_path)
                dd["labels"].append(label_index[label_name])
    return pd.DataFrame(dd)

In [3]:
train_path = "Skin_Type_Classification/Oily-Dry-Skin_Faces_only/train"
train_df = create_df(train_path)
test_path = "Skin_Type_Classification/Oily-Dry-Skin_Faces_only/test"
test_df = create_df(test_path)

In [4]:
def load_image_rgb(image_path):
    with Image.open(image_path) as img:
        return img.convert("RGB")


In [5]:
def open_data(df):
    images = []
    
    for index, row in df.iterrows():
        img = Image.open(row['images']).convert('RGB')
        img = np.array(img) / 255.0
        images.append(img)

    return images

train_data = open_data(train_df)
test_data = open_data(test_df)
train_data = np.array(train_data)
test_data = np.array(test_data)

In [6]:
train_label = train_df['labels']
test_label = test_df['labels']

In [7]:
np.save("train_data.npy", train_data)
np.save("test_data.npy", test_data)
np.save("train_label.npy", train_label)
np.save("test_label.npy", test_label)

In [15]:
train_data.shape

(2756, 650, 650, 3)

In [16]:
flattened_train_data = train_data.reshape(train_data.shape[0], -1)
flattened_test_data = test_data.reshape(test_data.shape[0], -1)

In [17]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report

In [18]:
logreg = LogisticRegression(multi_class='multinomial', solver='newton-cg', max_iter=1000)
logreg.fit(flattened_train_data, train_label.ravel())

  logreg.fit(flattened_train_data, train_label.ravel())


KeyboardInterrupt: 

In [None]:
y_pred = logreg.predict(flattened_test_data)
print("Accuracy: ", accuracy_score(test_label, test_label))

## PCA

In [19]:
from sklearn.decomposition import PCA

def apply_pca_to_channels(data, n_components=50):
    pca = PCA(n_components=n_components)
    
    channel_1 = data[:, :, :, 0]
    channel_2 = data[:, :, :, 1]
    channel_3 = data[:, :, :, 2]
    
    channel_1_flat = channel_1.reshape(data.shape[0], -1)
    channel_2_flat = channel_2.reshape(data.shape[0], -1)
    channel_3_flat = channel_3.reshape(data.shape[0], -1)
    
    channel_1_pca = pca.fit_transform(channel_1_flat)
    channel_2_pca = pca.fit_transform(channel_2_flat)
    channel_3_pca = pca.fit_transform(channel_3_flat)
    
    return channel_1_pca, channel_2_pca, channel_3_pca



In [20]:
n_components = 50
channel_1_pca_train, channel_2_pca_train, channel_3_pca_train = apply_pca_to_channels(train_data, n_components)
channel_1_pca_test, channel_2_pca_test, channel_3_pca_test = apply_pca_to_channels(test_data, n_components)

pca_train_data = np.hstack((channel_1_pca_train, channel_2_pca_train, channel_3_pca_train))
pca_test_data = np.hstack((channel_1_pca_test, channel_2_pca_test, channel_3_pca_test))

In [21]:
logreg = LogisticRegression(multi_class='multinomial', solver='newton-cg', max_iter=1000)
logreg.fit(pca_train_data, train_label.ravel())

  logreg.fit(pca_train_data, train_label.ravel())


In [22]:
y_pred = logreg.predict(pca_test_data)
print("Accuracy: ", accuracy_score(test_label, y_pred))

Accuracy:  0.2835820895522388


In [23]:
from sklearn.svm import SVC

In [24]:
svm_model = SVC(kernel='rbf', max_iter=10000, decision_function_shape='ovr')

In [25]:
svm_model.fit(pca_train_data, train_label.ravel())

  svm_model.fit(pca_train_data, train_label.ravel())


In [26]:
ypred = svm_model.predict(pca_test_data)
print("Accuracy: ", accuracy_score(test_label, ypred))

Accuracy:  0.4253731343283582


In [27]:
ypred

array([1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 0, 1, 2, 1, 1, 1, 2, 1, 1, 1, 1,
       2, 1, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2, 0, 1, 2, 1, 1, 2, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 0, 2, 2, 1, 1, 1, 0, 2, 2, 1,
       1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 2, 1, 2, 2, 1, 1, 1, 1, 1, 0,
       1, 2, 1, 2, 1, 2, 2, 1, 2, 2, 2, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 1,
       1, 2, 1, 1, 0, 2, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 2, 2, 1, 1, 2,
       1, 2], dtype=int64)

In [28]:
test_label

0      0
1      0
2      0
3      0
4      0
      ..
129    2
130    2
131    2
132    2
133    2
Name: labels, Length: 134, dtype: int64