### 1. Import Image Dataset
from file `ISIC_2017_Data_GroundTruth_Classification.csv`, get the value of `melanoma` corresponding for each `image_id` inside directory `PROJECT_Data` only

#### 1.1 Read the data

In [1]:
import pandas as pd

df = pd.read_csv('ISIC_2017_Data_GroundTruth_Classification.csv')
df.head()

Unnamed: 0,image_id,melanoma,seborrheic_keratosis
0,ISIC_0000000,0.0,0.0
1,ISIC_0000001,0.0,0.0
2,ISIC_0000002,1.0,0.0
3,ISIC_0000003,0.0,0.0
4,ISIC_0000004,1.0,0.0


In [2]:
import os

dataset_path = 'D:/compvs/Project/uas-pcvk/PROJECT_Data' # enih pathnya diganti

ids = sorted(list(set([filename[:12] for filename in os.listdir(dataset_path)]))) # ngok
len(ids)

200

#### 1.2 Data augmentation
- rotate 15 deg
- rotate 345 deg

In [32]:
import cv2

def augment_images(dataset_path, image_id):
    image = cv2.imread(f'{dataset_path}/{image_id}.jpg')
    seg   = cv2.imread(f'{dataset_path}/{image_id}_segmentation.png')
    output_path = f'{dataset_path}/../PROJECT_Data_Augmented/{image_id}'

    def rotate_image(image, angle):
        (h, w) = image.shape[:2]
        center = (w // 2, h // 2)
        matrix = cv2.getRotationMatrix2D(center, angle, 1.0)
        return cv2.warpAffine(image, matrix, (w, h))

    rotated_image = rotate_image(image, 15)
    rotated_seg   = rotate_image(seg, 15)
    cv2.imwrite(f'{output_path}_r15.jpg', rotated_image)
    cv2.imwrite(f'{output_path}_r15_segmentation.png', rotated_seg)
    
    rotated_image = rotate_image(image, 345)
    rotated_seg   = rotate_image(seg, 345)
    cv2.imwrite(f'{output_path}_r345.jpg', rotated_image)
    cv2.imwrite(f'{output_path}_r345_segmentation.png', rotated_seg)

In [33]:
for image_id in ids:
    augment_images(dataset_path, image_id)

#### 1.3 Extract Features and Save as CSV
calculate value of:
1. symmetry
2. reciprocal circularity (1/circularity)
3. std deviation of RGB value
   
for each `image_id` inside `PROJECT_Data`, then save as `dataset.csv` 

In [10]:
def get_features(dataset_path, image_id):
    import numpy as np
    
    segmented = cv2.cvtColor(cv2.imread(f'{dataset_path}/{image_id}_segmentation.png'), cv2.COLOR_BGR2GRAY)
    image = cv2.imread(f'{dataset_path}/{image_id}.jpg')

    contours, _ = cv2.findContours(segmented, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    contour = max(contours, key=cv2.contourArea)

    def calc_symmetry(segmented_image):
        M = cv2.moments(segmented_image)
        cX = int(M["m10"] / M["m00"])
        cY = int(M["m01"] / M["m00"])
        
        top_left = segmented_image[:cY, :cX]
        top_right = segmented_image[:cY, cX:]
        bottom_left = segmented_image[cY:, :cX]
        bottom_right = segmented_image[cY:, cX:]
        
        symmetry_score = (cv2.matchShapes(top_left, bottom_right, 1, 0.0) + 
                        cv2.matchShapes(top_right, bottom_left, 1, 0.0)) / 2
        return { 'symmetry': symmetry_score }
    
    def calc_reciprocal_circularity(contour):
        perimeter = cv2.arcLength(contour, True)
        area = cv2.contourArea(contour)
        return { '1/circularity': perimeter**2 / (4 * np.pi * area) }

    def calc_std_color(image, segmented_image):
        masked_image = cv2.bitwise_and(image, image, mask=segmented_image)
        color_stddev = np.std(masked_image[segmented_image > 0], axis=0)
        return { 'std_color': np.mean(color_stddev) }

    features = {}

    # features.update(calc_hsv(image, segmented))
    features.update(calc_symmetry(segmented))
    features.update(calc_std_color(image, segmented))
    features.update(calc_reciprocal_circularity(contour))

    return features

In [18]:
features = {
    'image_id': [],
    'melanoma': []
}

for id in ids:
    img_features = get_features(dataset_path, id)
    img_features['image_id'] = id
    img_features['melanoma'] = int(df[df['image_id'] == id].iloc[0]['melanoma'])
    for feature_name in img_features:
        if feature_name not in features:
            features[feature_name] = []
        features[feature_name].append(img_features[feature_name])

fd = pd.DataFrame(features)

In [19]:
fd.to_csv('features.csv')
fd.head()

Unnamed: 0,image_id,melanoma,symmetry,std_color,1/circularity
0,ISIC_0000000,0,0.008072,39.36934,1.463696
1,ISIC_0000001,0,0.001034,25.947679,1.716384
2,ISIC_0000002,1,0.003091,27.861042,1.973328
3,ISIC_0000003,0,0.003943,33.612228,1.465988
4,ISIC_0000004,1,0.00296,34.173329,1.202164


### 2. Train AI Classifier
using `dataset.csv`, build an AI Classifier to predict `melanoma` (either 1 or 0) using value of 
1. asymmetry
2. border_irregularity
3. color_irregularity
4. differential_structure

#### 2.1 Split and augment the dataset
split into test and train dataset, then augment the train data

In [28]:
from sklearn.model_selection import train_test_split
import pandas as pd

df = pd.read_csv('features.csv')
y = df['melanoma']

df_train, df_test, y_train, y_test = train_test_split(df, y, test_size=0.2, random_state=42)

X_test  = df_test[['symmetry','std_color','1/circularity']]
X_train = df_train[['symmetry','std_color','1/circularity']]

In [38]:
aug_path  = f'{dataset_path}/../PROJECT_Data_Augmented'
aug_X_train = []
aug_y_train = []

for index, row in df_train.iterrows():
    for aug in ['r15', 'r345']:
        aug_features = get_features(aug_path, f'{row['image_id']}_{aug}')

        aug_X_train.append(aug_features)
        aug_y_train.append(row['melanoma'])

In [43]:
temp_X = pd.DataFrame.from_records(aug_X_train)
temp_y = pd.Series(aug_y_train)

X_train = pd.concat([X_train, temp_X], ignore_index=True)
y_train = pd.concat([y_train, temp_y], ignore_index=True)

#### 2.1 Machine Learning
Logistic Regression, SVM, KNN, LDA

In [27]:
# NOTE: mending di run di google.colab, upload 'dataset.csv' yg udah diekstrak

import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score

model = LogisticRegression()
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')

print(classification_report(y_test, y_pred))

Accuracy: 0.70
              precision    recall  f1-score   support

           0       0.74      0.67      0.70        21
           1       0.67      0.74      0.70        19

    accuracy                           0.70        40
   macro avg       0.70      0.70      0.70        40
weighted avg       0.70      0.70      0.70        40



In [23]:
from sklearn.svm import SVC

svm_model = SVC(kernel='linear', C=1.0)

svm_model.fit(X_train, y_train)

y_pred = svm_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')
print(classification_report(y_test, y_pred))

Accuracy: 0.70
              precision    recall  f1-score   support

           0       0.71      0.71      0.71        21
           1       0.68      0.68      0.68        19

    accuracy                           0.70        40
   macro avg       0.70      0.70      0.70        40
weighted avg       0.70      0.70      0.70        40



In [24]:
from sklearn.neighbors import KNeighborsClassifier

knn_model = KNeighborsClassifier(n_neighbors=20)
knn_model.fit(X_train, y_train)

y_pred = knn_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')
print(classification_report(y_test, y_pred))

Accuracy: 0.57
              precision    recall  f1-score   support

           0       0.64      0.43      0.51        21
           1       0.54      0.74      0.62        19

    accuracy                           0.57        40
   macro avg       0.59      0.58      0.57        40
weighted avg       0.59      0.57      0.57        40



In [25]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

lda_model = LinearDiscriminantAnalysis()
lda_model.fit(X_train, y_train)

y_pred = knn_model.predict(X_test)
test_accuracy = lda_model.score(X_test, y_test)
print("Test accuracy:", test_accuracy)
print(classification_report(y_test, y_pred))

Test accuracy: 0.55
              precision    recall  f1-score   support

           0       0.64      0.43      0.51        21
           1       0.54      0.74      0.62        19

    accuracy                           0.57        40
   macro avg       0.59      0.58      0.57        40
weighted avg       0.59      0.57      0.57        40



#### 3.2 Neural Network
(akurasinya masi rendah coi :"v)

In [26]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import BinaryCrossentropy

data = pd.read_csv('dataset.csv')
X = data[['asymmetry', 'border_irregularity', 'color_irregularity', 'differential_structure']]
y = data['melanoma']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

model = Sequential()
model.add(Dense(16, input_dim=X_train.shape[1], activation='relu'))
model.add(Dense(8, activation='relu'))
model.add(Dense(8, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

model.compile(optimizer=Adam(learning_rate=0.001), 
              loss=BinaryCrossentropy(), 
              metrics=['accuracy'])

model.fit(X_train, y_train, epochs=50, batch_size=10, validation_split=0.2)

loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy}")

y_pred_prob = model.predict(X_test)
y_pred = (y_pred_prob > 0.5).astype(int) # probabilitas -> klasifikasi

print(classification_report(y_test, y_pred))

Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 31ms/step - accuracy: 0.6125 - loss: 0.6720 - val_accuracy: 0.5625 - val_loss: 0.6559
Epoch 2/50
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.6059 - loss: 0.6713 - val_accuracy: 0.6875 - val_loss: 0.6504
Epoch 3/50
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.5678 - loss: 0.6626 - val_accuracy: 0.7812 - val_loss: 0.6460
Epoch 4/50
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.5893 - loss: 0.6636 - val_accuracy: 0.8125 - val_loss: 0.6418
Epoch 5/50
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.6332 - loss: 0.6692 - val_accuracy: 0.8125 - val_loss: 0.6376
Epoch 6/50
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.6947 - loss: 0.6476 - val_accuracy: 0.8125 - val_loss: 0.6338
Epoch 7/50
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 