### 1. Import Image Dataset
from file `ISIC_2017_Data_GroundTruth_Classification.csv`, get the value of `melanoma` corresponding for each `image_id` inside directory `PROJECT_Data` only

In [38]:
import pandas as pd


df = pd.read_csv('ISIC_2017_Data_GroundTruth_Classification.csv')
df.head()

Unnamed: 0,image_id,melanoma,seborrheic_keratosis
0,ISIC_0000000,0.0,0.0
1,ISIC_0000001,0.0,0.0
2,ISIC_0000002,1.0,0.0
3,ISIC_0000003,0.0,0.0
4,ISIC_0000004,1.0,0.0


In [39]:
import feature_extraction as bjir
import os

dataset_path = 'D:/compvs/Project/PROJECT_Data' # enih pathnya diganti

ids = sorted(list(set([filename[:12] for filename in os.listdir(dataset_path)]))) # ngok
len(ids)

200

### 2. Extract Features and Save as CSV
calculate value of:
1. asymmetry
2. border_irregularity
3. color_irregularity
4. differential_structure
   
for each `image_id` inside `PROJECT_Data`, then save as `dataset.csv` 

In [40]:
features = {
    'image_id': [],
    'melanoma': [],
    'asymmetry': [],
    'border_irregularity': [],
    'color_irregularity': [],
    'differential_structure': []
}

for id in ids:
    img_features = bjir.get_features(dataset_path, id)
    img_features['image_id'] = id
    img_features['melanoma'] = int(df[df['image_id'] == id].iloc[0]['melanoma'])
    for feature_name in features:
        features[feature_name].append(img_features[feature_name])

fd = pd.DataFrame(features)

In [41]:
fd.head(10)

Unnamed: 0,image_id,melanoma,asymmetry,border_irregularity,color_irregularity,differential_structure
0,ISIC_0000000,0,0.004596,1.376352,39.668585,3.3e-05
1,ISIC_0000001,0,0.001275,1.617571,26.345351,0.000222
2,ISIC_0000002,1,0.002081,1.958622,27.968183,5e-05
3,ISIC_0000003,0,0.004378,1.515998,33.766395,4.6e-05
4,ISIC_0000004,1,0.005216,1.270549,34.367256,4.4e-05
5,ISIC_0000006,0,0.005461,1.841533,25.227694,0.000171
6,ISIC_0000007,0,0.001515,1.394911,25.89716,0.000176
7,ISIC_0000008,0,0.001178,1.652024,33.650311,4.6e-05
8,ISIC_0000009,0,0.004175,1.713577,25.223572,6.7e-05
9,ISIC_0000010,0,0.001681,1.591705,27.653117,0.000191


In [42]:
fd.describe()
fd.to_csv('dataset.csv')

### 3. Train AI Classifier
using `dataset.csv`, build an AI Classifier to predict `melanoma` (either 1 or 0) using value of 
1. asymmetry
2. border_irregularity
3. color_irregularity
4. differential_structure

#### 3.1 Machine Learning
Logistic Regression, SVM, KNN, LDA

In [48]:
# NOTE: mending di run di google.colab, upload 'dataset.csv' yg udah diekstrak

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score

ngok = pd.read_csv('dataset.csv')

X = ngok.drop(['melanoma', 'image_id'], axis=1)
y = ngok['melanoma']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = LogisticRegression()
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')

print(classification_report(y_test, y_pred))

Accuracy: 0.90
              precision    recall  f1-score   support

           0       0.90      0.90      0.90        21
           1       0.89      0.89      0.89        19

    accuracy                           0.90        40
   macro avg       0.90      0.90      0.90        40
weighted avg       0.90      0.90      0.90        40



In [59]:
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split

svm_model = SVC(kernel='linear', C=1.0)

svm_model.fit(X_train, y_train)

y_pred = svm_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')
print(classification_report(y_test, y_pred))

Accuracy: 0.93
              precision    recall  f1-score   support

           0       0.91      0.95      0.93        21
           1       0.94      0.89      0.92        19

    accuracy                           0.93        40
   macro avg       0.93      0.92      0.92        40
weighted avg       0.93      0.93      0.92        40



In [60]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split

knn_model = KNeighborsClassifier(n_neighbors=20)
knn_model.fit(X_train, y_train)

y_pred = knn_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')
print(classification_report(y_test, y_pred))

Accuracy: 0.95
              precision    recall  f1-score   support

           0       0.91      1.00      0.95        21
           1       1.00      0.89      0.94        19

    accuracy                           0.95        40
   macro avg       0.96      0.95      0.95        40
weighted avg       0.95      0.95      0.95        40



In [62]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.model_selection import train_test_split

lda_model = LinearDiscriminantAnalysis()
lda_model.fit(X_train, y_train)

y_pred = knn_model.predict(X_test)
test_accuracy = lda_model.score(X_test, y_test)
print("Test accuracy:", test_accuracy)
print(classification_report(y_test, y_pred))

Test accuracy: 0.925
              precision    recall  f1-score   support

           0       0.91      1.00      0.95        21
           1       1.00      0.89      0.94        19

    accuracy                           0.95        40
   macro avg       0.96      0.95      0.95        40
weighted avg       0.95      0.95      0.95        40



#### 3.2 Neural Network


In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import BinaryCrossentropy

# Load the dataset (assuming it's a CSV file)
data = pd.read_csv('dataset.csv')

# Split the dataset into features and labels
X = data[['asymmetry', 'border_irregularity', 'color_irregularity', 'differential_structure']]
y = data['melanoma']

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalize the features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Build the neural network
model = Sequential()
model.add(Dense(16, input_dim=X_train.shape[1], activation='relu'))
model.add(Dense(8, activation='relu'))
model.add(Dense(1, activation='sigmoid'))  # Output layer for binary classification

# Compile the model
model.compile(optimizer=Adam(learning_rate=0.001), 
              loss=BinaryCrossentropy(), 
              metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train, epochs=100, batch_size=20, validation_split=0.2)

# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy}")

# Predict on the test set
y_pred_prob = model.predict(X_test)
y_pred = (y_pred_prob > 0.5).astype(int)  # Convert probabilities to binary predictions

# Print the classification report
print(classification_report(y_test, y_pred))

Epoch 1/100


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 28ms/step - accuracy: 0.5157 - loss: 0.7240 - val_accuracy: 0.5312 - val_loss: 0.7094
Epoch 2/100
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.4306 - loss: 0.7505 - val_accuracy: 0.5312 - val_loss: 0.6997
Epoch 3/100
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.5356 - loss: 0.7015 - val_accuracy: 0.5938 - val_loss: 0.6926
Epoch 4/100
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.5535 - loss: 0.6935 - val_accuracy: 0.5625 - val_loss: 0.6865
Epoch 5/100
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.5002 - loss: 0.6937 - val_accuracy: 0.5625 - val_loss: 0.6813
Epoch 6/100
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.4996 - loss: 0.6896 - val_accuracy: 0.5312 - val_loss: 0.6771
Epoch 7/100
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37