# Project 3: 'Image Analysis'
## First Year Project  
### ITU, Spring 2021

This notebook contains all the code developed to explore, wrangle and analyse the raw data sets for our project, 'Image Analysis'.

Contributors:  
- Andy Bao Nguyen (anbn)
- Florian Micliuc (flmi)
- Mattias Wohlert 
- Sofia Elena Terenziani (sote)

Created: 06-04-2021 

Last modified:

### Imports

In [1]:
import pandas as pd
import numpy as np
from  matplotlib import pyplot as plt
import matplotlib.image as mpimg
import os
import random
import cv2

### Constants

In [2]:
#We are using Micheals implementation from 'Lecture 1'
PATH = {}
PATH["data_raw"] = "../../data/raw/"
PATH["data_interim"] = "../../data/interim/"
PATH["data_processed"] = "../../data/processed/"
PATH["data_external"] = "../../data/external/"

PATH["images"] = "../../data/Raw/example_image"
PATH["images_segmentations"] = "../../data/raw/example_segmentation"
PATH["images_features"] = "../../data/features/"

FILENAME = {}
FILENAME["metadata"] = "example_ground_truth.csv"
FILENAME["images_features"] = "features.csv"

### Loading raw data

In [3]:
def load_images_from_folder(folder):
    images = []
    valid_images = [".jpg"]
    for filename in os.listdir(folder):
        ext = os.path.splitext(filename)[1]
        if ext.lower() not in valid_images:
            continue
        img = cv2.imread(os.path.join(folder,filename))
        if img is not None:
            images.append(img)
    return images

def images_id_from_folder(folder): 
    valid_images = [".jpg"]
    onlyfiles = []
    for filename in os.listdir(folder): 
        ext = os.path.splitext(filename)[1]
        if ext.lower() not in valid_images:
            continue
        if os.path.isfile(os.path.join(folder, filename)): 
            onlyfiles.append(filename)
    return onlyfiles

In [4]:
dataraw = {} 
dataraw["metadata"] = pd.read_csv(PATH["data_raw"]+FILENAME["metadata"])
dataraw["features"] = pd.read_csv(PATH["images_features"]+FILENAME["images_features"])
dataraw["images"] = load_images_from_folder(PATH["images"])
dataraw["images_filename"] = images_id_from_folder(PATH["images"])

## Task 0: Explore the data

In [5]:
#merging the datasets 
metadata = pd.DataFrame(dataraw["metadata"])
features = pd.DataFrame(dataraw["features"])
merged_data = metadata.merge(features, left_on = ["image_id"], right_on = ["id"])
merged_data = merged_data.drop(["id"], axis =1)
print(merged_data)

         image_id  melanoma  seborrheic_keratosis        area  perimeter
0    ISIC_0001769       0.0                   0.0    216160.0     2013.0
1    ISIC_0001852       0.0                   0.0    130493.0     1372.0
2    ISIC_0001871       0.0                   0.0    205116.0     1720.0
3    ISIC_0003462       0.0                   0.0    161705.0     1344.0
4    ISIC_0003539       0.0                   0.0    317040.0     2063.0
..            ...       ...                   ...         ...        ...
145  ISIC_0015443       0.0                   0.0   9732988.0    13775.0
146  ISIC_0015445       0.0                   0.0  12049938.0    19517.0
147  ISIC_0015483       0.0                   0.0   8856478.0    13813.0
148  ISIC_0015496       0.0                   0.0   8951522.0    13742.0
149  ISIC_0015627       0.0                   0.0   3193093.0     8751.0

[150 rows x 5 columns]


In [6]:
#mask 
mask_melanoma = (merged_data["melanoma"] == 1.0) & (merged_data["seborrheic_keratosis"] == 0.0)
mask_keratosis = (merged_data["melanoma"] == 0.0) & (merged_data["seborrheic_keratosis"] == 1.0)
mask_none = (merged_data["melanoma"] == 0.0) & (merged_data["seborrheic_keratosis"] == 0.0)

merged_melanoma = merged_data[mask_melanoma]
merged_keratosis = merged_data[mask_keratosis]
merged_none = merged_data[mask_none]


In [8]:
#images id and actual images lists 
melanoma_images_id = [i for i in merged_melanoma["image_id"]]
keratosis_images_id = [i for i in merged_keratosis["image_id"]]
none_images_id = [i for i in merged_none["image_id"]]

melanoma_images = [i for i in dataraw["images_filename"] for j in melanoma_images_id if j in i]
keratosis_images = [i for i in dataraw["images_filename"] for j in keratosis_images_id if j in i]
none_images = [i for i in dataraw["images_filename"] for j in none_images_id if j in i]

In [None]:
#NB!!!! Only 
from shutil import copyfile

subfolder1 = os.path.join(PATH["data_interim"], "melanoma_images")
subfolder2 = os.path.join(PATH["data_interim"], "keratosis_images")
subfolder3 = os.path.join(PATH["data_interim"], "none_images")

if not os.path.exists(subfolder1):
    os.makedirs(subfolder1)

if not os.path.exists(subfolder2):
    os.makedirs(subfolder2)
    
if not os.path.exists(subfolder3):
    os.makedirs(subfolder3)
    
for i in melanoma_images:
    source = os.path.join(PATH["images"], i)
    destination = os.path.join(subfolder1, i)
    copyfile(source, destination)
    
for i in keratosis_images:
    source = os.path.join(PATH["images"], i)
    destination = os.path.join(subfolder2, i)
    copyfile(source, destination)

for i in none_images: 
    source = os.path.join(PATH["images"], i)
    destination = os.path.join(subfolder3, i)
    copyfile(source, destination)

## Task 0: Explore the data

In [10]:
for i in benign_images: 
    print(i)

ISIC_0015372.jpg
ISIC_0004337.jpg
ISIC_0008524.jpg
ISIC_0013104.jpg
ISIC_0012965.jpg
ISIC_0015401.jpg
ISIC_0003539.jpg
ISIC_0012191.jpg
ISIC_0015211.jpg
ISIC_0001852.jpg
ISIC_0009995.jpg
ISIC_0003462.jpg
ISIC_0012547.jpg
ISIC_0012221.jpg
ISIC_0006651.jpg
ISIC_0012746.jpg
ISIC_0006914.jpg
ISIC_0013128.jpg
ISIC_0012222.jpg
ISIC_0012126.jpg
ISIC_0013562.jpg
ISIC_0012127.jpg
ISIC_0007528.jpg
ISIC_0013561.jpg
ISIC_0014178.jpg
ISIC_0015313.jpg
ISIC_0013010.jpg
ISIC_0012256.jpg
ISIC_0014829.jpg
ISIC_0008025.jpg
ISIC_0012684.jpg
ISIC_0006815.jpg
ISIC_0012109.jpg
ISIC_0012335.jpg
ISIC_0012876.jpg
ISIC_0013188.jpg
ISIC_0014989.jpg
ISIC_0001769.jpg
ISIC_0012660.jpg
ISIC_0014211.jpg
ISIC_0015443.jpg
ISIC_0012306.jpg
ISIC_0015496.jpg
ISIC_0010459.jpg
ISIC_0007332.jpg
ISIC_0015483.jpg
ISIC_0014945.jpg
ISIC_0012313.jpg
ISIC_0003805.jpg
ISIC_0012538.jpg
ISIC_0003582.jpg
ISIC_0012316.jpg
ISIC_0015445.jpg
ISIC_0013793.jpg
ISIC_0007241.jpg
ISIC_0015043.jpg
ISIC_0014162.jpg
ISIC_0012400.jpg
ISIC_0001871.j