<a href="https://colab.research.google.com/github/GitData-GA/shot-marilyns-analysis/blob/main/main.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Import libraries and scripts

In [None]:
import os
import shutil
import sys

# IPython shell escape: fetch the project sources. The clone lands in
# ./shot-marilyns-analysis relative to the notebook's working directory.
!git clone https://github.com/GitData-GA/shot-marilyns-analysis.git
# Put the cloned src/ directory first on the import path so that the `sma`
# package is resolved from the clone.
sys.path.insert(0, './shot-marilyns-analysis/src')

import sma
# Create the output directory that is zipped at the end of the notebook.
# exist_ok=True keeps this cell re-runnable: without it, a second run (or a
# pre-existing img/ directory) raises FileExistsError.
os.makedirs('img', exist_ok=True)

# Import images and prepare data

Source code of the functions used in this section:

- `sma.utils.save_img`: https://github.com/GitData-GA/shot-marilyns-analysis/blob/main/src/sma/utils/save_img.py

- `sma.utils.np_convert`: https://github.com/GitData-GA/shot-marilyns-analysis/blob/main/src/sma/utils/np_convert.py

- `sma.utils.pd_convert`: https://github.com/GitData-GA/shot-marilyns-analysis/blob/main/src/sma/utils/pd_convert.py


In [None]:
# The five source images share one URL layout:
#   <base>/1_<position>_<name>.jpg, with positions counted from 1.
# Building the mapping from the ordered names keeps each key and its file
# name in sync; the key order is the order of img_names.
img_base_url = "https://shotmarilyns.gd.edu.kg/assets/img"
img_names = (
    "orange_marilyn",
    "red_marilyn",
    "turq_marilyn",
    "blue_marilyn",
    "eggblue_marilyn",
)
img_links = {
    name: f"{img_base_url}/1_{position}_{name}.jpg"
    for position, name in enumerate(img_names, start=1)
}

## Save images to local directory

In [None]:
# Pass the name -> URL mapping to sma.utils.save_img.
# NOTE(review): presumably this downloads each image into img/ and uses
# img_idx=1 as the file-name prefix — confirm in save_img.py.
sma.utils.save_img(img_links=img_links, img_idx=1, verbose=True)

## Store the images as a dictionary of NumPy arrays

In [None]:
# Convert the linked images into the structure used by the rest of the
# notebook; np_img is indexed by the same image keys as img_links.
np_img = sma.utils.np_convert(img_links=img_links)
# Bare expression so the notebook displays the Blue Marilyn entry.
np_img['blue_marilyn']

## Store the images as a dictionary of 5 Pandas dataframes with HEX codes

In [None]:
# Derive the per-image tables from np_img; pd_img is indexed by image key.
pd_img = sma.utils.pd_convert(np_img=np_img)
# Bare expression so the notebook displays the Orange Marilyn table.
pd_img['orange_marilyn']

# Visualization

Source code of the functions used in this section:

- `sma.plot.distribution`: https://github.com/GitData-GA/shot-marilyns-analysis/blob/main/src/sma/plot/distribution.py

- `sma.plot.entropy_heatmap`: https://github.com/GitData-GA/shot-marilyns-analysis/blob/main/src/sma/plot/entropy_heatmap.py

- `sma.plot.scatter`: https://github.com/GitData-GA/shot-marilyns-analysis/blob/main/src/sma/plot/scatter.py

- `sma.cluster.kmeans`: https://github.com/GitData-GA/shot-marilyns-analysis/blob/main/src/sma/cluster/kmeans.py

- `sma.plot.bar`: https://github.com/GitData-GA/shot-marilyns-analysis/blob/main/src/sma/plot/bar.py

- `sma.plot.ribbon`: https://github.com/GitData-GA/shot-marilyns-analysis/blob/main/src/sma/plot/ribbon.py

## Distribution plots

In [None]:
# Distribution plots for the images held in np_img.
# NOTE(review): img_idx=2 presumably numbers the saved figure(s) — confirm in distribution.py.
sma.plot.distribution(np_img=np_img, img_idx=2, verbose=True)

## Relative conditional entropy plots

In [None]:
# Entropy heatmaps for the images held in np_img.
# NOTE(review): img_idx=3 presumably numbers the saved figure(s) — confirm in entropy_heatmap.py.
sma.plot.entropy_heatmap(np_img=np_img, img_idx=3, verbose=True)

## RGB space scatterplots

In [None]:
# Scatter plots from the per-image tables, without any clustering result.
# NOTE(review): img_idx=4 presumably numbers the saved figure(s) — confirm in scatter.py.
sma.plot.scatter(pd_img=pd_img, img_idx=4, verbose=True)

## KMeans clustering

In [None]:
# Run k-means on the per-image tables with n_clusters=10; the result is
# passed as kmeans= to the scatter, bar and ribbon plots of the full images.
# NOTE(review): no seed is passed here, so run-to-run reproducibility depends
# on how sma.cluster.kmeans initialises — confirm in kmeans.py.
kmean_result = sma.cluster.kmeans(pd_img=pd_img, n_clusters=10)

### Scatter plot by clusters

In [None]:
# Scatter plots of the full images with the k-means result supplied.
# NOTE(review): img_idx=5 presumably numbers the saved figure(s) — confirm in scatter.py.
sma.plot.scatter(pd_img=pd_img, img_idx=5, kmeans=kmean_result, verbose=True)

### Bar chart by clusters

In [None]:
# Bar charts of the full images driven by the k-means result.
# NOTE(review): img_idx=6 presumably numbers the saved figure(s) — confirm in bar.py.
sma.plot.bar(pd_img=pd_img, img_idx=6, kmeans=kmean_result, verbose=True)

### Color ribbon by clusters

In [None]:
# Color ribbons of the full images driven by the k-means result.
# NOTE(review): img_idx=7 presumably numbers the saved figure(s) — confirm in ribbon.py.
sma.plot.ribbon(pd_img=pd_img, img_idx=7, kmeans=kmean_result, verbose=True)

# Region of interest

Source code of the functions used in this section:

- `sma.roi.extract`: https://github.com/GitData-GA/shot-marilyns-analysis/blob/main/src/sma/roi/extract.py

## Orange Marilyn

### Extraction

In [None]:
# Region-of-interest specs for Orange Marilyn, one tuple per region:
#   (img_idx, extraction_name, method, value, fix)
# All four regions use the "sampling" method and pass fix=None. How the
# value tuples are interpreted is defined by sma.roi.extract.
orange_roi_specs = [
    ("8_1", "orange_marilyn_background", "sampling", [(0, 100, 0, 100)], None),
    ("8_2", "orange_marilyn_hair", "sampling", [(100, 150, 300, 400)], None),
    ("8_3", "orange_marilyn_eyeshadow", "sampling", [(460, 490, 450, 550)], None),
    ("8_4", "orange_marilyn_face", "sampling", [(300, 418, 300, 610)], None),
]

# Collect the extraction results keyed by region name.
orange_marilyn_roi = {}
for roi_idx, roi_name, roi_method, roi_value, roi_fix in orange_roi_specs:
    orange_marilyn_roi[roi_name] = sma.roi.extract(
        np_img=np_img,
        key="orange_marilyn",
        img_idx=roi_idx,
        extraction_name=roi_name,
        param={"method": roi_method, "value": roi_value},
        fix=roi_fix,
        verbose=True,
    )

### Plots

#### Data preparation and clustering

In [None]:
# Convert each extracted Orange Marilyn region to a table and drop every row
# whose 'hex' value is '#000000'.
# NOTE(review): presumably '#000000' marks pixels outside the extracted
# region; genuinely black pixels inside it are dropped too — confirm intent.
pd_img_orange_roi = {
    region: frame[frame['hex'] != '#000000']
    for region, frame in sma.utils.pd_convert(orange_marilyn_roi).items()
}
# Cluster the filtered region tables with n_clusters=15.
kmeans_orange_roi = sma.cluster.kmeans(pd_img_orange_roi, n_clusters=15)

#### Scatter plot with clusters

In [None]:
# Scatter plots of the Orange Marilyn region tables with their k-means result.
# NOTE(review): img_idx=9 presumably numbers the saved figure(s) — confirm in scatter.py.
sma.plot.scatter(pd_img=pd_img_orange_roi, img_idx=9, kmeans=kmeans_orange_roi, verbose=True)

#### Bar chart

In [None]:
# Bar charts of the Orange Marilyn region tables with their k-means result.
# NOTE(review): img_idx=10 presumably numbers the saved figure(s) — confirm in bar.py.
sma.plot.bar(pd_img=pd_img_orange_roi, img_idx=10, kmeans=kmeans_orange_roi, verbose=True)

#### Color ribbon

In [None]:
# Color ribbons of the Orange Marilyn region tables with their k-means result.
# NOTE(review): img_idx=11 presumably numbers the saved figure(s) — confirm in ribbon.py.
sma.plot.ribbon(pd_img=pd_img_orange_roi, img_idx=11, kmeans=kmeans_orange_roi, verbose=True)

## Red Marilyn

### Extraction

In [None]:
# Region-of-interest specs for Red Marilyn, one tuple per region:
#   (img_idx, extraction_name, method, value, fix)
# All regions use the "sampling" method; only the background passes a fix
# tuple. How value and fix are interpreted is defined by sma.roi.extract.
red_roi_specs = [
    ("12_1", "red_marilyn_background", "sampling", [(0, 960, 0, 30), (0, 960, 930, 960)], (690, 800, 275, 475)),
    ("12_2", "red_marilyn_hair", "sampling", [(10, 200, 300, 380), (10, 200, 440, 520)], None),
    ("12_3", "red_marilyn_eyeshadow", "sampling", [(455, 480, 455, 520)], None),
    ("12_4", "red_marilyn_face", "sampling", [(280, 460, 310, 435)], None),
]

# Collect the extraction results keyed by region name.
red_marilyn_roi = {}
for roi_idx, roi_name, roi_method, roi_value, roi_fix in red_roi_specs:
    red_marilyn_roi[roi_name] = sma.roi.extract(
        np_img=np_img,
        key="red_marilyn",
        img_idx=roi_idx,
        extraction_name=roi_name,
        param={"method": roi_method, "value": roi_value},
        fix=roi_fix,
        verbose=True,
    )

### Plots

#### Data preparation and clustering

In [None]:
# Convert each extracted Red Marilyn region to a table and drop every row
# whose 'hex' value is '#000000'.
# NOTE(review): presumably '#000000' marks pixels outside the extracted
# region; genuinely black pixels inside it are dropped too — confirm intent.
pd_img_red_roi = {
    region: frame[frame['hex'] != '#000000']
    for region, frame in sma.utils.pd_convert(red_marilyn_roi).items()
}
# Cluster the filtered region tables with n_clusters=15.
kmeans_red_roi = sma.cluster.kmeans(pd_img_red_roi, n_clusters=15)

#### Scatter plot with clusters

In [None]:
# Scatter plots of the Red Marilyn region tables with their k-means result.
# NOTE(review): img_idx=13 presumably numbers the saved figure(s) — confirm in scatter.py.
sma.plot.scatter(pd_img=pd_img_red_roi, img_idx=13, kmeans=kmeans_red_roi, verbose=True)

#### Bar chart


In [None]:
# Bar charts of the Red Marilyn region tables with their k-means result.
# NOTE(review): img_idx=14 presumably numbers the saved figure(s) — confirm in bar.py.
sma.plot.bar(pd_img=pd_img_red_roi, img_idx=14, kmeans=kmeans_red_roi, verbose=True)

#### Color ribbon

In [None]:
# Color ribbons of the Red Marilyn region tables with their k-means result.
# NOTE(review): img_idx=15 presumably numbers the saved figure(s) — confirm in ribbon.py.
sma.plot.ribbon(pd_img=pd_img_red_roi, img_idx=15, kmeans=kmeans_red_roi, verbose=True)

## Turq Marilyn

### Extraction

In [None]:
# Region-of-interest specs for Turq Marilyn, one tuple per region:
#   (img_idx, extraction_name, method, value, fix)
# Background and eyeshadow use the "sampling" method, hair and face use
# "exact"; every region passes fix=None. How the value tuples are interpreted
# for each method is defined by sma.roi.extract.
turq_roi_specs = [
    ("16_1", "turq_marilyn_background", "sampling", [(0, 960, 0, 40), (0, 960, 930, 970)], None),
    ("16_2", "turq_marilyn_hair", "exact", [(19, 101, 70), (28, 255, 255)], None),
    ("16_3", "turq_marilyn_eyeshadow", "sampling", [(470, 490, 300, 320), (470, 490, 500, 550)], None),
    ("16_4", "turq_marilyn_face", "exact", [(0, 0, 0), (17, 100, 255)], None),
]

# Collect the extraction results keyed by region name.
turq_marilyn_roi = {}
for roi_idx, roi_name, roi_method, roi_value, roi_fix in turq_roi_specs:
    turq_marilyn_roi[roi_name] = sma.roi.extract(
        np_img=np_img,
        key="turq_marilyn",
        img_idx=roi_idx,
        extraction_name=roi_name,
        param={"method": roi_method, "value": roi_value},
        fix=roi_fix,
        verbose=True,
    )

### Plots

#### Data preparation and clustering

In [None]:
# Convert each extracted Turq Marilyn region to a table and drop every row
# whose 'hex' value is '#000000'.
# NOTE(review): presumably '#000000' marks pixels outside the extracted
# region; genuinely black pixels inside it are dropped too — confirm intent.
pd_img_turq_roi = {
    region: frame[frame['hex'] != '#000000']
    for region, frame in sma.utils.pd_convert(turq_marilyn_roi).items()
}
# Cluster the filtered region tables with n_clusters=15.
kmeans_turq_roi = sma.cluster.kmeans(pd_img_turq_roi, n_clusters=15)

#### Scatter plot with clusters

In [None]:
# Scatter plots of the Turq Marilyn region tables with their k-means result.
# NOTE(review): img_idx=17 presumably numbers the saved figure(s) — confirm in scatter.py.
sma.plot.scatter(pd_img=pd_img_turq_roi, img_idx=17, kmeans=kmeans_turq_roi, verbose=True)

#### Bar chart

In [None]:
# Bar charts of the Turq Marilyn region tables with their k-means result.
# NOTE(review): img_idx=18 presumably numbers the saved figure(s) — confirm in bar.py.
sma.plot.bar(pd_img=pd_img_turq_roi, img_idx=18, kmeans=kmeans_turq_roi, verbose=True)

#### Color ribbon

In [None]:
# Color ribbons of the Turq Marilyn region tables with their k-means result.
# NOTE(review): img_idx=19 presumably numbers the saved figure(s) — confirm in ribbon.py.
sma.plot.ribbon(pd_img=pd_img_turq_roi, img_idx=19, kmeans=kmeans_turq_roi, verbose=True)

## Blue Marilyn

### Extraction

In [None]:
# Region-of-interest specs for Blue Marilyn, one tuple per region:
#   (img_idx, extraction_name, method, value, fix)
# Background, hair and eyeshadow use the "sampling" method, the face uses
# "exact"; only the eyeshadow passes a fix tuple. How value and fix are
# interpreted is defined by sma.roi.extract.
blue_roi_specs = [
    ("20_1", "blue_marilyn_background", "sampling", [(0, 145, 0, 145)], None),
    ("20_2", "blue_marilyn_hair", "sampling", [(45, 200, 485, 590)], None),
    ("20_3", "blue_marilyn_eyeshadow", "sampling", [(470, 490, 300, 320)], (600, 960, 600, 800)),
    ("20_4", "blue_marilyn_face", "exact", [(110, 40, 0), (255, 255, 255)], None),
]

# Collect the extraction results keyed by region name.
blue_marilyn_roi = {}
for roi_idx, roi_name, roi_method, roi_value, roi_fix in blue_roi_specs:
    blue_marilyn_roi[roi_name] = sma.roi.extract(
        np_img=np_img,
        key="blue_marilyn",
        img_idx=roi_idx,
        extraction_name=roi_name,
        param={"method": roi_method, "value": roi_value},
        fix=roi_fix,
        verbose=True,
    )

### Plots

#### Data preparation and clustering

In [None]:
# Convert each extracted Blue Marilyn region to a table and drop every row
# whose 'hex' value is '#000000'.
# NOTE(review): presumably '#000000' marks pixels outside the extracted
# region; genuinely black pixels inside it are dropped too — confirm intent.
pd_img_blue_roi = {
    region: frame[frame['hex'] != '#000000']
    for region, frame in sma.utils.pd_convert(blue_marilyn_roi).items()
}
# Cluster the filtered region tables with n_clusters=15.
kmeans_blue_roi = sma.cluster.kmeans(pd_img_blue_roi, n_clusters=15)

#### Scatter plot with clusters

In [None]:
# Scatter plots of the Blue Marilyn region tables with their k-means result.
# NOTE(review): img_idx=21 presumably numbers the saved figure(s) — confirm in scatter.py.
sma.plot.scatter(pd_img=pd_img_blue_roi, img_idx=21, kmeans=kmeans_blue_roi, verbose=True)

#### Bar chart

In [None]:
# Bar charts of the Blue Marilyn region tables with their k-means result.
# NOTE(review): img_idx=22 presumably numbers the saved figure(s) — confirm in bar.py.
sma.plot.bar(pd_img=pd_img_blue_roi, img_idx=22, kmeans=kmeans_blue_roi, verbose=True)

#### Color ribbon

In [None]:
# Color ribbons of the Blue Marilyn region tables with their k-means result.
# NOTE(review): img_idx=23 presumably numbers the saved figure(s) — confirm in ribbon.py.
sma.plot.ribbon(pd_img=pd_img_blue_roi, img_idx=23, kmeans=kmeans_blue_roi, verbose=True)

## Eggblue Marilyn

### Extraction



In [None]:
# Region-of-interest specs for Eggblue Marilyn, one tuple per region:
#   (img_idx, extraction_name, method, value, fix)
# Background, hair and face use the "exact" method, the eyeshadow uses
# "sampling" and is the only region passing a fix tuple. How value and fix
# are interpreted is defined by sma.roi.extract.
# NOTE(review): the face bounds are identical to the Blue Marilyn face
# bounds — confirm this is intended rather than a copy-paste leftover.
eggblue_roi_specs = [
    ("24_1", "eggblue_marilyn_background", "exact", [(76, 31, 178), (88, 61, 210)], None),
    ("24_2", "eggblue_marilyn_hair", "exact", [(12, 60, 60), (27, 181, 232)], None),
    ("24_3", "eggblue_marilyn_eyeshadow", "sampling", [(460, 480, 470, 500)], (600, 960, 600, 800)),
    ("24_4", "eggblue_marilyn_face", "exact", [(110, 40, 0), (255, 255, 255)], None),
]

# Collect the extraction results keyed by region name.
eggblue_marilyn_roi = {}
for roi_idx, roi_name, roi_method, roi_value, roi_fix in eggblue_roi_specs:
    eggblue_marilyn_roi[roi_name] = sma.roi.extract(
        np_img=np_img,
        key="eggblue_marilyn",
        img_idx=roi_idx,
        extraction_name=roi_name,
        param={"method": roi_method, "value": roi_value},
        fix=roi_fix,
        verbose=True,
    )

### Plots

#### Data preparation and clustering

In [None]:
# Convert each extracted Eggblue Marilyn region to a table and drop every row
# whose 'hex' value is '#000000'.
# NOTE(review): presumably '#000000' marks pixels outside the extracted
# region; genuinely black pixels inside it are dropped too — confirm intent.
pd_img_eggblue_roi = {
    region: frame[frame['hex'] != '#000000']
    for region, frame in sma.utils.pd_convert(eggblue_marilyn_roi).items()
}
# Cluster the filtered region tables with n_clusters=15.
kmeans_eggblue_roi = sma.cluster.kmeans(pd_img_eggblue_roi, n_clusters=15)

#### Scatter plot with clusters

In [None]:
# Scatter plots of the Eggblue Marilyn region tables with their k-means result.
# NOTE(review): img_idx=25 presumably numbers the saved figure(s) — confirm in scatter.py.
sma.plot.scatter(pd_img=pd_img_eggblue_roi, img_idx=25, kmeans=kmeans_eggblue_roi, verbose=True)

#### Bar chart

In [None]:
# Bar charts of the Eggblue Marilyn region tables with their k-means result.
# NOTE(review): img_idx=26 presumably numbers the saved figure(s) — confirm in bar.py.
sma.plot.bar(pd_img=pd_img_eggblue_roi, img_idx=26, kmeans=kmeans_eggblue_roi, verbose=True)

#### Color ribbon

In [None]:
# Color ribbons of the Eggblue Marilyn region tables with their k-means result.
# NOTE(review): img_idx=27 presumably numbers the saved figure(s) — confirm in ribbon.py.
sma.plot.ribbon(pd_img=pd_img_eggblue_roi, img_idx=27, kmeans=kmeans_eggblue_roi, verbose=True)

# Gunshot repair for Blue Marilyn

Source code of the functions used in this section:

- `sma.roi.knn_repair`: https://github.com/GitData-GA/shot-marilyns-analysis/blob/main/src/sma/roi/knn_repair.py

In [None]:
# Repair a rectangular window of the Blue Marilyn image with
# sma.roi.knn_repair, using n_neighbors=8. The window is bounded by
# rows 375-430 and columns 410-450.
# NOTE(review): whether the end bounds are inclusive is decided inside
# knn_repair — confirm in knn_repair.py.
blue_marilyn_repair = sma.roi.knn_repair(
    np_img=np_img,
    key="blue_marilyn",
    img_idx="28_1",
    n_neighbors=8,
    start_row=375,
    end_row=430,
    start_col=410,
    end_col=450,
    verbose=True,
)

# Save all images in a zip file

In [None]:
# Bundle the contents of img/ into img.zip in the working directory.
# make_archive appends the ".zip" suffix itself, so the base name is given
# without it. Left as a bare expression so the notebook shows the returned
# archive path.
shutil.make_archive("img", "zip", "img")