<a href="https://colab.research.google.com/github/GitData-GA/shot-marilyns-analysis/blob/main/main.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Import libraries and scripts

In [None]:
import os
import shutil
import sys
from google.colab import userdata

# Fetch the project repository into the working directory; its `src/` folder
# contains the `sma` package used throughout this notebook.
!git clone https://github.com/GitData-GA/shot-marilyns-analysis.git
# Put the cloned `src/` directory first on the import path so that the
# `import sma` below resolves to the freshly cloned code.
sys.path.insert(0, './shot-marilyns-analysis/src')

import sma
# Create the `img` output directory (it is zipped and pushed later in the notebook).
# exist_ok=True lets this cell be re-run without raising FileExistsError.
os.makedirs('img', exist_ok=True)

# Import images and prepare data

Source code for the functions used in this section:

- `sma.utils.save_img`: https://github.com/GitData-GA/shot-marilyns-analysis/blob/main/src/sma/utils/save_img.py

- `sma.utils.np_convert`: https://github.com/GitData-GA/shot-marilyns-analysis/blob/main/src/sma/utils/np_convert.py

- `sma.utils.pd_convert`: https://github.com/GitData-GA/shot-marilyns-analysis/blob/main/src/sma/utils/pd_convert.py


In [None]:
# Paths of the five source images inside the cloned repository, keyed by a
# short image name. The keys are reused as dictionary keys by later cells.
img_links = dict(
    orange_marilyn="shot-marilyns-analysis/data/1_1_orange_marilyn.jpg",
    red_marilyn="shot-marilyns-analysis/data/1_2_red_marilyn.jpg",
    turq_marilyn="shot-marilyns-analysis/data/1_3_turq_marilyn.jpg",
    blue_marilyn="shot-marilyns-analysis/data/1_4_blue_marilyn.jpg",
    eggblue_marilyn="shot-marilyns-analysis/data/1_5_eggblue_marilyn.jpg",
)

## Save images to local directory

In [None]:
# Save the images listed in `img_links` via `sma.utils.save_img`.
# NOTE(review): `img_idx` presumably sets the numeric prefix of the saved
# files — confirm in sma/utils/save_img.py.
sma.utils.save_img(
    img_links=img_links,
    img_idx=1,
    verbose=True,
)

## Store the images as a dictionary of NumPy arrays

In [None]:
# Convert the images listed in `img_links`; the result is indexed by image name.
np_img = sma.utils.np_convert(
    img_links=img_links,
)
# Last expression of the cell: displays the converted Blue Marilyn entry.
np_img['blue_marilyn']

## Store the images as a dictionary of 5 Pandas dataframes with HEX codes

In [None]:
# Convert the per-image data in `np_img` to tabular form; the result is
# indexed by image name.
pd_img = sma.utils.pd_convert(
    np_img=np_img,
)
# Last expression of the cell: displays the Orange Marilyn table.
pd_img['orange_marilyn']

# Visualization

Source code for the functions used in this section:

- `sma.plot.distribution`: https://github.com/GitData-GA/shot-marilyns-analysis/blob/main/src/sma/plot/distribution.py

- `sma.plot.entropy_heatmap`: https://github.com/GitData-GA/shot-marilyns-analysis/blob/main/src/sma/plot/entropy_heatmap.py

- `sma.plot.scatter`: https://github.com/GitData-GA/shot-marilyns-analysis/blob/main/src/sma/plot/scatter.py

- `sma.cluster.kmeans`: https://github.com/GitData-GA/shot-marilyns-analysis/blob/main/src/sma/cluster/kmeans.py

- `sma.plot.bar`: https://github.com/GitData-GA/shot-marilyns-analysis/blob/main/src/sma/plot/bar.py

- `sma.plot.ribbon`: https://github.com/GitData-GA/shot-marilyns-analysis/blob/main/src/sma/plot/ribbon.py

## Distribution plots

In [None]:
# Draw the distribution plots for all images in `np_img`.
# NOTE(review): `img_idx` presumably numbers the saved figure — confirm in
# sma/plot/distribution.py.
sma.plot.distribution(
    np_img=np_img,
    img_idx=2,
    verbose=True,
)

## Relative conditional entropy plots

In [None]:
# Draw the relative conditional entropy heatmaps for all images in `np_img`.
# NOTE(review): `img_idx` presumably numbers the saved figure — confirm in
# sma/plot/entropy_heatmap.py.
sma.plot.entropy_heatmap(
    np_img=np_img,
    img_idx=3,
    verbose=True,
)

## RGB space scatterplots

In [None]:
# Draw the RGB-space scatterplots from the tabular image data (no clustering).
# NOTE(review): `img_idx` presumably numbers the saved figure — confirm in
# sma/plot/scatter.py.
sma.plot.scatter(
    pd_img=pd_img,
    img_idx=4,
    verbose=True,
)

## KMeans clustering

In [None]:
# Run KMeans with 15 clusters on the tabular image data; the result is passed
# to the cluster-aware plots in the following cells.
kmean_result = sma.cluster.kmeans(
    pd_img=pd_img,
    n_clusters=15,
)

### Scatter plot by clusters

In [None]:
# Scatter plot again, this time passing the KMeans result so the plot can use
# the cluster assignment.
sma.plot.scatter(
    pd_img=pd_img,
    img_idx=5,
    kmeans=kmean_result,
    verbose=True,
)

### Bar chart by clusters

In [None]:
# Bar chart built from the tabular image data and the KMeans result.
sma.plot.bar(
    pd_img=pd_img,
    img_idx=6,
    kmeans=kmean_result,
    verbose=True,
)

### Color ribbon by clusters

In [None]:
# Color ribbon built from the tabular image data and the KMeans result.
sma.plot.ribbon(
    pd_img=pd_img,
    img_idx=7,
    kmeans=kmean_result,
    verbose=True,
)

# Region of interest

Source code for the functions used in this section:

- `sma.roi.extract`: https://github.com/GitData-GA/shot-marilyns-analysis/blob/main/src/sma/roi/extract.py

## Extraction

In [None]:
# Region-of-interest (ROI) specifications, one list per image. Each entry is a
# tuple (img_idx, extraction_name, method, value, fix) whose fields are
# forwarded to `sma.roi.extract` below.
# NOTE(review): "sampling" values look like 4-number pixel windows and "exact"
# values like (lower, upper) colour bounds; the meaning of `fix` is not visible
# here — confirm all three against sma/roi/extract.py.
roi = {}
param = {
    "orange_marilyn": [
        ("8_1", "background", "sampling", [(0, 100, 0, 100)], None),
        ("8_2", "hair", "sampling", [(100, 150, 300, 400)], None),
        ("8_3", "eyeshadow", "sampling", [(460, 490, 450, 550)], None),
        ("8_4", "face", "sampling", [(300, 418, 300, 610)], None)
    ],
    "red_marilyn": [
        ("9_1", "background", "sampling", [(0, 960, 0, 30), (0, 960, 930, 960)], (690, 800, 275, 475)),
        ("9_2", "hair", "sampling", [(10, 200, 300, 380), (10, 200, 440, 520)], None),
        ("9_3", "eyeshadow", "sampling", [(455, 480, 455, 520)], None),
        ("9_4", "face", "sampling", [(280, 460, 310, 435)], None)
    ],
    "turq_marilyn": [
        ("10_1", "background", "sampling", [(0, 960, 0, 40), (0, 960, 930, 970)], None),
        ("10_2", "hair", "exact", [(19, 101, 70), (28, 255, 255)], None),
        ("10_3", "eyeshadow", "sampling", [(470, 490, 300, 320), (470, 490, 500, 550)], None),
        ("10_4", "face", "exact", [(0, 0, 0), (17, 100, 255)], None)
    ],
    "blue_marilyn": [
        ("11_1", "background", "sampling", [(0, 145, 0, 145)], None),
        ("11_2", "hair", "sampling", [(45, 200, 485, 590)], None),
        ("11_3", "eyeshadow", "sampling", [(470, 490, 300, 320)], (600, 960, 600, 800)),
        ("11_4", "face", "exact", [(110, 40, 0), (255, 255, 255)], None),
    ],
    "eggblue_marilyn": [
        ("12_1", "background", "exact", [(76, 31, 178), (88, 61, 210)], None),
        ("12_2", "hair", "exact", [(12, 60, 60), (27, 181, 232)], None),
        ("12_3", "eyeshadow", "sampling", [(460, 480, 470, 500)], (600, 960, 600, 800)),
        ("12_4", "face", "exact", [(110, 40, 0), (255, 255, 255)], None),
    ]
}

# Build roi[<image>]["<image>_<region>"] by extracting every configured region.
# Each spec tuple is unpacked into its own names inside the comprehension, so
# the `param` dict is never rebound while it is being iterated and is still the
# full specification table after this cell has run.
for key, specs in param.items():
    roi[key] = {
        f"{key}_{extraction_name}": sma.roi.extract(
            np_img=np_img,
            key=key,
            img_idx=spec_idx,
            extraction_name=extraction_name,
            param={"method": method, "value": value},
            fix=fix,
            verbose=True,
        )
        for spec_idx, extraction_name, method, value, fix in specs
    }

## Plots

In [None]:
# Cluster and plot the extracted regions of each image.
# Figure indices advance by one per plot, three plots per image, starting at 13.
img_idx = 12
for roi_images in roi.values():
    roi_frames = sma.utils.pd_convert(roi_images)
    # Keep only rows whose hex colour is not pure black.
    # NOTE(review): presumably '#000000' marks pixels outside the extracted
    # region — confirm against sma/roi/extract.py.
    pd_img_roi = {}
    for region, frame in roi_frames.items():
        pd_img_roi[region] = frame[frame['hex'] != '#000000']
    kmeans_roi = sma.cluster.kmeans(pd_img_roi, n_clusters=15)
    shared_args = dict(pd_img=pd_img_roi, kmeans=kmeans_roi, verbose=True)
    # Scatter, bar and ribbon plots, in that order, on consecutive indices.
    for plotter in (sma.plot.scatter, sma.plot.bar, sma.plot.ribbon):
        img_idx += 1
        plotter(img_idx=img_idx, **shared_args)

# Gunshot repair for Blue Marilyn

Source code for the functions used in this section:

- `sma.roi.knn_repair`: https://github.com/GitData-GA/shot-marilyns-analysis/blob/main/src/sma/roi/knn_repair.py

In [None]:
# Run `sma.roi.knn_repair` (8 neighbours) on a rectangular window of the
# Blue Marilyn image.
# NOTE(review): the row/column bounds presumably enclose the damaged area —
# confirm against sma/roi/knn_repair.py.
repair_window = {
    "start_row": 375,
    "end_row": 430,
    "start_col": 410,
    "end_col": 450,
}
blue_marilyn_repair = sma.roi.knn_repair(
    np_img=np_img,
    key="blue_marilyn",
    img_idx="28_1",
    n_neighbors=8,
    verbose=True,
    **repair_window,
)

# Save all images in a zip file

In [None]:
# Bundle the contents of the `img` directory into `img.zip` in the current
# working directory (make_archive appends the ".zip" extension itself).
shutil.make_archive(base_name="img", format="zip", root_dir="img")

# Push images to GitHub (for project members only)

- Please add the following secrets to your Colab:

  - `gh_email`: your email address of the GitHub account that can access the [repo](https://github.com/GitData-GA/shot-marilyns-analysis/tree/main)

  - `gh_username`: your username of the GitHub account that can access the [repo](https://github.com/GitData-GA/shot-marilyns-analysis/tree/main)

  - `gh_password`: the password of the GitHub account that can access the [repo](https://github.com/GitData-GA/shot-marilyns-analysis/tree/main)

  - `gh_token`: your personal token of the GitHub account that can access the [repo](https://github.com/GitData-GA/shot-marilyns-analysis/tree/main). To get a personal access token, please see the [documentation](https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/managing-your-personal-access-tokens#creating-a-personal-access-token-classic). In the "Select scopes" section, simply select all scopes.

In [None]:
!git config --global user.email {userdata.get('gh_email')}
!git config --global user.name {userdata.get('gh_username')}
!git config --global user.password {userdata.get('gh_password')}
!git clone --branch gh-pages https://{userdata.get('gh_token')}@github.com/GitData-GA/shot-marilyns-analysis gh-pages-copy
%rm -r gh-pages-copy/assets/img-paper/*
%cp img/* gh-pages-copy/assets/img-paper
%cd gh-pages-copy
!git add .
!git commit -m 'Commit from colab - project shot-marilyns-analysis'
!git push origin gh-pages