In [7]:
import numpy as np
import pandas as pd
import os
import cv2
from pathlib import Path
import matplotlib as mpl
import matplotlib.pyplot as plt
import re
#
import scienceplots
from cmap import Colormap
from PIL import Image

In [2]:
plt.style.use(['science', 'grid'])

In [3]:
df = pd.read_csv("data_pattern.csv")
groups = sorted(df.group.unique())
space_groups = pd.read_csv("space_groups.csv", index_col="ICSD_code").to_dict()["space_group"]

In [4]:
dataset_dir = "../datasets/FDP"
results_dir = "../results"

differences = {group: np.zeros((128, 128)) for group in groups}
differences_counts = {group: 0 for group in groups}

for index, row in df.iterrows():
    code = row["code"]
    group = row["group"]
    seed = row["seed"]
    phase = row["phase"]
    epoch = row["epoch"]
    if (phase == "test" or phase == "val") and epoch == 100:
        fake = cv2.cvtColor(cv2.imread(
            os.path.join(results_dir, seed, phase + "_100", "images", str(code) + "_structure_synthesized_image.png")
        ), cv2.COLOR_BGR2GRAY) / 255
        real = cv2.cvtColor(cv2.imread(
            os.path.join(dataset_dir, str(code), str(code) + "_+0+0+0.png")
        ), cv2.COLOR_BGR2GRAY) / 255
        
        diff = (real - fake)**2
        
        differences[group] += diff
        differences_counts[group] += 1

for group in groups:
    differences[group] /= differences_counts[group]

In [5]:
normalised = {}
for group in groups:
    normalised[group] =  differences[group] / np.max(differences[group])

In [19]:
results_dirs = [os.path.join("../results", d) for d in os.listdir("../results") if re.match('^pattern.*', d)]
dataset_dir = "../datasets/FDP"

cmap = Colormap("colorbrewer:rdylgn_6_r").to_mpl()

for group in groups:
    image = differences[group] * 16.66667
    image_colour = cmap(image)
    output = os.path.join("figures", "group differences")
    Path(output).mkdir(parents=True, exist_ok=True)

    Image.fromarray((image_colour[:, :, 0:3] * 255).astype(np.uint8), mode="RGB").save(os.path.join(output, group + "_" + f"{np.max(differences[group]):.3f}" + ".png"))

### We now repeat for structures

In [26]:
# Ensure we are testing diffraction structure data
df = pd.read_csv("data.csv")
df = df.loc[df.seed.str.match('^structure.*')]

groups = sorted(df.group.unique())
space_groups = pd.read_csv("space_groups.csv", index_col="ICSD_code").to_dict()["space_group"]

dataset_dir = "../datasets/FDP"
results_dir = "../results"

differences = {group: np.zeros((128, 128)) for group in groups}
differences_counts = {group: 0 for group in groups}

for index, row in df.iterrows():
    code = row["code"]
    group = row["group"]
    seed = row["seed"]
    phase = row["phase"]
    epoch = row["epoch"]
    
    if (phase == "test" or phase == "val") and epoch == 100:
        fake = cv2.cvtColor(cv2.imread(
            os.path.join(results_dir, seed, phase + "_100", "images", str(code) + "_+0+0+0_synthesized_image.png")
        ), cv2.COLOR_BGR2GRAY) / 255
        real = cv2.cvtColor(cv2.imread(
            os.path.join(dataset_dir, str(code), str(code) + "_structure.png")
        ), cv2.COLOR_BGR2GRAY) / 255
        
        diff = (real - fake)**2
        
        differences[group] += diff
        differences_counts[group] += 1

for group in groups:
    differences[group] /= differences_counts[group]

normalised = {}
for group in groups:
    normalised[group] =  differences[group] / np.max(differences[group])

results_dirs = [os.path.join("../results", d) for d in os.listdir("../results") if re.match('^pattern.*', d)]
dataset_dir = "../datasets/FDP"

for group in groups:
    image = normalised[group]
    image_colour = cmap(image)
    image_colour
    output = os.path.join("figures", "group differences")
    Path(output).mkdir(parents=True, exist_ok=True)

    cv2.imwrite(os.path.join(output, "structure_" + group + "_" + f"{np.max(differences[group]):.3f}" + ".png"), image_colour * 255)

  differences[group] /= differences_counts[group]
