# Batch processing of the confocal images
### Inputs 
Confocal images as .plu files (binary standard of Sensofar company)
#### Input files organization 
Each cut 'cut ###' contains several wafers 'wafer #', and each wafer contains several images 'wafer #-&';
<br>
the files organization is as follows:
<br>
root / cut ### / wafer # / save_plu / wafer #-&.plu

<br>
where:

- & stands for a, b, c, d, e 
- \# stands for a digit
- the number of files per 'wafer #' is not fixed
- the number of cuts ### is the length of the list `cuts`

### Outputs
5 files per image 'wafer #-&' 
- EXCEL file 'wafer#-&.xlsx' containing the morphological information of the features
- JPEG file 'wafer#-&-im-raw.jpg' of the corrected raw image with indication of the corrected pixels
- JPEG file 'wafer#-&-im-rebuild.jpg' of the features represented in colored circular dots of equivalent area as the real features
with color scale relative to the features depth
- JPEG file 'wafer#-&-im-bbox.jpg' of the box plot of the features size
- JPEG file 'wafer#-&-im-hist.jpg' of the features size histogram

A $\LaTeX$ report 'cut_report ###.tex' per cut, gathering the 4 JPEG files of every image and a graph combining all the corresponding box plots 

#### output files location 
'root / cut ### / wafer # / results /' for 'wafer#-&.xlsx' and the JPEG files 
<br>
'root / cut ### /' for cut_report ###.tex
<br>
where:
- & stands for a, b, c, d, e 
- \# stands for a digit


In [None]:
# Standard library imports
import os.path
from pathlib import Path

# Location of 'site-packages' on macOS, used as a fallback search path when
# the required packages cannot be imported from the default location
mac_packages = (
    "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages"
)

# Root directory of the experimental files: the Windows location is kept when
# it exists on this machine, otherwise the macOS location is used
root = Path("C:/Users/franc/OneDrive/Bureau/confocal/fichiers")
root = root if os.path.isdir(root) else Path('/Users/amal/Gwyddion Images /Carton-Louise')

# Cut directories (sub-directories of root) to process
cuts = [
    "cut 117",
    "cut 119",
    "cut 121",
    "cut 134",
    "cut 139",
]

In [None]:
# Standard library imports
import os
from pathlib import Path

# 3rd party imports
# image_features_extract is first imported from the default 'site-packages'
# (Windows set-up). If that import fails, the macOS 'site-packages' directory
# is appended to the module search path and the import is retried.
try:
    import image_features_extract as ife
except ImportError:  # only a failed import triggers the fallback
    import sys
    sys.path.append(mac_packages)
    import image_features_extract as ife

def parent_dir(x):
    """Return the directory one level above the directory designated by x.

    If x is an existing directory, its parent directory is returned.
    Otherwise x is treated as the path of a file and the parent of the
    directory containing that file is returned.

    x may be a string or a path-like object; the result is a string.
    """
    directory = x if os.path.isdir(x) else os.path.dirname(x)
    return os.path.split(directory)[0]

# Parameters handed to the image_features_extract routines
param = {}
param['Top_hat_length'] = 50  # should be larger than the largest feature to extract
param['threshold'] = -0.3     # should be larger than the residual image background
param['root'] = root


for cut in cuts:  # process the cuts
    # The wafer directories are expected to be named 'wafer 1' ... 'wafer n':
    # the entries starting with 'wafer' are counted and the paths rebuilt.
    nbr_wafer = len([x for x in os.listdir(root / Path(cut)) if x.startswith('wafer')])
    REP = [root / Path(cut) / Path("wafer " + str(i)) / Path('save_plu')
           for i in range(1, nbr_wafer + 1)]
    param['cut'] = cut
    nbr_rep = len(REP)
    mode = 'header'  # mode 'header': open a new report and write its header

    for idx_rep, rep in enumerate(REP):  # process the wafers
        param['repertoire'] = rep
        param['dir_results'] = parent_dir(param['repertoire']) / Path('results')
        param['dir_tex'] = root / Path(cut)
        param['wafer'] = "Wafer " + str(idx_rep + 1)
        # exist_ok avoids the separate existence check
        os.makedirs(param['dir_results'], exist_ok=True)
        # os.listdir returns the entries in arbitrary order: sort them so that
        # the images are always processed (and reported) in the same order
        files = sorted(y for y in os.listdir(param['repertoire'])
                       if y.split('.')[-1] == 'plu')
        nbr_file = len(files)

        for idx_file, file in enumerate(files):  # process the files
            param['file'] = file
            print(cut, file, idx_file + 1, nbr_file)

            # we process the image
            iconfocal_img, im_corr, im_bin, df = ife.image_processing_1(param, analyse_morpho=True)

            # if we process the last image of the last wafer, we add a tailer
            # to the tex document
            # NOTE(review): if the last wafer holds no .plu file, the 'tailer'
            # mode is never reached; and if the cut holds a single image, the
            # mode goes straight from 'header' to 'tailer' - confirm that
            # make_tex_document_1 copes with both cases.
            if (idx_file + 1 == nbr_file) and (idx_rep + 1 == nbr_rep):
                mode = 'tailer'

            # we add stuff to the tex report
            ife.make_tex_document_1(param, df, mode)
            mode = 'corpus'  # the next images are appended as new sections


# Morphological statistics of the features

In [None]:
# Standard library imports
import os.path
from pathlib import Path

# Root directory of the experimental files: the Windows location is kept when
# it exists on this machine, otherwise the macOS location is used
root = Path("C:/Users/franc/OneDrive/Bureau/confocal/fichiers")
root = root if os.path.isdir(root) else Path('/Users/amal/Gwyddion Images /Carton-Louise')

# Full name of the output Excel file
store_file = root / 'synthesis.xlsx'

In [None]:
'''
Generates an Excel workbook with one sheet per cut listed in cuts_stat

Each sheet is organized as follow:

        area# nb_dot# mean_area# area_small# nb_dot_small# mean_area_small# area_large# nb_dot_large# mean area_large# 
wafer &
wafer &
wafer &
wafer &
wafer &

where:
    & = a, b, c, d,... is the label of the confocal image (one row per image)
    # = 1,2,.. is the number of the wafer (one group of columns per wafer)
    
    area# : total area of the features 
    nb_dot# : total number of features
    mean_area# : mean area of the features
    
    area_small# : total area of small features (size < size_min)
    nb_dot_small# : total number of small features
    mean_area_small# : mean area of the small features
    
    area_large# : total area of large features (size >= size_min)
    nb_dot_large# : total number of large features
    mean_area_large# : mean area of the large features
    
'''
# Standard library imports 
import re

# 3rd party imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path

%matplotlib inline

cuts_stat = ["cut 117", "cut 119", "cut 121", "cut 134", "cut 139"]  # directories to be analysed
color = {"Wafer 1": 'k', "Wafer 2": 'r', "Wafer 3": 'b', "Wafer 4": 'c', "Wafer 5": 'g'}
size_min = 50  # size threshold splitting the features into two classes (small / large)

#####
#   # Statistical analysis and excel file generation
#####

# Result files are named 'wafer#-&.xlsx' or 'wafer #-&.xlsx' where # is a
# number and & the image label; the group captures the image label.
# The whole file name must match, so that files merely containing such a name
# (e.g. the '~$wafer1-a.xlsx' lock file of an opened workbook) are ignored.
xlsx_name = re.compile(r"wafer\s?\d{1,3}-(\w)\.xlsx")

# dicts used to plot the feature size vs the feature depth
indent_size = {}    # { cut label: list of all feature sizes }
indent_height = {}  # { cut label: list of all feature heights }
indent_color = {}   # { cut label: list of colors, as defined by the dict color }

# The context manager saves and closes the workbook exactly once, after all
# the sheets have been written.
with pd.ExcelWriter(store_file, engine='openpyxl') as writer:
    for cut in cuts_stat:
        # The wafer directories are expected to be named 'wafer 1' ... 'wafer n'
        nbr_wafer = len([x for x in os.listdir(root / Path(cut)) if x.startswith('wafer')])
        rep_ = [root / Path(cut) / Path("wafer " + str(i)) / Path('results')
                for i in range(1, nbr_wafer + 1)]
        indent_size[cut] = []
        indent_height[cut] = []
        indent_color[cut] = []
        frames = []  # one data frame of statistics per wafer

        for idx_rep, rep in enumerate(rep_):
            dir_wafer = "Wafer " + str(idx_rep + 1)
            suffix = str(idx_rep + 1)  # wafer number appended to the column names

            # (file name, image label) of every result file, in alphabetical
            # order (os.listdir returns the entries in arbitrary order)
            files = [(match.group(0), match.group(1))
                     for match in map(xlsx_name.fullmatch, sorted(os.listdir(rep)))
                     if match]

            size, size_small, size_large = [], [], []  # total size: all / small / large features
            nbr, nbr_small, nbr_large = [], [], []     # number of: all / small / large features
            mean, mean_small, mean_large = [], [], []  # mean size: all / small / large features

            for file, _ in files:
                df = pd.read_excel(rep / file)

                # update of the dicts used for the plots
                indent_size[cut].extend(df['size'].to_list())
                indent_height[cut].extend(df['height'].to_list())
                indent_color[cut].extend([color[dir_wafer]] * len(df))

                # stat of all the features
                size.append(df['size'].sum())
                nbr.append(len(df))
                mean.append(df['size'].mean())

                # stat of the large features (size >= size_min)
                df1 = df.query("size >= @size_min")
                size_large.append(df1['size'].sum())
                nbr_large.append(len(df1))
                mean_large.append(df1['size'].mean())

                # stat of the small features (size < size_min)
                df1 = df.query("size < @size_min")
                size_small.append(df1['size'].sum())
                nbr_small.append(len(df1))
                mean_small.append(df1['size'].mean())

            # statistics of the wafer, one column per quantity
            res = {
                'area' + suffix: size,
                'nb_dot' + suffix: nbr,
                'mean area' + suffix: mean,
                'area_small' + suffix: size_small,
                'nb_dot_small' + suffix: nbr_small,
                'mean area_small' + suffix: mean_small,
                'area_large' + suffix: size_large,
                'nb_dot_large' + suffix: nbr_large,
                'mean area_large' + suffix: mean_large,
            }
            # two extra rows: mean value and standard deviation over the images
            res = {key: value + [np.mean(value), np.std(value)]
                   for key, value in res.items()}

            # rows are labelled after the images actually found, so that the
            # number of images per wafer is free
            labels = ["wafer# " + label for _, label in files] + ["mean", "std"]
            frames.append(pd.DataFrame(res, index=labels))

        if not frames:  # no wafer directory: nothing to write for this cut
            print(cut, ': no wafer directory found, sheet skipped')
            continue

        # side by side merge of the wafers, aligned on the row labels
        dg = pd.concat(frames, axis=1)
        # rows: images in alphabetical order, then the two summary rows
        image_rows = sorted(set(dg.index) - {"mean", "std"})
        dg = dg.reindex(image_rows + ["mean", "std"])

        # flush the data frame into a sheet of the excel file
        dg.to_excel(writer, sheet_name=cut)

#####
#   # plot of the results
#####
# One scatter plot per cut (feature size vs feature depth), laid out on a
# 3 x 2 grid; the dots are colored according to indent_color.
fig = plt.figure(figsize=(15, 15))
for position, cut in enumerate(cuts_stat, start=1):
    ax = fig.add_subplot(3, 2, position)
    depth = -np.array(indent_height[cut])  # depth is the opposite of the height
    area = np.array(indent_size[cut])

    ax.scatter(depth, area, s=50, c=indent_color[cut])
    ax.set_xlim(0.2, 3.5)
    ax.set_ylim(0, 2000)
    ax.set_xlabel('depth (µm)')
    ax.set_ylabel('size (px\u00B2)')
    ax.set_title(cut)
fig.tight_layout()

## K-means clustering

In [None]:
# 3rd party imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans

%matplotlib inline

cuts = ['cut 117', 'cut 119', 'cut 121', 'cut 134', 'cut 139']
centroid = {}  # { cut label: cluster centres, one row (depth, size) per cluster }
fig = plt.figure(figsize=(15, 15))
for idx, cut in enumerate(cuts):
    plt.subplot(3, 2, idx + 1)
    Data = {'x': -np.array(indent_height[cut]),  # depth is the opposite of the height
            'y': np.array(indent_size[cut])
            }
    df = pd.DataFrame(Data, columns=['x', 'y'])
    if df.empty:  # nothing to cluster for this cut
        plt.title(cut)
        continue

    # - the number of clusters cannot exceed the number of features
    # - n_init is set explicitly because its default depends on the
    #   scikit-learn version
    # - random_state is fixed so that the centroids, and the fit relying on
    #   them, are the same at every run
    kmeans = KMeans(n_clusters=min(5, len(df)), n_init=10, random_state=0).fit(df)
    centroids = kmeans.cluster_centers_
    centroid[cut] = centroids

    # features colored by cluster, centroids in red
    plt.scatter(df['x'], df['y'], c=kmeans.labels_.astype(float), s=20, alpha=0.5)
    plt.scatter(centroids[:, 0], centroids[:, 1], s=50, c='r')
    plt.xlim(0.2, 3.5)
    plt.ylim(0, 2000)
    plt.xlabel('depth (µm)')
    plt.ylabel('size (px\u00B2)')
    plt.title(cut)


In [None]:
'''
non linear fit by a power law a+b*x^c
'''

# 3rd party imports
import numpy as np
import matplotlib.pyplot as plt
import scipy

%matplotlib inline

def f(x, a, b, c):
    """Power law model a + b*x**c used for the non linear fit."""
    power_term = b * x ** c
    return a + power_term

# The optimize sub-package is imported explicitly: 'import scipy' alone does
# not guarantee that scipy.optimize is available.
import scipy.optimize

color = {"cut 117": 'k', "cut 119": 'r', "cut 121": 'b', "cut 134": 'c', "cut 139": 'y'}

# Centroids of all the cuts gathered for the fit; each centroid array holds
# one row per cluster with the columns (depth, size)
xt = []  # depth of the centroids
yt = []  # size of the centroids
for x, y in centroid.items():  # x: cut label, y: centroids of the cut
    plt.scatter(y[:, 0], y[:, 1], s=50, label=x, c=color[x])
    xt.extend(y[:, 0])
    yt.extend(y[:, 1])

plt.xlabel('depth (µm)')
plt.ylabel('size (px\u00B2)')
plt.legend()

# Fit of the power law f on all the centroids and plot of the fitted curve
par, cov = scipy.optimize.curve_fit(f, xt, yt)
x_fit = np.arange(0.4, 2, 0.01)
_ = plt.plot(x_fit, f(x_fit, *par))