# 0. Introduction

By Xuyang (Rhett) Zhou at Max-Planck-Institut für Eisenforschung GmbH, 2021/05/14 13:16

This code aims to enable fast data processing when one has several nanoindentation results and wants to compare these results together.

First, place all the experiment files (.xls files) in one folder and name them according to a common pattern whose parts are separated by underscores (for example, `example_0_II_c.xls`).

In step 2, the user can select any of the .xls files to start with. Then the user can specify the structure of the naming (up to 3 levels are supported). All the raw data points in each region of interest (ROI) are plotted and saved in the subfolder 'png'.

In step 3, the user can select the nonsensical data points, label them, and exclude them from the later statistical analysis. This step is important because contaminants on the surface of your sample may affect the reliability of your measurements. The remaining data points in each region of interest (ROI) are also plotted and saved in the subfolder 'png'. At the end of step 3, the metadata is saved and is ready for later use.

Step 4 offers the possibility to plot all curves together and draw the statistical results.

# 1. Import useful functions

In [2]:
%matplotlib
import matplotlib.pyplot as plt
import xlrd
import numpy as np
from tkinter import filedialog
import os
import copy
import shutil
import pickle 
# A threshold of 0 turns off matplotlib's warning about too many open figures;
# the cells below create one figure per ROI.
plt.rcParams.update({'figure.max_open_warning': 0})

def annot_max(x, y, p_numb, ax=None):
    """Write the number ``p_numb`` at the highest point of a curve.

    Parameters
    ----------
    x, y : sequence of numbers
        Coordinates of the curve. The label is placed at the ``x`` value
        that belongs to the largest ``y`` value.
    p_numb : int
        Number used as the annotation text (formatted with ``{:d}``).
    ax : object with an ``annotate`` method, optional
        Axes to draw on. Only when it is omitted (``None``) are the current
        pyplot axes (``plt.gca()``) used.

    Raises
    ------
    ValueError
        Propagated from ``np.argmax`` when ``y`` is empty.
    """
    x = np.asarray(x)
    y = np.asarray(y)
    xmax = x[np.argmax(y)]
    ymax = y.max()
    text = "{:d}".format(p_numb)
    # Compare against None explicitly: a truthiness test would discard a
    # caller-supplied axes object that happens to evaluate as false.
    if ax is None:
        ax = plt.gca()
    # Anchor point and text position coincide, so no arrow is drawn; the text
    # starts at the peak and extends to the right and downwards.
    kw = dict(xycoords='data', textcoords="data", ha="left", va="top")
    ax.annotate(text, xy=(xmax, ymax), xytext=(xmax, ymax), **kw)

Using matplotlib backend: Qt5Agg


# 2. Import experimental data

## 2.1 Load the data

#### The output of `fn` shows the structure of your file name: the name is split at the underscores, and the resulting pieces are indexed starting from 0

In [3]:
# Let the user pick any one of the result files through a file dialog.
file_path = filedialog.askopenfilename()
# Separate the containing folder from the bare file name.
fold_path, file_name = os.path.split(file_path)
# Drop the last four characters of the name (the '.xls' extension) and cut the
# remainder at the underscores; the pieces reveal the naming structure.
fn = file_name[:-4].split('_')
fn

['example', '0', 'II', 'c']

## 2.2 Define the structure of naming

#### Note: This function supports a maximum of three nested loops over different experimental conditions. If fewer than three are needed, set the unused one(s) to [''].

In [4]:
# ----------------- User-editable parameters -----------------
# Labels that appear at the three variable positions of the file names
# (<prefix>_<p1>_<p2>_<p3>.xls); section 2.3 loops over every combination.
p1 = ['0']
p2 = ['I', 'II']
p3 = ['a', 'b', 'c']
# Number of 'Test NNN' sheets read from each workbook.
data_point = 25
# ------------------------------------------------------------

## 2.3 Collect all data into a list, save plots as 'png' file, and make a copy of the raw data

In [6]:
# Nested containers, indexed as [p1 position][p2 position][p3 position][test]:
#   data_raw    -> [column-1 values, column-2 values] of one 'Test NNN' sheet
#   results_raw -> four cells of the 'Results' sheet (columns 1, 2, 3 and 9)
#                  followed by a keep-flag (1 = keep; section 3.3 sets it to 0)
data_raw = []
results_raw = []
key = 1  # stays 1 until the header texts have been read from the first workbook

# All figures are written to the 'png' subfolder of the data folder.  Create it
# once, up front; exist_ok avoids the former bare try/except around os.stat.
directory = fold_path + '/' + 'png'
os.makedirs(directory, exist_ok=True)

# The figures of this cell are only saved to disk, so interactive mode is
# switched off before the first figure is created.
plt.ioff()

for i in p1:
    data_raw_p2 = []
    results_raw_p2 = []
    for j in p2:
        data_raw_p3 = []
        results_raw_p3 = []
        for k in p3:
            # Start with empty lists for every combination.  A combination that
            # is skipped below keeps them as an empty placeholder, so positions
            # in the nested lists always agree with p3.index(...).
            data_raw_per_area = []
            results_raw_per_area = []
            ############## User can set any impossible condition below ####################################
            if not (j == 'I' and k == 'c'):
            #############################################################################################
                ################# User can set names for your xls file below (use above fn) #############
                name = fn[0] + '_' + i + '_' + j + '_' + k + '.xls'
                ##########################################################################################
                workbook = xlrd.open_workbook(fold_path + '/' + name)

                name_save = i + '_' + j + '_' + k + '.png'

                sheet_name_results = 'Results'
                sheet_results = workbook.sheet_by_name(sheet_name_results)

                fig = plt.figure()
                for l in range(1, data_point + 1):
                    # Rows 0 and 1 of 'Results' hold header and unit texts, so
                    # test number l is read from row l + 1.  The xlrd cell
                    # objects themselves are stored; later cells read `.value`.
                    modulus_reduced = sheet_results.cell(l+1, 1)
                    modulus = sheet_results.cell(l+1, 2)
                    hardness = sheet_results.cell(l+1, 3)
                    stiffness = sheet_results.cell(l+1, 9)
                    results_point = [modulus_reduced, modulus, hardness, stiffness, 1]
                    results_raw_per_area.append(results_point)

                    # Sheets are named 'Test 001', 'Test 002', ...
                    sheet_name = 'Test ' + format(l, '0>3d')
                    sheet = workbook.sheet_by_name(sheet_name)
                    row_count = sheet.nrows
                    col_count = sheet.ncols
                    # Data start below the header and unit rows.
                    # NOTE(review): end_rowx=row_count-1 presumably leaves out the
                    # final row of the sheet - confirm that this is intended.
                    col_x = sheet.col_values(1, start_rowx=2, end_rowx=row_count-1)
                    col_y = sheet.col_values(2, start_rowx=2, end_rowx=row_count-1)
                    data_point_curve = [col_x, col_y]
                    data_raw_per_area.append(data_point_curve)

                    if key == 1:
                        # Read the header and unit texts once; they are reused for
                        # axis labels and statistics labels in the later sections.
                        header_results = sheet_results.row_values(0, start_colx=0, end_colx=None)
                        unit_results = sheet_results.row_values(1, start_colx=0, end_colx=None)
                        modulus_reduced_header = header_results[1] + " (" + unit_results[1] + ")"
                        modulus_header = header_results[2] + " (" + unit_results[2] + ")"
                        hardness_header = header_results[3] + " (" + unit_results[3] + ")"
                        stiffness_header = header_results[9] + " (" + unit_results[9] + ")"
                        overall_header = [modulus_reduced_header, modulus_header, hardness_header, stiffness_header]
                        overall_header_no_unit = [header_results[1], header_results[2], header_results[3], header_results[9]]

                        header = sheet.row_values(0, start_colx=0, end_colx=None)
                        unit = sheet.row_values(1, start_colx=0, end_colx=None)
                        col_x_header = header[1] + " (" + unit[1] + ")"
                        col_y_header = header[2] + " (" + unit[2] + ")"
                        key = 0

                    # Draw the curve and write its test number at its peak.
                    plt.plot(col_x, col_y, zorder=l)
                    annot_max(col_x, col_y, l)

                plt.xlabel(col_x_header)
                plt.ylabel(col_y_header)
                ####################### User can set ranges of x, y axes below #########################
                plt.xlim(-5, 55)
                plt.ylim(-0.05, 0.85)
                #######################################################################################
                plt.savefig(directory + '/' + name_save)
                plt.close(fig)

            data_raw_p3.append(data_raw_per_area)
            results_raw_p3.append(results_raw_per_area)
        data_raw_p2.append(data_raw_p3)
        results_raw_p2.append(results_raw_p3)
    data_raw.append(data_raw_p2)
    results_raw.append(results_raw_p2)
# Working copy whose keep-flags are edited in section 3; results_raw stays untouched.
results = copy.deepcopy(results_raw)

# 3. Plot curves in one ROI for deleting bad point(s)

## 3.1 Add parameters for selecting one ROI to plot curves

In [7]:
# ----------------- User-editable parameters -----------------
# Labels (entries of p1, p2 and p3) that identify the single ROI to inspect.
p1_value = '0'
p2_value = 'I'
p3_value = 'a'
# Curve indices to process, given as [start, stop, step] for range().
dp_value = [0, 25, 1]
# ------------------------------------------------------------

## 3.2 Plot all raw curves in one ROI

In [8]:
# Translate the selected labels into positions inside the nested data lists.
p1_i, p2_i, p3_i = p1.index(p1_value), p2.index(p2_value), p3.index(p3_value)

plt.ion()
fig = plt.figure()
roi_curves = data_raw[p1_i][p2_i][p3_i]
dp_start, dp_stop, dp_step = dp_value[0], dp_value[1], dp_value[2]
for i in range(dp_start, dp_stop, dp_step):
    col_x, col_y = roi_curves[i][0], roi_curves[i][1]
    # Later curves are drawn on top; each one is numbered at its peak.
    plt.plot(col_x, col_y, zorder=i)
    annot_max(col_x, col_y, i)
plt.xlabel(col_x_header)
plt.ylabel(col_y_header)

Text(0, 0.5, 'Load On Sample (mN)')

## 3.3 Store the number of bad point(s)

In [7]:
# List the indices in the selected range whose keep-flag (element 4 of the
# result entry) is 0, i.e. the points that are currently excluded.
roi_results = results[p1_i][p2_i][p3_i]
already_deleted_points = [
    idx
    for idx in range(dp_value[0], dp_value[1], dp_value[2])
    if roi_results[idx][4] == 0
]
print(p1_value, p2_value, p3_value)
print(already_deleted_points)

0 I a
[]


In [9]:
## User can choose which point(s) to delete or to add back below ###
delete_points = [11, 12, 19, 23]
add_points = []
#########################################################
# Element 4 of each result entry is the keep-flag: 0 = excluded, 1 = included.
# Deletions are applied first, so a point listed in both lists ends up included.
roi_results = results[p1_i][p2_i][p3_i]
for flag, points in ((0, delete_points), (1, add_points)):
    for idx in points:
        roi_results[idx][4] = flag

## 3.4 Check the new plots with bad point(s) deleted

In [11]:
plt.ion()
fig = plt.figure()
roi_results = results[p1_i][p2_i][p3_i]
roi_curves = data_raw[p1_i][p2_i][p3_i]
# One row per test, one column per result quantity.  Rows of excluded tests
# stay at zero and are masked out of the statistics below.
results_array = np.zeros([data_point, 4])
for i in range(dp_value[0], dp_value[1], dp_value[2]):
    if roi_results[i][4] == 0:
        continue  # point was flagged as bad
    for j in range(4):
        results_array[i, j] = roi_results[i][j].value
    col_x, col_y = roi_curves[i][0], roi_curves[i][1]
    plt.plot(col_x, col_y, zorder=i)
    # (call annot_max(col_x, col_y, i) here to number the curves again)

# Mean and standard deviation per column, ignoring every entry equal to 0.
masked_results = np.ma.masked_equal(results_array, 0)
results_mean = np.mean(masked_results, 0)
results_std = np.std(masked_results, 0)

# One "name:mean +/- std unit" line per quantity.  The statistics columns
# 0..3 correspond to columns 1, 2, 3 and 9 of the 'Results' sheet.
# NOTE(review): [:-12] cuts the last 12 characters off each header text,
# presumably a fixed suffix of the column titles - confirm.
plus_minus = r'$ \pm $'
label_lines = []
for stat_col, sheet_col in enumerate((1, 2, 3, 9)):
    label_lines.append(
        header_results[sheet_col][:-12] + ':'
        + format(results_mean[stat_col], '.1f') + plus_minus
        + format(results_std[stat_col], '.1f') + ' ' + unit_results[sheet_col]
    )
results_label = "\n".join(label_lines)

plt.xlabel(col_x_header)
plt.ylabel(col_y_header)
#### User can set ranges of x, y axes, and label of statistical results below ##########
plt.xlim(-5, 55)
plt.ylim(-0.05, 0.85)
plt.text(0.1, 0.5, results_label, fontsize=12)
#######################################################################################
name_save = '_'.join([p1_value, p2_value, p3_value]) + '__cleaned.png'
plt.savefig(directory + '/' + name_save)

## Repeat 3.1-3.4

## 3.5 Save the metadata with the bad point(s) deleted

In [12]:
# Write the results, including the edited keep-flags, to a pickle file inside
# the 'png' folder so that they can be reloaded in section 4.1.
metadata_save = 'metadata_with_bad_points_labeled'
metadata_path = directory + '/' + metadata_save
with open(metadata_path, 'wb') as f:
    pickle.dump(results, f)

# 4. Plot curves in multiple ROIs together

## 4.1 Load the metadata when restarting the calculation

In [13]:
# Reload the flagged results written in section 3.5.  `directory` (the 'png'
# folder path set in section 2.3) must already be defined.
# NOTE: unpickling can execute arbitrary code, so only load a metadata file
# that was produced by this notebook.
metadata_save = 'metadata_with_bad_points_labeled'
metadata_path = directory + '/' + metadata_save
with open(metadata_path, 'rb') as f:
    results = pickle.load(f)

## 4.2 Add parameters for selecting multiple ROIs to plot curves

In [14]:
# ----------------- User-editable parameters -----------------
# Labels of the ROIs to combine in one figure; curves are grouped (one colour
# and one statistics row) per entry of p2_plots.
p1_plots = ['0']
p2_plots = ['I', 'II']
p3_plots = ['a', 'b', 'c']
# Curve indices to include, given as [start, stop, step] for range().
dp_plots = [0, 25, 1]
# ------------------------------------------------------------

## 4.3 Plot all selected curves in multiple ROIs

### 4.3.1 Plot Displacement - Load curves

In [15]:
plt.ion()
fig = plt.figure()
curve_num = 0  # running counter, used as zorder so later curves lie on top
key = 1        # 1 while the current p2 group has not yet received its legend entry
# results_array[group, slot, quantity]: one group per entry of p2_plots and one
# slot per (p1, p3, test) combination.  Slots that are never filled stay 0 and
# are masked out when the statistics are computed.
results_array = np.zeros([len(p2_plots), data_point*len(p3_plots)*len(p1_plots), 4])
results_mean_array = np.zeros([len(p2_plots), 4])
results_std_array = np.zeros([len(p2_plots), 4])
color_libary = ['crimson', 'darkorange', 'limegreen', 'dodgerblue', 'darkturquoise', 'royalblue', 'darkviolet']
for j in range(len(p2_plots)):
    # Cycle through the palette so more groups than colours do not raise IndexError.
    c = color_libary[j % len(color_libary)]
    for i in range(len(p1_plots)):
        for k in range(len(p3_plots)):
            ############################ Again, User can set any impossible condition below #############################################
            if not (p2_plots[j] == 'I' and p3_plots[k] == 'c'):
            ############################################################################################################################
                # Look the ROI up by its labels in the lists used for loading
                # (p1, p2, p3).  Positions inside the *_plots lists only match
                # the stored data when the selection equals those lists, so
                # indexing with i, j, k directly reads the wrong ROI for any
                # sub-selection.  An unknown label raises ValueError here.
                src_i = p1.index(p1_plots[i])
                src_j = p2.index(p2_plots[j])
                src_k = p3.index(p3_plots[k])
                roi_results = results[src_i][src_j][src_k]
                roi_curves = data_raw[src_i][src_j][src_k]
                for l in range(dp_plots[0], dp_plots[1], dp_plots[2]):
                    if roi_results[l][4] != 0:  # skip points flagged as bad
                        # Unique slot of this (p1, p3, test) combination in group j.
                        n = l + data_point*k + data_point*len(p3_plots)*i
                        for m in range(4):
                            results_array[j][n][m] = roi_results[l][m].value
                        col_x = roi_curves[l][0]
                        col_y = roi_curves[l][1]
                        if key == 1:
                            ############################ Again, User can set any label below ######################################
                            plt.plot(col_x, col_y, zorder=curve_num, color=c, alpha=0.3, label='Parameter : ' + p2_plots[j])
                            #######################################################################################################
                            key = 0
                        else:
                            plt.plot(col_x, col_y, zorder=curve_num, color=c, alpha=0.3)
                        curve_num = curve_num + 1
    # Statistics of group j over all filled slots (entries equal to 0 are ignored).
    results_mean_array[j][:] = np.mean(np.ma.masked_equal(results_array[j], 0), 0)
    results_std_array[j][:] = np.std(np.ma.masked_equal(results_array[j], 0), 0)
    key = 1  # the next group gets its own legend entry
plt.xlabel(col_x_header)
plt.ylabel(col_y_header)
plt.legend(loc='upper left')
############################ Again, User can set the names below ###################################
name_save = 'All curves.png'
######################################################################################################
plt.savefig(directory + '/' + name_save)

### 4.3.2 Copy the data for comparing with other data sets

#### Only one of the following unit cells needs to be executed at a time

In [16]:
# Keep independent snapshots of the per-group statistics so that re-running
# section 4.3.1 with other settings does not change the arrays plotted below.
statistic_mean = results_mean_array.copy()
statistic_std = results_std_array.copy()

### 4.3.3 Plot statistic results from different groups of data sets

In [17]:
##### plot_type is a 0-based index: 0 = Reduced modulus; 1 = Modulus; 2 = Hardness; 3 = Stiffness #####
plot_type = 2
##########################################################################################################
# One x position per group listed in p2_plots.
x_axis = list(range(len(p2_plots)))
plt.ion()
fig = plt.figure()
# Rows of the transposed arrays are the quantities; pick the selected one.
group_mean = statistic_mean.T[plot_type]
group_std = statistic_std.T[plot_type]
plt.errorbar(x_axis, group_mean, yerr=group_std, uplims=True, lolims=True,
             color='limegreen', lw=3, marker='d', mfc='green', mec='limegreen', ms=12, mew=3)
plt.xlabel('Parameter')
plt.ylabel(overall_header[plot_type])

# The file name carries the header text of the selected quantity, without its unit.
name_save = 'Compare_' + overall_header_no_unit[plot_type] + '.png'
plt.savefig(directory + '/' + name_save)

# END