# Preprocessing

### import modules

In [None]:
# Data, Plot and Statistics
import os
import shutil
import joblib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.ticker import AutoMinorLocator, MaxNLocator
import seaborn as sns
import statsmodels.api as sm
import six
from sklearn.model_selection import train_test_split

### read data

In [None]:
# Load the raw database from the Excel workbook into a DataFrame.
read_PATH = './database/new_opv_ai_database_2010-2023_編碼決定版.xlsx'
file = pd.read_excel(read_PATH)
file.head() # preview the first 5 rows of the data

In [None]:
file.describe() # summary statistics of the columns (pandas default selection)

### define functions

In [None]:
def draw_save_table(data, col_width=3.0, row_height=0.625, font_size=14,
                     header_color='#40466e', row_colors=('#f1f1f2', 'w'), edge_color='k',
                     bbox=(0, 0, 1, 1), header_columns=0,
                     ax=None, **kwargs):
    """Render a DataFrame as a styled matplotlib table and return its Axes.

    Despite its name this function does not write anything to disk; callers
    save the figure themselves through ``ax.figure.savefig(...)``.

    Parameters
    ----------
    data : pandas.DataFrame
        Table content; ``data.columns`` become the header row.
    col_width, row_height : float
        Size in inches of one column / one row. Only used to size the new
        figure that is created when ``ax`` is None.
    font_size : int
        Font size applied to every cell.
    header_color : color
        Face colour of header cells.
    row_colors : sequence of colors
        Face colours cycled over the non-header rows (by row index).
    edge_color : color
        Edge colour of every cell.
    bbox : sequence of 4 floats
        Passed through to ``ax.table`` as its ``bbox`` argument.
    header_columns : int
        Number of leading columns that are styled like the header row.
    ax : matplotlib Axes, optional
        Axes to draw into. When None a new figure is created and its axis
        frame is switched off.
    **kwargs
        Forwarded to ``ax.table``.

    Returns
    -------
    The Axes that holds the table.
    """
    if ax is None:
        # One extra row accounts for the header line.
        n_rows, n_cols = data.shape
        size = (n_cols * col_width, (n_rows + 1) * row_height)
        _, ax = plt.subplots(figsize=size)
        ax.axis('off')
    mpl_table = ax.table(cellText=data.values, bbox=bbox, colLabels=data.columns, **kwargs)
    mpl_table.auto_set_font_size(False)
    mpl_table.set_fontsize(font_size)
    # get_celld() is the public accessor for the {(row, col): cell} mapping,
    # so neither the private ``_cells`` attribute nor ``six`` is needed.
    for (row, col), cell in mpl_table.get_celld().items():
        cell.set_edgecolor(edge_color)
        if row == 0 or col < header_columns:
            cell.set_text_props(weight='bold', color='w')
            cell.set_facecolor(header_color)
        else:
            cell.set_facecolor(row_colors[row % len(row_colors)])
    return ax

In [None]:
### Start from an empty ./statistics folder for the statistics plots and tables:
### any existing folder (and its contents) is removed first, then recreated.
direction = './statistics'
if os.path.exists(direction):
    shutil.rmtree(direction)
os.mkdir(direction)

### drop NA and get used data

In [None]:
### Remember the size of the table as loaded
raw_row, raw_col = file.shape
print(f'Raw data, rows:{raw_row}, columns:{raw_col}')

### Remove the column that is not used
#file = file.drop("silence", axis=1)
file = file.drop(columns="ratio A/D")
print(f'Data without unused columns, rows:{file.shape[0]}, columns:{file.shape[1]}')

### Remove every row that contains a blank (N/A) value
file = pd.DataFrame(file).dropna()
final_row, final_col = file.shape
print(f'Final data (real input), rows:{final_row}, columns:{final_col}')

### Summarise the before/after shape as a table image
dictionary = {
    'Data': ['Raw', 'Final'],
    'Rows': [raw_row, final_row],
    'Columns': [raw_col, final_col],
}
df_stat = pd.DataFrame(dictionary)
ax = draw_save_table(df_stat, header_columns=0, col_width=2.0)
ax.figure.savefig('./statistics/Data_shape_record.jpg')

In [None]:
file.info() # check that the data in each column is in the right form (dtype, non-null count)

In [None]:
i = file.shape[0]


def _column(label):
    """Return column ``label`` of ``file`` as a flat array with ``i`` entries."""
    return np.array(file[label]).reshape(i,)


index = _column('Index')

### Donor material
donor_name = _column('Donor')
donor_smiles = _column('Donor SMILES')
donor_homo = _column('HOMO of Donor (eV)')
donor_lumo = _column('LUMO of Donor (eV)')
donor_bandgap = _column('Bandgap of Donor (eV)')

### Acceptor material
acceptor_name = _column('Acceptor')
acceptor_smiles = _column('Acceptor SMILES')
acceptor_homo = _column('HOMO of Acceptor (eV)')
acceptor_lumo = _column('LUMO of Acceptor (eV)')
acceptor_bandgap = _column('Bandgap of Acceptor (eV)')

### Device performance
pce = _column('PCE (%)')
voc = _column('Voc (V)')
jsc = _column('Jsc (mAcm-2)')
ff = _column('FF')

In [None]:
# Count distinct donor/acceptor combinations as (donor, acceptor) tuples.
# Gluing the two SMILES strings together without a separator could make two
# different pairs produce the same concatenated string and be counted once.
n_devices = len(set(zip(donor_smiles, acceptor_smiles)))
print(f'Device: {n_devices}, Donor materials: {len(np.unique(donor_smiles))}, Acceptor materials: {len(np.unique(acceptor_smiles))}')

In [None]:
# Show the (rows, columns) shape of the table at this point.
print(file.shape)

In [None]:
# Write the current table to a new workbook.
# NOTE(review): `index` is not passed, so the pandas default applies — confirm
# whether the DataFrame index should appear as a column in the output.
file.to_excel('./database/opv_ai_database_used.xlsx')

In [None]:
# Free-standing note (a bare string, not used by any code): label strings
# that also appear as column / axis labels in the plotting cells.
'''
name
PCE (%)
J$_{SC}$ (mAcm$^{-2}$)
V$_{OC}$ (V)
FF
'''

### correlation

In [None]:
### Correlation between every pair of parameters (material properties and
### device performance), drawn as an annotated heat map.
# The column names are what gets printed on the heat map and on the pair plot,
# so they use the same 'Bandgap of ...' spelling as the spreadsheet headers.
df = pd.DataFrame({'HOMO of Donor (eV)':donor_homo, 'LUMO of Donor (eV)':donor_lumo,
                   'Bandgap of Donor (eV)':donor_bandgap,
                   'HOMO of Acceptor (eV)':acceptor_homo, 'LUMO of Acceptor (eV)':acceptor_lumo,
                   'Bandgap of Acceptor (eV)':acceptor_bandgap,
                   'PCE (%)':pce, 'V$_{OC}$ (V)':voc, 'J$_{SC}$ (mAcm$^{-2}$)':jsc, 'FF':ff})
plt.figure(figsize=(6,5), dpi=300)
plt.rc('font', family='Arial', size=8, weight='normal')
cor = df.corr()  # pairwise correlation matrix (pandas default method)
# sns.heatmap draws into the current axes, i.e. the figure created above.
plot = sns.heatmap(cor, annot=True, fmt='.2f', cmap=plt.cm.Reds)
fig = plot.get_figure()
fig.savefig('./statistics/heatmap.png', bbox_inches='tight')
plt.show()

In [None]:
### Pair plot (scatter plot of every pair of parameters)
# sns.pairplot is a figure-level function that builds its own figure, so no
# plt.figure() call is made here: a figure opened beforehand would stay empty
# and its figsize/dpi would not apply to the grid.
# NOTE(review): if a 300-dpi image is wanted, pass dpi= to savefig below.
plt.rc('font', family='Arial', size=8, weight='normal')
fig = sns.pairplot(df)  # returns the grid object, which provides savefig()
fig.savefig('./statistics/pairplot.png', bbox_inches='tight')
plt.show()

In [None]:
### Jsc vs. PCE scatter with a fitted trend line (2.2 in panel)
plt.rc('font', family='Arial', size=8, weight='normal')
plt.figure(figsize=(2.2, 2.2), dpi=300)
ax = plt.gca()
ax.scatter(jsc, pce, s=1.5)

### Correlation coefficient and first-order least-squares fit
r = np.round(np.corrcoef(jsc, pce)[0, 1], 2)
linear_model = np.polyfit(jsc, pce, 1)
slope, intercept = linear_model
print(f'y = {round(slope, 4)} x + {round(intercept, 4)}')
linear_model_fn = np.poly1d(linear_model)
x_s = np.arange(0, 40, 0.1)
ax.plot(x_s, linear_model_fn(x_s), "r--", linewidth=1.25)

### Annotate R inside the panel
ax.text(3, 18, f'R = {r}')

### Axis ranges and labels
ax.set_xlim(0, 35)
ax.set_ylim(0, 20)
ax.set_xlabel('J$_{SC}$ (mA/cm$^{2}$)')
ax.set_ylabel('PCE (%)')

### Tick layout and frame
ax.set_aspect('auto', adjustable='box', anchor='C')
ax.xaxis.set_major_locator(MaxNLocator(7))
ax.yaxis.set_major_locator(MaxNLocator(4))
for one_axis in (ax.xaxis, ax.yaxis):
    one_axis.set_minor_locator(AutoMinorLocator(5))
for spine in ax.spines.values():
    spine.set_linewidth(1.5)
ax.tick_params(axis='both', which='major', length=7, width=1.25, direction='in')
ax.tick_params(axis='both', which='minor', length=4, width=1, direction='in')

print(r)
plt.savefig('./statistics/jsc_pce_22_8.png', bbox_inches='tight')

In [None]:
### Jsc vs. PCE scatter with a fitted trend line (3.35 in panel, 12 pt text)
plt.rc('font', family='Arial', size=12, weight='normal')
plt.figure(figsize=(3.35, 3.35), dpi=300)
ax = plt.gca()
ax.scatter(jsc, pce, s=1.5)

### Correlation coefficient and first-order least-squares fit
r = np.round(np.corrcoef(jsc, pce)[0, 1], 2)
linear_model = np.polyfit(jsc, pce, 1)
slope, intercept = linear_model
print(f'y = {round(slope, 4)} x + {round(intercept, 4)}')
linear_model_fn = np.poly1d(linear_model)
x_s = np.arange(0, 40, 0.1)
ax.plot(x_s, linear_model_fn(x_s), "r--", linewidth=1)

### Annotate R inside the panel
font = dict(family='arial', color='black', weight='light', size=12)
ax.text(3, 18, f'R = {r}', fontdict=font)

### Axis ranges and labels
label_font = {'family':'Arial','size':12,'weight':'normal'}
ax.set_xlim(0, 35)
ax.set_ylim(0, 20)
ax.set_xlabel('J$_{SC}$ (mAcm$^{-2}$)', fontdict=label_font)
ax.set_ylabel('PCE (%)', fontdict=label_font)

### Tick layout and frame (x major ticks keep matplotlib's default locator)
ax.set_aspect('auto', adjustable='box', anchor='C')
ax.yaxis.set_major_locator(MaxNLocator(4))
for one_axis in (ax.xaxis, ax.yaxis):
    one_axis.set_minor_locator(AutoMinorLocator(5))
for spine in ax.spines.values():
    spine.set_linewidth(1.5)
ax.tick_params(axis='both', which='major', width=1.5, labelsize=12, direction='in')
ax.tick_params(axis='both', which='minor', length=3, width=1, direction='in')

print(r)
plt.savefig('./statistics/jsc_pce.png', bbox_inches='tight')

In [None]:
### Voc vs. PCE scatter (2.2 in panel)
plt.rc('font', family='Arial', size=8, weight='normal')
plt.figure(figsize=(2.2, 2.2), dpi=300)
ax = plt.gca()
ax.scatter(voc, pce, s=1.5)

### Correlation coefficient and first-order least-squares fit
r = np.round(np.corrcoef(voc, pce)[0, 1], 2)
linear_model = np.polyfit(voc, pce, 1)
slope, intercept = linear_model
print(f'y = {round(slope, 4)} x + {round(intercept, 4)}')
linear_model_fn = np.poly1d(linear_model)
x_s = np.arange(0, 1.6, 0.1)
# The fit is only printed; the trend line is not drawn on this panel.

### Annotate R inside the panel
ax.text(0.15, 18, f'R = {r}')

### Axis ranges and labels
ax.set_xlim(0, 1.5)
ax.set_ylim(0, 20)
ax.set_xlabel('V$_{OC}$ (V)')
ax.set_ylabel('PCE (%)')

### Tick layout and frame
ax.set_aspect('auto', adjustable='box', anchor='C')
ax.xaxis.set_major_locator(MaxNLocator(3))
ax.yaxis.set_major_locator(MaxNLocator(4))
for one_axis in (ax.xaxis, ax.yaxis):
    one_axis.set_minor_locator(AutoMinorLocator(5))
for spine in ax.spines.values():
    spine.set_linewidth(1.5)
ax.tick_params(axis='both', which='major', length=7, width=1.25, direction='in')
ax.tick_params(axis='both', which='minor', length=4, width=1, direction='in')

print(r)
plt.savefig('./statistics/voc_pce_22_8.png', bbox_inches='tight')

In [None]:
### Voc vs. PCE scatter (2.5 in panel)
plt.rc('font', family='Arial', size=8, weight='normal')
plt.figure(figsize=(2.5, 2.5), dpi=300)
ax = plt.gca()
ax.scatter(voc, pce, s=1.5)

### Correlation coefficient and first-order least-squares fit
r = np.round(np.corrcoef(voc, pce)[0, 1], 2)
linear_model = np.polyfit(voc, pce, 1)
slope, intercept = linear_model
print(f'y = {round(slope, 4)} x + {round(intercept, 4)}')
linear_model_fn = np.poly1d(linear_model)
x_s = np.arange(0, 1.6, 0.1)
# The fit is only printed; the trend line is not drawn on this panel.

### Annotate R inside the panel
font = dict(family='arial', color='black', weight='light', size=8)
ax.text(0.15, 18, f'R = {r}', fontdict=font)

### Axis ranges and labels
label_font = {'family':'Arial','size':8,'weight':'normal'}
ax.set_xlim(0, 1.5)
ax.set_ylim(0, 20)
ax.set_xlabel('V$_{OC}$ (V)', fontdict=label_font)
ax.set_ylabel('PCE (%)', fontdict=label_font)

### Tick layout and frame (x major ticks keep matplotlib's default locator)
ax.set_aspect('auto', adjustable='box', anchor='C')
ax.yaxis.set_major_locator(MaxNLocator(4))
for one_axis in (ax.xaxis, ax.yaxis):
    one_axis.set_minor_locator(AutoMinorLocator(5))
for spine in ax.spines.values():
    spine.set_linewidth(1.5)
ax.tick_params(axis='both', which='major', width=1.5, labelsize=8, direction='in')
ax.tick_params(axis='both', which='minor', length=3, width=1, direction='in')

print(r)
plt.savefig('./statistics/voc_pce.png', bbox_inches='tight')

In [None]:
### FF vs. PCE scatter (2.2 in panel)
plt.rc('font', family='Arial', size=8, weight='normal')
plt.figure(figsize=(2.2, 2.2), dpi=300)
ax = plt.gca()
ax.scatter(ff, pce, s=1.5)

### Correlation coefficient and first-order least-squares fit
r = np.round(np.corrcoef(ff, pce)[0, 1], 2)
linear_model = np.polyfit(ff, pce, 1)
slope, intercept = linear_model
print(f'y = {round(slope, 4)} x + {round(intercept, 4)}')
linear_model_fn = np.poly1d(linear_model)
x_s = np.arange(0, 1.0, 0.1)
# The fit is only printed; the trend line is not drawn on this panel.

### Annotate R inside the panel
ax.text(0.1, 18, f'R = {r}')

### Axis ranges and labels
ax.set_xlim(0, 1.0)
ax.set_ylim(0, 20)
ax.set_xlabel('FF')
ax.set_ylabel('PCE (%)')

### Tick layout and frame
ax.set_aspect('auto', adjustable='box', anchor='C')
ax.xaxis.set_major_locator(MaxNLocator(5))
ax.yaxis.set_major_locator(MaxNLocator(4))
ax.xaxis.set_minor_locator(AutoMinorLocator(2))
ax.yaxis.set_minor_locator(AutoMinorLocator(5))
for spine in ax.spines.values():
    spine.set_linewidth(1.5)
ax.tick_params(axis='both', which='major', length=7, width=1.25, direction='in')
ax.tick_params(axis='both', which='minor', length=4, width=1, direction='in')

print(r)
plt.savefig('./statistics/ff_pce_22_8.png', bbox_inches='tight')

In [None]:
### FF vs. PCE scatter (2.5 in panel)
plt.rc('font', family='Arial', size=8, weight='normal')
plt.figure(figsize=(2.5, 2.5), dpi=300)
ax = plt.gca()
ax.scatter(ff, pce, s=1.5)

### Correlation coefficient and first-order least-squares fit
r = np.round(np.corrcoef(ff, pce)[0, 1], 2)
linear_model = np.polyfit(ff, pce, 1)
slope, intercept = linear_model
print(f'y = {round(slope, 4)} x + {round(intercept, 4)}')
linear_model_fn = np.poly1d(linear_model)
x_s = np.arange(0, 1.0, 0.1)
# The fit is only printed; the trend line is not drawn on this panel.

### Annotate R inside the panel
font = dict(family='arial', color='black', weight='light', size=8)
ax.text(0.1, 18, f'R = {r}', fontdict=font)

### Axis ranges and labels
label_font = {'family':'Arial','size':8,'weight':'normal'}
ax.set_xlim(0, 1.0)
ax.set_ylim(0, 20)
ax.set_xlabel('FF', fontdict=label_font)
ax.set_ylabel('PCE (%)', fontdict=label_font)

### Tick layout and frame (x major ticks keep matplotlib's default locator)
ax.set_aspect('auto', adjustable='box', anchor='C')
ax.yaxis.set_major_locator(MaxNLocator(4))
ax.xaxis.set_minor_locator(AutoMinorLocator(2))
ax.yaxis.set_minor_locator(AutoMinorLocator(5))
for spine in ax.spines.values():
    spine.set_linewidth(1.5)
ax.tick_params(axis='both', which='major', width=1.5, labelsize=8, direction='in')
ax.tick_params(axis='both', which='minor', length=3, width=1, direction='in')

print(r)
plt.savefig('./statistics/ff_pce.png', bbox_inches='tight')

In [None]:
def boxplot(x, name, unit):
    """Plot the distribution of ``x`` and save a table of its summary statistics.

    A box plot (with the mean marked) and a violin plot are drawn on the same
    axes and saved to ``./statistics/<name>_distribution.jpg``. The statistics
    are then read back from the box-plot artists, shown with ``display`` and
    saved as a table image to ``./statistics/<name>statistics_report.jpg``.

    Parameters
    ----------
    x : sequence of numbers
        Values of one quantity.
    name : str
        Tick label of the box; also used inside both output file names.
    unit : str
        Text used as the y-axis label.

    Notes
    -----
    ``display`` is not defined in this function; it is expected to be provided
    by the notebook environment. Nothing is returned.
    """
    plt.figure(figsize=(3, 3), dpi=200)
    plt.rc('font', family='Arial', size=8, weight='normal')
    # NOTE(review): newer matplotlib releases rename `labels` to `tick_labels`
    # — confirm against the installed version before changing.
    bp = plt.boxplot(x, showmeans=True, labels=[name])
    plt.violinplot(x)
    plt.ylabel(unit)
    #plt.title(f'Distribution of {name}')
    ax = plt.gca()
    ax.yaxis.set_minor_locator(AutoMinorLocator(5))
    for axis in ['top', 'bottom', 'left', 'right']:
        ax.spines[axis].set_linewidth(1.5)
    ax.tick_params(axis='both', which='major', width=1.5, labelsize=8, direction='in')
    ax.tick_params(axis='y', which='minor', length=3, width=1, direction='in')
    plt.savefig('./statistics/' + name + '_distribution.jpg', bbox_inches='tight')
    plt.show()

    def _first_y(artists):
        # y-position of each line artist, rounded to 2 decimals
        return [round(item.get_ydata()[0], 2) for item in artists]

    medians = _first_y(bp['medians'])
    means = _first_y(bp['means'])
    # Caps come in pairs per box; even entries are taken as the lower cap and
    # odd entries as the upper cap.
    caps = _first_y(bp['caps'])
    minimums = caps[::2]
    maximums = caps[1::2]
    # The lowest / highest y of each box outline are its first / third quartile.
    q1 = [round(min(item.get_ydata()), 2) for item in bp['boxes']]
    q3 = [round(max(item.get_ydata()), 2) for item in bp['boxes']]

    dictionary = {'mean':means,
                  'minimum':minimums,
                  'q1':q1,
                  'median':medians,
                  'q3':q3,
                  'maximum':maximums,
                  'min of full data':round(min(x), 2),
                  'max of full data':round(max(x), 2)}
    df_stat = pd.DataFrame(dictionary)
    display(df_stat)
    ax = draw_save_table(df_stat, header_columns=0, col_width=2.0)
    # NOTE(review): unlike '_distribution.jpg' above there is no separator
    # between the name and 'statistics_report'; the path is kept unchanged so
    # existing output file names stay the same.
    ax.figure.savefig('./statistics/' + name + 'statistics_report.jpg')

In [None]:
# One distribution plot + statistics table per quantity, in this order.
for values, label, unit_text in (
    (donor_homo, 'HOMO of Donor', '(eV)'),
    (donor_lumo, 'LUMO of Donor', '(eV)'),
    (donor_bandgap, 'Bandgap of Donor', '(eV)'),
    (acceptor_homo, 'HOMO of Acceptor', '(eV)'),
    (acceptor_lumo, 'LUMO of Acceptor', '(eV)'),
    (acceptor_bandgap, 'Bandgap of Acceptor', '(eV)'),
    (pce, 'PCE', '(%)'),
    (voc, 'V$_{OC}$', '(V)'),
    (jsc, 'J$_{SC}$', '(mAcm$^{-2}$)'),
    (ff, 'FF', ' '),
):
    boxplot(values, label, unit_text)

In [None]:
# Overlay of the PCE and Jsc distributions on a single pair of axes.
# Jsc is halved so that both data sets fit the 0-20 bottom axis; the top axis
# (0-40) only relabels that same scale in Jsc units.
data1 = pce
data2 = jsc/2

# Font defaults are set BEFORE the figure is created: matplotlib applies rc
# defaults when artists are created, so setting them afterwards would leave
# the axis labels with whatever font the previous cell had configured.
font = {'family': 'Arial', 'weight': 'normal', 'size': 8}
plt.rc('font', **font)

# Main axes
fig, ax1 = plt.subplots(figsize=(3.38, 2.83), dpi=300)

# Draw both histograms on ax1 so that neither is hidden behind another axes
ax1.hist(data1, bins=35, color='skyblue', alpha=0.7, label='PCE')
ax1.hist(data2, bins=35, color='salmon', alpha=0.7, label='J$_{SC}$')

# Main axis labels and ranges
ax1.set_xlabel('PCE (%)')
ax1.set_ylabel('Count')
ax1.set_ylim([0, 400])
ax1.set_xlim([0, 20])

# Secondary (top) axis: used only for its labels, nothing is plotted on it
ax2 = ax1.twiny()
ax2.set_xlim([0, 40])
ax2.set_xlabel('J$_{SC}$ (mA/cm$^{2}$)')

# Tick styling
for ax in [ax1, ax2]:
    ax.yaxis.set_major_locator(MaxNLocator(8))
    ax.yaxis.set_minor_locator(AutoMinorLocator(5))
    ax.tick_params(axis='both', which='major', width=1.25, length=7, direction='in')
    ax.tick_params(axis='both', which='minor', width=1, length=4, direction='in')
    ax.xaxis.set_major_locator(MaxNLocator(4))
    ax.xaxis.set_minor_locator(AutoMinorLocator(5))

for axis in ['top', 'bottom', 'left', 'right']:
    ax1.spines[axis].set_linewidth(1.5)

# Legend (disabled) and layout
#ax1.legend(loc='upper right', frameon=False)
plt.tight_layout()
# NOTE(review): the twin-axis PCE/Jsc histogram cell later in this notebook
# saves to the same path and overwrites this image.
plot_name = 'pce_jsc_dist'
plt.savefig(f'./{plot_name}.png', bbox_inches='tight')
plt.show()


In [None]:
# PCE and Jsc histograms on twin x-axes: PCE on the bottom axis, Jsc on the top.
data1, data2 = pce, jsc

font = dict(family='Arial', weight='normal', size=8)
plt.rc('font', **font)

fig, ax1 = plt.subplots(figsize=(3.38, 2.83), dpi=300)

# Bottom axis
ax1.hist(data1, bins=35, color='skyblue', alpha=0.7, label='Data 1')
ax1.set(xlabel='PCE (%)', ylabel='Count')

# Top axis
ax2 = ax1.twiny()
ax2.hist(data2, bins=35, color='salmon', alpha=0.7, label='Data 2')
ax2.set_xlabel('J$_{SC}$ (mA/cm$^{2}$)')

# Fixed ranges
ax1.set_ylim(0, 450)
ax1.set_xlim(0, 20)
ax2.set_xlim(0, 40)

# Same tick layout on both axes
for ax in (ax1, ax2):
    ax.xaxis.set_major_locator(MaxNLocator(4))
    ax.xaxis.set_minor_locator(AutoMinorLocator(5))
    ax.yaxis.set_major_locator(MaxNLocator(10))
    ax.yaxis.set_minor_locator(AutoMinorLocator(5))
    ax.tick_params(axis='both', which='major', width=1.25, length=7, direction='in')
    ax.tick_params(axis='both', which='minor', width=1, length=4, direction='in')

# Thicker frame
for spine in ax1.spines.values():
    spine.set_linewidth(1.5)

plt.tight_layout()

plot_name = 'pce_jsc_dist'
plt.savefig(f'./{plot_name}.png', bbox_inches='tight')

plt.show()

In [None]:
# Overlay of the Voc and FF distributions on a single pair of axes.
# FF is multiplied by 1.5 so that both data sets fit the 0-1.5 bottom axis;
# the top axis (0-1.0) only relabels that same scale in FF units.
data1 = voc
data2 = ff*1.5

# Font defaults are set BEFORE the figure is created: matplotlib applies rc
# defaults when artists are created, so setting them afterwards would leave
# the axis labels with whatever font the previous cell had configured.
font = {'family': 'Arial', 'weight': 'normal', 'size': 8}
plt.rc('font', **font)

fig, ax1 = plt.subplots(figsize=(3.38, 2.83), dpi=300)

# Both histograms are drawn on ax1
ax1.hist(data1, bins=30, color='skyblue', alpha=0.7, label='V$_{OC}$')
ax1.hist(data2, bins=30, color='salmon', alpha=0.7, label='FF')

ax1.set_xlabel('V$_{OC}$ (V)')
ax1.set_ylabel('Count')
ax1.set_ylim([0, 1000])
ax1.set_xlim([0, 1.5])

# Top axis: labels only, nothing is plotted on it
ax2 = ax1.twiny()
ax2.set_xlim([0, 1.0])
ax2.set_xlabel('FF')

for ax in [ax1, ax2]:
    ax.yaxis.set_major_locator(MaxNLocator(5))
    ax.yaxis.set_minor_locator(AutoMinorLocator(2))
    ax.tick_params(axis='both', which='major', width=1.25, length=7, direction='in')
    ax.tick_params(axis='both', which='minor', width=1, length=4, direction='in')

# x ticks differ between the two axes because their ranges differ
for ax in [ax1]:
    ax.xaxis.set_major_locator(MaxNLocator(3))
    ax.xaxis.set_minor_locator(AutoMinorLocator(5))

for ax in [ax2]:
    ax.xaxis.set_major_locator(MaxNLocator(5))
    ax.xaxis.set_minor_locator(AutoMinorLocator(2))

for axis in ['top', 'bottom', 'left', 'right']:
    ax1.spines[axis].set_linewidth(1.5)

plt.tight_layout()
# NOTE(review): the twin-axis Voc/FF histogram cell later in this notebook
# saves to the same path and overwrites this image.
plot_name = 'voc_ff_dist'
plt.savefig(f'./{plot_name}.png', bbox_inches='tight')
plt.show()


In [None]:
# Voc and FF histograms on twin x-axes: Voc on the bottom axis, FF on the top.
data1, data2 = voc, ff

font = dict(family='Arial', weight='normal', size=12)
plt.rc('font', **font)

fig, ax1 = plt.subplots(figsize=(5, 4), dpi=300)

# Bottom axis
ax1.hist(data1, bins=30, color='skyblue', alpha=0.7, label='Data 1')
ax1.set(xlabel='V$_{OC}$ (V)', ylabel='Count')

# Top axis
ax2 = ax1.twiny()
ax2.hist(data2, bins=30, color='salmon', alpha=0.7, label='Data 2')
ax2.set_xlabel('FF')

# Fixed ranges
ax1.set_ylim(0, 1000)
ax1.set_xlim(0, 1.5)
ax2.set_xlim(0, 1)

# One legend per axes, stacked in the upper-right corner
legend1 = ax1.legend(['V$_{OC}$'], loc='upper right', bbox_to_anchor=(1, 0.95), frameon=False)
legend2 = ax2.legend([' FF '], loc='upper right', bbox_to_anchor=(1, 0.85), frameon=False)

# Tick layout: identical y settings, per-axis x settings
# (axes, number of major x bins, minor x subdivisions)
for ax, x_major_bins, x_minor_parts in ((ax1, 3, 5), (ax2, 5, 2)):
    ax.yaxis.set_major_locator(MaxNLocator(5))
    ax.yaxis.set_minor_locator(AutoMinorLocator(2))
    ax.tick_params(axis='both', which='major', width=1.5, length=5, labelsize=10, direction='in')
    ax.tick_params(axis='both', which='minor', width=1, length=3, direction='in')
    ax.xaxis.set_major_locator(MaxNLocator(x_major_bins))
    ax.xaxis.set_minor_locator(AutoMinorLocator(x_minor_parts))

# Thicker frame
for spine in ax1.spines.values():
    spine.set_linewidth(2)

plt.tight_layout()

plot_name = 'voc_ff_dist'
plt.savefig(f'./{plot_name}.png', bbox_inches='tight')

plt.show()