In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy.optimize as spopt
import os
import plotly.graph_objects as go

In [2]:
# Creatin a linear regression function.
def average(inarray, avlen):
    
    arrlen = inarray.size
    
    outarray = np.zeros(int(arrlen//avlen))
    
    for i in range(0, int(arrlen//avlen)):
        for j in range(0, int(avlen)):
            outarray[i] += inarray[int(avlen)*i+j]/avlen
    
    return outarray

def linear(x, a, b):
    return a*x + b

In [3]:
#Constants
h = 4.135667696E-15 #eV s
c = 3E17 #nm/s 

In [4]:
# Getting the path to the data
file_path= 'Data/'
files = os.listdir(file_path)

file_names_without_extension = [file.split('.TXT')[0] for file in files]

All_files = file_names_without_extension
All_files



['1hr Sample 1_1812062U1',
 '1hr Sample 2nd measurement_1812062U1',
 '1hr Sample 3rd measurement_1812062U1',
 '24hr Sample 1_1812062U1',
 '24hr Sample 2nd measurement_1812062U1',
 '24hr Sample 3rd measurement_1812062U1',
 'CFO 2 1st measurement_1812062U1',
 'CFO 2 2nd measurement_1812062U1',
 'CFO 3 1st measurement_1812062U1',
 'CFO 3 2nd measurement_1812062U1',
 'CFO 4 1st measurement_1812062U1',
 'CFO 4 2nd measurement_1812062U1',
 'CFO 5 1st measurement_1812062U1',
 'CFO 5 2nd measurement_1812062U1']

In [5]:
# ['1hr Sample 1_1812062U1',
#  '1hr Sample 2nd measurement_1812062U1',
#  '1hr Sample 3rd measurement_1812062U1',
#  '24hr Sample 1_1812062U1',
#  '24hr Sample 2nd measurement_1812062U1',
#  '24hr Sample 3rd measurement_1812062U1',
#  'CFO 2 1st measurement_1812062U1',
#  'CFO 2 2nd measurement_1812062U1',
#  'CFO 3 1st measurement_1812062U1',
#  'CFO 3 2nd measurement_1812062U1',
#  'CFO 4 1st measurement_1812062U1',
#  'CFO 4 2nd measurement_1812062U1',
#  'CFO 5 1st measurement_1812062U1',
#  'CFO 5 2nd measurement_1812062U1']

name = '24hr Sample 2nd measurement_1812062U1'  # ALways Change this part 
inputfilename = file_path + name + '.txt'
plotfilename = file_path + name  + '.png'

inputfilename

'Data/24hr Sample 2nd measurement_1812062U1.txt'

In [6]:
# Reading in the data in txt format
#Check for column separator (sep) and decimal separator (decimal)
data = pd.read_csv(inputfilename, sep=';', decimal='.', header=8)
data.columns = ['wavelength', 'spectrum', 'dark', 'reference', 'reflectance']

In [7]:
data.head()

Unnamed: 0,wavelength,spectrum,dark,reference,reflectance
0,159.37,-132.39,-42.735,-90.842,0.0
1,159.97,-185.288,-50.486,-70.171,0.0
2,160.58,-124.678,-25.808,-35.635,0.0
3,161.18,-94.695,-25.138,-25.01,0.0
4,161.78,88.138,94.894,120.607,-26.27382


In [8]:
# see the wavelength below 200  

data.loc[data['wavelength'] > 200]

Unnamed: 0,wavelength,spectrum,dark,reference,reflectance
68,200.36,-166.891,-93.145,177.495,-27.24846
69,200.97,-136.077,-82.302,196.373,-19.29632
70,201.57,-213.751,-113.032,209.457,-31.23172
71,202.17,-204.739,-148.906,190.947,-16.42874
72,202.77,-88.796,-57.228,282.150,-9.30165
...,...,...,...,...,...
1625,1097.88,-210.633,-309.150,-176.533,74.28680
1626,1098.42,-136.633,-266.150,-122.867,90.39200
1627,1098.97,-291.300,-339.150,-185.200,31.08152
1628,1099.51,-286.633,-350.817,-226.867,51.78164


In [9]:
#The following two arrays contain the measured wavelenght and reflectance values.
#The latter are scaled to 0 to 1.
wavelength = np.asarray(data['wavelength'].tolist())
reflectance = np.asarray(data['reflectance'].tolist())/100
print(f"Wavelength: {wavelength}\nReflectance: {reflectance}")

Wavelength: [ 159.37  159.97  160.58 ... 1098.97 1099.51 1100.05]
Reflectance: [0.        0.        0.        ... 0.3108152 0.5178164 0.6829718]


In [10]:
#All measurements below 200 nm are discarded as there is no light below that anyway.
cutindex = np.argmax(wavelength>200.)
wavelength = wavelength[cutindex:]
reflectance = reflectance[cutindex:]
print(f"Wavelength: {wavelength}\nReflectance: {reflectance}")

Wavelength: [ 200.36  200.97  201.57 ... 1098.97 1099.51 1100.05]
Reflectance: [-0.2724846 -0.1929632 -0.3123172 ...  0.3108152  0.5178164  0.6829718]


In [11]:
#All data points with reflectance above 100% or 1 are removed.
#Comment the following three lines out if not wanted.
greaterone = np.argwhere(reflectance > 1.0)
wavelength = np.delete(wavelength, greaterone)
reflectance = np.delete(reflectance, greaterone)
print(f"Wavelength: {wavelength}\nReflectance: {reflectance}")

Wavelength: [ 200.36  200.97  201.57 ... 1098.97 1099.51 1100.05]
Reflectance: [-0.2724846 -0.1929632 -0.3123172 ...  0.3108152  0.5178164  0.6829718]


In [12]:
#In order to remove noise subsequent points are averaged. 
#Aver specifies how many neighboring points. Aver = 1 means no change.
#
aver = 2
wave_ave = average(wavelength,aver)
refl_ave = average(reflectance,aver)
print(f"Wave_ave: {wave_ave}\nRefl_ave: {refl_ave}")

Wave_ave: [ 200.665  201.87   203.07   204.28   205.48   206.68   207.89   209.09
  210.295  211.5    212.7    213.9    215.11   216.31   217.51   218.72
  219.92   221.12   222.32   223.53   224.73   225.93   227.13   228.33
  229.54   230.74   231.94   233.14   234.34   235.54   236.745  237.95
  239.15   240.35   241.55   242.75   243.95   245.15   246.35   247.55
  248.75   249.95   251.15   252.35   253.55   254.75   255.95   257.15
  258.35   259.55   260.75   261.95   263.15   264.35   265.55   266.745
  267.94   269.14   270.34   271.54   272.74   273.935  275.13   276.33
  277.53   278.73   279.92   281.12   282.32   283.52   284.71   285.91
  287.11   288.305  289.5    290.7    291.895  293.09   294.29   295.485
  296.68   297.88   299.07   300.27   301.46   302.66   303.855  305.05
  306.245  307.44   308.635  309.83   311.025  312.22   313.415  314.61
  315.805  317.     318.19   319.39   320.58   321.775  322.97   324.16
  325.355  326.55   327.74   328.935  330.13   331.3

In [13]:
#A plot of the measured data and the averaged data
# Create the figure and specify the axes
fig = go.Figure()

# Add the scatter plot for the measured data
fig.add_trace(go.Scatter(x=wavelength, y=reflectance*100, mode='markers', marker=dict(color='blue'), name='Measured Data'))

# Set the layout for the plot
fig.update_layout(
    title="A plot of the measured data and the averaged data " + name,
    xaxis_title="Wavelength (nm)",
    yaxis_title="Diffuse Reflectance (%)",
    xaxis=dict(range=[400, 1050], autorange=False),
    yaxis=dict(range=[0, 100], autorange=False, type="linear")
)

# Show the plot
fig.show()


In [14]:
#The reflectance is used to calculate the Tauc-Plot data.
#Always check that the exponent in the y formula is correct for your case.
#
energy = np.flip(h*c*(1./wave_ave))
alpha = np.flip((1.0 - refl_ave)**2/(2*refl_ave))
print(f"Energy: {energy}\nAlpha: {alpha}")

Energy: [1.128135   1.12924907 1.13064862 1.13204649 1.13373781 1.13486299
 1.13599559 1.13713047 1.13940179 1.14054348 1.14168746 1.14282848
 1.1445522  1.14570424 1.1468586  1.14801529 1.14975471 1.15092259
 1.1520875  1.15325477 1.15501549 1.1561887  1.15736429 1.15854228
 1.15972809 1.16091633 1.16210157 1.16328923 1.16448478 1.16568279
 1.16687779 1.16808073 1.16928616 1.17049407 1.17170449 1.17291187
 1.17413285 1.17534524 1.17657129 1.17778872 1.17901988 1.18024801
 1.18147869 1.18333236 1.18456948 1.18581487 1.18705719 1.18830213
 1.18955538 1.19081127 1.19205837 1.19331956 1.19458342 1.19584996
 1.19711918 1.19837954 1.19965414 1.20093146 1.20221151 1.20349428
 1.2047798  1.20606806 1.20735909 1.20865288 1.20994944 1.21124879
 1.21255094 1.21385589 1.21516364 1.21648019 1.21779959 1.21911586
 1.22043499 1.22175697 1.22308784 1.22442162 1.22575226 1.22708579
 1.22843439 1.22977377 1.23111607 1.23247355 1.23382175 1.23517906
 1.23653935 1.23789647 1.23926895 1.24063207 1.2420106

In [15]:
# determine the size of the energ and alpha values

size = len(energy)
print(f"Energy: {energy}\nAlpha: {alpha}")
print(f"Size: {size}")

Energy: [1.128135   1.12924907 1.13064862 1.13204649 1.13373781 1.13486299
 1.13599559 1.13713047 1.13940179 1.14054348 1.14168746 1.14282848
 1.1445522  1.14570424 1.1468586  1.14801529 1.14975471 1.15092259
 1.1520875  1.15325477 1.15501549 1.1561887  1.15736429 1.15854228
 1.15972809 1.16091633 1.16210157 1.16328923 1.16448478 1.16568279
 1.16687779 1.16808073 1.16928616 1.17049407 1.17170449 1.17291187
 1.17413285 1.17534524 1.17657129 1.17778872 1.17901988 1.18024801
 1.18147869 1.18333236 1.18456948 1.18581487 1.18705719 1.18830213
 1.18955538 1.19081127 1.19205837 1.19331956 1.19458342 1.19584996
 1.19711918 1.19837954 1.19965414 1.20093146 1.20221151 1.20349428
 1.2047798  1.20606806 1.20735909 1.20865288 1.20994944 1.21124879
 1.21255094 1.21385589 1.21516364 1.21648019 1.21779959 1.21911586
 1.22043499 1.22175697 1.22308784 1.22442162 1.22575226 1.22708579
 1.22843439 1.22977377 1.23111607 1.23247355 1.23382175 1.23517906
 1.23653935 1.23789647 1.23926895 1.24063207 1.2420106

In [16]:
# # Filter invalid values for sqrt calculation
# valid_indices = np.where(alpha * energy > 0)
# energy_valid = energy[valid_indices]
# alpha_valid = alpha[valid_indices]
# y = (alpha_valid * energy_valid) ** (1. / 2)

In [17]:
# #y = (alpha*energy)**(2)
y = np.sqrt(np.abs(alpha * energy))
# #The derivate of y sometimes is needed for error checking
# yprime = np.gradient(y, energy)
# yprime

In [18]:
# # Ensure the lengths of energy_valid and y match
# assert len(energy_valid) == len(y), "Lengths of energy and y do not match"
assert len(energy) == len(y)

## Linear regression

In [19]:
#The next section fits a linear function to the specific start and stop energy range.
#The bandgap and its statistical error are printed in the console.
#

# Define the percentage thresholds for start and stop fit values
start_fit_value = 1.5
stop_fit_value = 2.1

In [20]:
start_fit = np.argmax(energy>start_fit_value)
stop_fit = np.argmax(energy>stop_fit_value)-1
fit_energy = energy[start_fit:stop_fit]
fit_y = y[start_fit:stop_fit]

In [21]:
# Calculate the error on the fit parameters
popt1, pcov1 = spopt.curve_fit(linear, fit_energy, fit_y)
perr1 = np.sqrt(np.diag(pcov1))

In [22]:
# Calculate and print bandgap
bandgap = -popt1[1] / popt1[0]
bandgap_error = np.sqrt((perr1[1] / popt1[0]) ** 2 + (perr1[0] * popt1[1] / popt1[0] ** 2) ** 2)
print(f'Bandgap is equal to {bandgap:.2f} +/- {bandgap_error:.2f} eV.')

Bandgap is equal to 1.22 +/- 0.02 eV.


In [23]:
# #WHAT IS THE NEXT LIONE FOr
# print('Bandgap is equal ' + str(-popt1[1]/popt1[0]) + ' +/- ' + str(np.sqrt((perr1[1]/popt1[0])**2+(perr1[0]*popt1[1]/popt1[0]**2)**2)) + 'eV.')


In [24]:
# Plot Tauc plot and linear fit using Plotly
fig2 = go.Figure()

fig2.add_trace(go.Scatter(
    x=energy,
    y=y,
    mode='markers',
    name='Tauc Plot Data'
))

fig2.add_trace(go.Scatter(
    x=fit_energy,
    y=linear(fit_energy, *popt1),
    mode='lines',
    name='Linear Fit',
    line=dict(color='red')
))

fig2.update_layout(
    xaxis_title='Energy (eV)',
    yaxis_title='(αhν)^(1/2) (eV^(1/2))',
    xaxis=dict(range=[1.1, 3.0]),
    yaxis=dict(range=[0, 5])
)

fig2.show()