# Data Reduction 3

***
**Integration / Normalisation**

- 3 notebooks for 20K samples (ASW - ASW_C2H6 - C2H6) + 1 notebook for higher T
- Perform for all Samples (xx/xx/2023)
***

## Imports

In [None]:
%matplotlib inline

import numpy 
import math
from math import isnan
import matplotlib
import matplotlib.pyplot as plt
from matplotlib import cm
import matplotlib.colors as mcolors
import pandas as pd
import glob
from functools import reduce
import ipywidgets as widgets
import matplotlib.gridspec as gridspec
from datetime import datetime
from IPython.core.display import SVG
import re
import json

## Input Parameters

$\color{red}{\text{Implement manually !}}$

In [None]:
# --- Run identification: edit by hand for every sample ---

# 0.1 Date of the run; interpolated into plot titles and output file paths
date = "2021_03_08"

# 0.2 XP: experiment folder name interpolated into output file paths
XP = "XP_1-2"

# 0.3 Sample type
spl = "C2H6_ASW"

# 0.4 Deposition temperature. Kept as a string: it is compared with the
#     literal "20" further down to choose the normalisation branch.
Tdep = "20"

# Colormap normalisation

# Be careful: these limits may differ between samples.
# NOTE(review): Tmin/Tmax are not read in the visible part of this notebook;
# presumably temperatures in kelvin - confirm.

Tmin = 20
Tmax = 150


# Is there any isotherm? When True, the isotherm plotting cell is executed.

Iso = False

# How do I deal with this now ?

## 3.1 <u>Integration</u> 

In the following we integrate the spectra between two bounds using the trapezoidal rule.

### Function definition

In [None]:
def _limit_position(axis, bound, tolerance):
    """Return the position in *axis* to use as integration limit for *bound*."""
    distance = numpy.abs(axis - bound)
    within = numpy.flatnonzero(distance < tolerance)
    if within.size:
        # Historical behaviour: the last sample inside the tolerance wins.
        return int(within[-1])
    # No sample inside the tolerance (coarse sampling, or bound outside the
    # recorded range): use the nearest sample instead of silently using 0.
    return int(numpy.argmin(distance))


def integration(index, data, wavelength, born_1, born_2, tolerance=0.5):
    """Integrate one scan between two bounds with the trapezoidal rule.

    Parameters
    ----------
    index : int
        Scan number. The scan is the column of *data* whose name ends with
        ``_<index>`` (the text after the last underscore, read as an integer).
        If several columns match, the last one is used.
    data : pandas.DataFrame
        One column per scan, one row per spectral sample.
    wavelength : sequence of float
        Spectral axis (in cm-1), same length and order as the rows of *data*.
        It may be ascending or descending.
    born_1, born_2 : float
        Integration bounds, in the same unit as *wavelength*.
    tolerance : float, optional
        A sample closer than this to a bound is taken as that bound's limit;
        when no sample is that close the nearest sample is used.

    Returns
    -------
    float
        Trapezoidal integral of the scan from *born_1* to *born_2* (negative
        for a positive signal when ``born_1 > born_2``). ``0.0`` when
        *wavelength* is empty or both bounds map to the same sample.

    Raises
    ------
    ValueError
        If no column of *data* carries the scan number *index*.
    """
    matches = []
    for position, column_name in enumerate(data.columns):
        suffix = str(column_name).split('_')[-1]
        try:
            scan_number = int(suffix)
        except ValueError:
            # Columns without a numeric suffix (e.g. an axis column) are not scans.
            continue
        if scan_number == index:
            matches.append(position)

    if not matches:
        # Falling back to the first column would return another scan's area.
        raise ValueError(
            "no column of data ends with scan number {}".format(index)
        )

    # Work on plain arrays so that indexing is positional whatever the
    # DataFrame/Series index labels are.
    scan = data.iloc[:, matches[-1]].to_numpy(dtype=float)
    axis = numpy.asarray(wavelength, dtype=float)

    if axis.size == 0:
        return 0.0

    start = _limit_position(axis, born_1, tolerance)
    stop = _limit_position(axis, born_2, tolerance)

    # Walk the samples in storage order; flip the sign when the limits come
    # out reversed (descending axis) so the result is still the integral
    # taken from born_1 to born_2.
    sign = 1.0
    if start > stop:
        start, stop = stop, start
        sign = -1.0

    x = axis[start:stop + 1]
    y = scan[start:stop + 1]

    return float(sign * numpy.sum(0.5 * (y[1:] + y[:-1]) * numpy.diff(x)))

### 3.1.A OH stretch

In [None]:
#range A
# The first column of dataA_BC (defined outside this section) is taken as the
# spectral axis for range A.
wavelengthA = dataA_BC.T.iloc[0]
# Renumber from 0 so that labels and positions coincide.
wavelengthA = wavelengthA.reset_index(drop=True)

In [None]:
#wavelengthA

In [None]:
#range A
# Keep every column of dataA_BC except the first one, i.e. the scans only.
dataA_BC_I = dataA_BC.T.iloc[1:].T
# Renumber the rows from 0.
dataA_BC_I = dataA_BC_I.reset_index(drop=True)

In [None]:
dataA_BC_I

In [None]:
# One integrated area per scan column of range A.
nscan = len(list(dataA_BC_I.columns.values.tolist()))

# NOTE(review): areas and times are allocated but never filled in this cell
# (the assignments below are commented out).
areas = numpy.zeros(nscan)
areasBCA = numpy.zeros(nscan)

times = numpy.zeros(nscan)

for iint in range(0, nscan):
    
    #areas[iint] = integration(iint+1, data, wavelength, 2900, 3900)
    # integration() looks a scan up by the number at the end of its column
    # name, hence iint+1; the limits are 2800 and 4000 on wavelengthA.
    areasBCA[iint] = integration(iint+1, dataA_BC_I, wavelengthA, 2800, 4000)
    #times[iint] = 15*iint
    

In [None]:
areasBCA

### 3.1.B Bending Modes

In [None]:
#range C (remember to suppress the libration-mode contribution!)
# The first column of dataC_BC_U (defined outside this section) is taken as
# the spectral axis for range C.
wavelengthC = dataC_BC_U.T.iloc[0]
# Renumber from 0 so that labels and positions coincide.
wavelengthC = wavelengthC.reset_index(drop=True)

In [None]:
wavelengthC

In [None]:
#range C
# Keep every column of dataC_BC_U except the first one, i.e. the scans only.
dataC_BC_I = dataC_BC_U.T.iloc[1:].T
# Renumber the rows from 0.
dataC_BC_I = dataC_BC_I.reset_index(drop=True)

In [None]:
dataC_BC_I

In [None]:
# One integrated area per scan column of range C.
# Note that nscan, areas and times are reassigned here (same names as in the
# range A cell).
nscan = len(list(dataC_BC_I.columns.values.tolist()))

areas = numpy.zeros(nscan)
areasBCC = numpy.zeros(nscan)

times = numpy.zeros(nscan)

for iint in range(0, nscan):
    
    #areas[iint] = integration(iint+1, data, wavelength, 2900, 3900)
    # integration() looks a scan up by the number at the end of its column
    # name, hence iint+1; the limits are 1040 and 1900 on wavelengthC.
    areasBCC[iint] = integration(iint+1, dataC_BC_I, wavelengthC, 1040, 1900)
    #times[iint] = 15*iint
    

In [None]:
areasBCC

### 3.1.4 Clean 

In [None]:
#range A

# Build a two-column table (Int_A, Name) with one row per scan.
# 'File number' only serves to bring the column names of dataA_BC_I in as the
# row index; the column itself is dropped on the following line.
areasBCA_df = pd.DataFrame({ 'File number': dataA_BC_I.iloc[0,:], 'Int_A': areasBCA[:]})
# Transpose, drop the first row, transpose back: removes 'File number'.
areasBCA_df = areasBCA_df.T.iloc[1:].T

# Move the scan column names out of the index into a column called 'index',
# copy it to 'Name', then drop 'index' with the same transpose trick.
areasBCA_df = areasBCA_df.reset_index()
areasBCA_df['Name'] = areasBCA_df['index']
areasBCA_df= areasBCA_df.T.iloc[1:].T

#range C

# Same construction for range C, giving the columns Int_C and Name.
areasBCC_df = pd.DataFrame({ 'File number': dataC_BC_I.iloc[0,:], 'Int_C': areasBCC[:]})
areasBCC_df = areasBCC_df.T.iloc[1:].T

areasBCC_df = areasBCC_df.reset_index()
areasBCC_df['Name'] = areasBCC_df['index']
areasBCC_df= areasBCC_df.T.iloc[1:].T

In [None]:
#areasBCA_df
#areasBCC_df

### 3.1.5 Append to data Annex

In [None]:
# Append Int_A, then Int_C, to the annex table by matching on "Name".
# pd.merge defaults to an inner join, so rows whose Name has no match in the
# area tables are dropped.
# NOTE(review): re-running this cell on an already merged data_anex_df will
# presumably produce suffixed duplicate columns - confirm before re-executing.
data_anex_df = pd.merge(data_anex_df, areasBCA_df, on="Name")
data_anex_df = pd.merge(data_anex_df, areasBCC_df, on="Name")


In [None]:
#data_anex_df

### 3.1.6 Plotting

#### Dict values

In [None]:
# Integrated area versus temperature, one marker per Tdictionary entry.
fig= plt.figure(figsize=(8,6))

# Tdictionary is defined outside this section. Each key is used as the x value
# and each value as a row position in data_anex_df (also used for the marker
# colour and, as value+1, in the legend).
for keys, values in Tdictionary.items():

    # NOTE(review): column 28 is addressed by position - presumably 'Int_A',
    # confirm; any change in data_anex_df's column layout shifts it.
    plt.plot(keys, data_anex_df.iloc[values,28], '+', mew=3, ms=12, c=cm.jet(values/nscan),label= str(keys)+" - "+str(values+1)+" - "+str(int(data_anex_df.iloc[values,28])))    

plt.title('{0} DR3 Integration '.format(date))
#plt.axis([3210,3270])
plt.xlabel('Temperature (K)').set_fontsize(13)
plt.ylabel('OH stretch Integration (AU)').set_fontsize(13)
#ax = fig.gca()
#plt.grid()
plt.legend()


# The figure is not saved while the following line stays commented out.
#plt.savefig('D:\DATA-Processing\PAC\{}/Samples/{}/Plots/DR/DR2_{}_PeakA-Integration.png'.format(XP, date, date))

plt.show()

#### Dict values

In [None]:
# Isotherm plot: only drawn when the Iso flag is set.
if Iso:

    fig = plt.figure(figsize=(8,6))

    # Isodic1 and Tiso1 are defined outside this section. Each key is used as
    # the x value and each value as a row position in data_anex_df.
    for keys, values in Isodic1.items():

        # NOTE(review): column 28 is addressed by position - presumably
        # 'Int_A'; confirm, a change in column layout would shift it.
        plt.plot(keys, data_anex_df.iloc[values,28], '+', mew=3, ms=12, c=cm.jet(values/nscan),label= str(keys)+" - "+str(values+1)+" - "+str(int(data_anex_df.iloc[values,28])))

    plt.title('{0} DR3 Integration Iso {1}  '.format(date, Tiso1))
    #plt.axis([3210,3270])
    plt.xlabel('Time (h)').set_fontsize(13)
    plt.ylabel('OH stretch Integration (AU)').set_fontsize(13)
    ax = fig.gca()
    # Limit the x axis to at most 10 major ticks.
    ax.xaxis.set_major_locator(plt.MaxNLocator(10))
    #plt.grid()
    #plt.legend()


    # Raw string: the Windows path contains backslashes (\D, \P, \{) that are
    # invalid escape sequences in a normal literal. The resulting path is
    # unchanged.
    plt.savefig(r'D:\DATA-Processing\PAC\{}/Samples/{}/Plots/DR/DR2_{}_PeakA-Integration_Iso_{}.png'.format(XP, date, date, Tiso1))

    plt.show()

## 3.2 <u>Column Density Calculation </u>

### 3.2.1 Theory

To do

### 3.2.2 Normalisation factor

Dep_val_A (resp. Dep_val_C) is the first value found in the column Int_A (resp. Int_C). It corresponds to the integration of the deposition scan, for all samples.

In [None]:
# Normalisation references: the first row of the Int_A / Int_C columns of the
# annex table.
Dep_val_A = data_anex_df['Int_A'].values[0]

Dep_val_C = data_anex_df['Int_C'].values[0]

In [None]:
#Dep_val_A 
#Dep_val_C

### 3.3 Optical depth Calcul

### 3.3.A OH stretch

Aa taken from ref ... Hagen 1981

Find more recent value

In [None]:
# Constant used for range A (the Dep_val_A integration is divided by it).
# NOTE(review): presumably a band strength in cm molecule-1 - confirm the
# unit and the literature value.
Aa = 2 * 10**(-16)

In [None]:
# Range A result: first Int_A integration divided by Aa.
NA = Dep_val_A / Aa

In [None]:
#NA

### 3.3.B OH bend

In [None]:
# Constant used for range C (the Dep_val_C integration is divided by it).
# NOTE(review): no source is given for this value; presumably a band strength
# in cm molecule-1 - confirm.
Ac = 1.2 * 10**(-17)

In [None]:
# Range C result: first Int_C integration divided by Ac.
NC = Dep_val_C / Ac

In [None]:
#NC

### 3.3.C Comparison and analysis

In [None]:
# Ratio of the range A result to the range C result, as a percentage
# (100 means both ranges give the same value).
comp = NA/NC * 100

In [None]:
#comp

### 3.3.E Append to Ice_thickness_df

Prior to the reduction analysis, the laser-diode thickness notebook should have been run and a csv produced with the inputs. We will append the previously obtained data to this csv.

In [None]:
# Load the shared ice-thickness table.
# Raw string: \D, \P and \I are invalid escape sequences in a normal literal.
# The resulting path is unchanged.
Ice_thickness_df = pd.read_csv(r'D:\DATA-Processing\PAC\Ice_thickness.csv')

In [None]:
#Ice_thickness_df

In [None]:
# Write NA and NC into the 'Na' / 'Nc' columns of every row whose "Date"
# equals the date of this run.
# NOTE(review): if no row matches, nothing is written and no error is raised -
# presumably the row is created by the thickness notebook; confirm.
Ice_thickness_df.loc[Ice_thickness_df["Date"] == date, 'Na'] = NA 
Ice_thickness_df.loc[Ice_thickness_df["Date"] == date, 'Nc'] = NC 

In [None]:
#Ice_thickness_df

In [None]:
# Overwrite the shared ice-thickness table with the updated values.
# Raw string: \D, \P and \I are invalid escape sequences in a normal literal.
# The resulting path is unchanged.
Ice_thickness_df.to_csv(r'D:\DATA-Processing\PAC\Ice_thickness.csv', index=False)

## 3.3 <u> Normalisation </u>

2 different normalisations
- The 20K sample can be normalised with the first scan (ie the deposition)
- The sample deposited at the higher T needs to be normalised with respect to the integration of the normalised scan (3.3?)  

### 20K depositions

For 20K depositions there is no problem: we take the integration value of the deposition and normalise by it.

Insert if statement: if TRamp[:3] is not NaN, continue; otherwise don't perform the normalisation

In [None]:
#Dep_val_A

 $\color{red}{\text{Here we need an if statement: if the sample was deposited at 20K, norm factor = 100 / Dep_val_A; otherwise another notebook is run to get the norm factor at the temperature of interest!}}$

In [None]:
# Choose the normalisation factor from the deposition temperature (a string).
if Tdep == "20":

    
    # Tdep "20": scale so that the first Int_A integration maps to 100.
    NormFactor = 100 / Dep_val_A
    print(NormFactor)
    
else:

    # Any other Tdep: run the companion script inside this notebook's
    # namespace (-i). It is expected to define mean_X, which is read below.
    %run -i "HighTreduction.py"
    print(mean_X)
    NormFactor = mean_X / Dep_val_A

In [None]:
#NormFactor

 $\color{red}{\text{Append Norm factor into sample thick csv}}$

In [None]:
# Normalised range A scans: every value multiplied by the same scalar factor.
dataA_N = dataA_BC_I * NormFactor

Sanity check:
    
- Insert wavenumber (before export)

In [None]:
# Put the spectral axis back as the first column before exporting.
# insert() modifies dataA_N in place, and pandas raises if 'Wavenumber' already
# exists, so this cell cannot be run twice on the same frame.
dataA_N.insert(loc = 0,
          column = 'Wavenumber',
          value =  wavelengthA)

dataA_N

## Export as csv

In [None]:
# Export the normalised range A scans (with the Wavenumber column).
# Raw string: \D, \P and \{ are invalid escape sequences in a normal literal.
# The resulting path is unchanged.
# NOTE(review): the row index is written too (no index=False, unlike the
# annex export) - confirm that downstream readers expect it.
dataA_N.to_csv(r"D:\DATA-Processing\PAC\{}/Samples/{}/Data/DR/DR3_{}_A.csv".format(XP,date,date))

## 3.4 <u>Integration II</u> 

The purpose here is to integrate the normalised scans so that we have integration values for scans at higher temperature, to use as normalisation values for samples deposited at higher temperature.

In [None]:
#dataA_N

Suppress wavenumber

In [None]:
# Drop the first column (the 'Wavenumber' column inserted before the export)
# so that only scan columns remain for the integration.
dataA_N = dataA_N.T.iloc[1:].T

#dataA_N

In [None]:
# One integrated area per normalised scan column of range A.
# Note that nscan, areas and times are reassigned here (same names as in the
# earlier integration cells).
nscan = len(list(dataA_N.columns.values.tolist()))

areas = numpy.zeros(nscan)
areasBCA2 = numpy.zeros(nscan)

times = numpy.zeros(nscan)

for iint in range(0, nscan):
    
    #areas[iint] = integration(iint+1, data, wavelength, 2900, 3900)
    # Same limits (2800 and 4000) and axis as the first range A integration,
    # applied to the normalised scans.
    areasBCA2[iint] = integration(iint+1, dataA_N, wavelengthA, 2800, 4000)
    #times[iint] = 15*iint
    

In [None]:
areasBCA2

In [None]:
# Build a two-column table (Int_N_A, Name) with one row per scan.
# 'File number' only brings the column names of dataA_BC_I in as the row
# index and is dropped on the next line.
# NOTE(review): dataA_BC_I is used here rather than dataA_N - presumably both
# have the same scan columns; confirm.
areasBCA2_df = pd.DataFrame({ 'File number': dataA_BC_I.iloc[0,:], 'Int_N_A': areasBCA2[:]})
areasBCA2_df = areasBCA2_df.T.iloc[1:].T
# Move the scan names into a column, copy it to 'Name', then drop 'index'.
areasBCA2_df = areasBCA2_df.reset_index()
areasBCA2_df['Name'] = areasBCA2_df['index']
areasBCA2_df= areasBCA2_df.T.iloc[1:].T

In [None]:
#areasBCA2_df

In [None]:
# Append Int_N_A to the annex table by matching on "Name" (pd.merge defaults
# to an inner join, so rows without a match are dropped).
data_anex_df = pd.merge(data_anex_df, areasBCA2_df, on="Name")

### High Temperature deposition

For the samples deposited at higher temperature, normalising by the deposition would not allow a good comparison with the sample deposited at 20K. Hence, we will average the normalised values (from Integration II) at the temperature of interest and use this average as the normalisation factor.<br>

We will use a separate notebook to do that, which will be run separately if need be.

## Export data_Anex as csv

In [None]:
# Export the annex table, now including the integration columns, without the
# row index.
# Raw string: \D, \P and \{ are invalid escape sequences in a normal literal.
# The resulting path is unchanged.
data_anex_df.to_csv(r"D:\DATA-Processing\PAC\{}/Samples/{}/Data/DR/{}_data_annex.csv".format(XP,date,date), index=False)

***

<img style="float: right;" src="..\..\Docs\Logo_work_in_progress.svg" alt="logo" width="160px"/>

## About this notebook

**Updated On:** 2023-03-15 


**Author:** Vincent Deguin, PhD Student.
<br>
 **Contact:** vincent.deguin@open.ac.uk  