![ChessUrl](https://young.scot/media/1513/working_information_digtialcareermythbustersgif_001.jpg "cat")


# Data Analysis

## Imports

In [1]:

# Enabling the `widget` backend.
# This requires jupyter-matplotlib a.k.a. ipympl.
# ipympl can be install via pip or conda.
%matplotlib widget
        
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from ipywidgets import Output
import matplotlib
from scipy import integrate
import os

In [2]:
import analise as ana 
import cdata 
import hotznplots as plot

In [3]:
# Default value of display.max_rows is 10 i.e. at max 10 rows will be printed.
# Set it None to display all rows in the dataframe
pd.set_option('display.max_rows', None)

In [4]:
# change plot size
plt.rcParams["figure.figsize"] = (9,5)

## Importing the data

 The path of data to to copy from:
 *F:\HZDR\CD_data* for Daniel
 *C:\Users\crazy\Mega\Uni\Masterarbeit\Projekt\Data\CD_data*

In [6]:
#path = "F:\\HZDR\\CD_data"
path = "C:\\Users\\crazy\\Mega\\Uni\\Masterarbeit\\Projekt\\Data\\CD_data\\DNA Origami"
folderlist = os.listdir(path)
print(folderlist)

['Control_0M', 'Gdm2SO4_0.5M', 'Gdm2SO4_2M', 'Gdm2SO4_4M', 'GdmCl_0.5M', 'GdmCl_2M', 'GdmCl_2M_24h', 'GdmCl_4M', 'GdmSCN_0.5M', 'GdmSCN_2M', 'Urea_2M']


In [7]:
Control_0M = cdata.CData(os.path.join(path, folderlist[0]))
Gdm2SO4_05M = cdata.CData(os.path.join(path, folderlist[1]))
GdmCl_05M = cdata.CData(os.path.join(path, folderlist[2]))
GdmSCN_05M = cdata.CData(os.path.join(path, folderlist[3]))
Gdm2SO4_2M = cdata.CData(os.path.join(path, folderlist[4]))
Gdm2SO4_4M = cdata.CData(os.path.join(path, folderlist[5]))
GdmCl_2M = cdata.CData(os.path.join(path, folderlist[6]))
GdmCl_2M_24h = cdata.CData(os.path.join(path, folderlist[7]))
GdmSCN_2M = cdata.CData(os.path.join(path, folderlist[8]))
Urea_2M = cdata.CData(os.path.join(path, folderlist[9]))
GdmCl_4M = cdata.CData(os.path.join(path, folderlist[10]))

## Plotti

### Max and min functions

#### Max 210 - 230

In [7]:
df = ana.max_wave(Control_0M.t_df, wave_min=210, wave_max=230)
#df2 = ana.min_wave(Control_0M.t_df, wave_min=210, wave_max=230)
plot.function(["Wavelength", ], df, swap=True, y_label= "Wavelength [nm]",title="")
plot.function(["Value"], df, swap=True, title="Max CD value 210 - 230 nm")

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

#### Min 230 - 260

In [8]:
#df = ana.max_wave(Control_0M.t_df, wave_min=220, wave_max=260)
df2 = ana.min_wave(GdmSCN_05M.t_df, wave_min=220, wave_max=260)
plot.function(["Wavelength", ], df2, swap=True, y_label= "Wavelength [nm]", title= "Min Wavelength 220 - 260 nm")
plot.function(["Value"], df2, swap=True, title= "Min CD value 220 - 260 nm")

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

#### Max 260 - 300 nm

In [9]:
df = ana.max_wave(GdmSCN_2M.t_df, wave_min=260, wave_max=300)
#df2 = ana.min_wave(Control_0M.t_df, wave_min=260, wave_max=300)
plot.function(["Wavelength", ], df, swap=True, y_label= "Wavelength [nm]", title= "Max Wavelength 260 - 300 nm")
plot.function(["Value"], df, swap=True, title= "Max CD value 260 - 300 nm")

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

### Ploting temperature[K]; X-achis = Temperatur; Y-achsis = CD

In [10]:
plot.function([212, 220, 247, 260, 275], Urea_2M.t_df, Control_0M.t_df)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [11]:
# Plotting wavelength[nm]; X-achsis = temperature; Y-achis = CD

In [12]:
plot.function([ 260, 275], Control_0M.t_df,GdmSCN_05M.t_df, GdmSCN_2M.t_df, y_scaling = (-2,5))

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

## **analise**

In [13]:
plot.function([275, 290], Control_0M.t_df, Gdm2SO4_05M.t_df)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

### Correlation Analysis

In [14]:
print(Control_0M.t_list, GdmCl_05M.t_list)

[20, 30, 40, 45, 50, 53, 56, 58, 60, 62, 64, 67, 70, 75, 80, 90] [20, 30, 40, 45, 50, 56, 58, 60, 62, 64, 67, 70, 75, 80, 90]


<div class="alert alert-block alert-info">
<b>Achtung:</b> When Messungen unterschiedlich viele Temperaturwerte haben, kommt ein Fehler. Am besten vergleichen. Einfachste Lösung ist hier unten mit 'Datei_mit_mehr_werten'.loc[:, 'Datei_mit_mehr_Werten'.t_list]
</div>

In [15]:
sync, assync = ana.correlation(GdmCl_05M.t_df, ref_spec=Control_0M.t_df.loc[:, GdmCl_05M.t_list], scaling='pareto')


In [16]:
plot.heatmap(assync)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [17]:
plot.heatmap(sync, assync, x_min=[220, 220], y_min=[220, 220], c_min=[-5])

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [18]:
plot.heatmap(GdmSCN_05M)


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

### derivative example

In [10]:
deriv = ana.derivative(data_all.absorb_df)
data = GdmSCN_2M.absorb_df

In [15]:
plot.mult_func([260], [data], [deriv], title= "Absolut value and derivative", )                                                                                                                       #

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [22]:

print(deriv.max(axis=1), deriv.idxmax(axis=1))


Wavelength    90.000000
200            2.849753
201            2.542790
202            2.457902
203            2.019614
204            2.270501
205            3.173112
206            2.454104
207            1.342179
208            1.861131
209            1.681901
210            2.928545
211            3.059609
212            2.398827
213            2.073448
214            1.732772
215            2.191088
216            1.751661
217            1.453565
218            2.627458
219            2.570884
220            2.202083
221            1.366994
222            2.450186
223            2.840135
224            2.630694
225            3.161901
226            3.689438
227            2.025011
228            1.706574
229            3.008825
230            3.522675
231            2.649687
232            3.382818
233            3.606653
234            2.623721
235            1.174324
236            1.620939
237            2.522197
238            2.516258
239            3.799127
240            3

## Sigmoid Fits

In [43]:
test = ana.sigmoid_fit(ana.normalize(Control_0M.t_df, axis=1),  a_range=[0, 1], b_range=[20, 90])

x = np.arange(20,120)
data = pd.DataFrame([ana.sigmoid(x, a=0.5, b=61), ana.sigmoid(x, a=0.8, b=61), ana.sigmoid(x, a=0.2, b=61) ], index=["a=0.5", "a=0.8", "a=0.2"] )
plot.mult_func([247,"fit", "a=0.5", "a=0.8", "a=0.2"], [test, data], marker=["x","", "", "", "", "", ""], linestyle=["", "-", ":", ":", "_"], subtitle=["a", "b", "c", "d", "a=0.5", "a=0.8", "a=0.2"])

[ 0.18677156 61.81965343] [0.0197075  0.53616734]


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [None]:
test1 = ana.sigmoid_fit(ana.normalize(Control_0M.t_df, axis=1),  a_range=[0, 1], b_range=[20, 90])
test2 = ana.sigmoid_fit(ana.normalize(GdmCl_05M.t_df, axis = 1), a_range = [0,1], b_range = [20,90])

In [24]:
#print(deriv)

#### example for integral

In [25]:

integral = pd.DataFrame(['integral','error'], columns=['Wavelength'])
                        
for i in range(len(deriv.index)):
    f = ana.interpolate(deriv, i)
    value = integrate.quad(f, 30, 75)
    integral[i+200] = value
integral.set_index('Wavelength')
integral = integral.T



  If increasing the limit yields no improvement it is advised to analyze 
  the integrand in order to determine the difficulties.  If the position of a 
  local difficulty can be determined (singularity, discontinuity) one will 
  probably gain from splitting up the interval and calling the integrator 
  on the subranges.  Perhaps a special-purpose integrator should be used.
  value = integrate.quad(f, 30, 75)


In [26]:
print(integral)


                      0         1
Wavelength     integral     error
200         2570.454545       0.0
201            8.793748       0.0
202            4.060703       0.0
203            5.107813       0.0
204             6.50602       0.0
205           10.760795       0.0
206            6.385566       0.0
207            6.845118       0.0
208            3.290289       0.0
209           -0.385796       0.0
210           -1.347795       0.0
211           -0.763706       0.0
212            0.413888  0.000001
213            4.380085  0.000001
214            2.047519       0.0
215            3.451855       0.0
216           -1.570447       0.0
217            -1.35733       0.0
218           -7.178926       0.0
219           -2.399596       0.0
220           -0.042861       0.0
221            4.130761       0.0
222            6.842307       0.0
223             4.83898       0.0
224            6.039899       0.0
225            0.673239       0.0
226           -1.335985       0.0
227           

$$
\color{red}{\textbf{Ende. Vielleicht etwas $\LaTeX$ lernen?}}
$$

In [27]:
#data = Control_0M.t_df
prep = ana.normalize(data)
fit = ana.sigmoid_fit(prep)

KeyError: 247

In [None]:
plot.function(["fit", 247], fit)