In [16]:
'''
This script reads the raw data produced by SiPM_simulator, pads each pulse
with leading zeros, filters it with a digital low-pass filter and then
down-samples it. The filtered and sampled data are saved as CSV files.
'''

%reset
#!/usr/bin/env python
#-*- coding: utf-8 -*-
import sys
import os
import numpy as np
import pandas as pd
from scipy import signal
from random import randint
import matplotlib.pyplot as plt

def main():
    """Pre-process every raw ``.csv`` file found in the input directory.

    For each input file the mean pulse is optionally plotted, the pulses are
    zero-padded, low-pass filtered at every cut-off frequency and decimated
    to every sample frequency. Each (cut-off, sample) combination is
    appended to its own output file ``<cut_off MHz>_<sample MHz>.csv``
    inside ``<path_out>/<sipm>_<scintillator>``.

    Note that the output files are opened in append mode: running the
    pipeline twice on the same data duplicates the rows.

    Returns
    -------
    int
        Always 0.
    """
    fig = True
    #fig = False
    acquisition_frequency = 1e+11 # Hz
    pad_int = 50000
    cut_off_frequency = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 150, 200]) * 1e+06 # Hz
    sample_frequency = np.array([100, 200, 500, 1000]) * 1e+06 # Hz

    path = "/home/sommema4/git/SiPM_simulator/data/raw/MicroFC60035_EJ-276"
    #path = "/home/sommema4/git/SiPM_simulator/data/test/MicroFC60035_EJ-276"
    #path = "/home/sommema4/git/SiPM_simulator/data/test/MicroFC60035_EJ-301"
    path_out = "/home/sommema4/git/SiPM_simulator/data/preproc"
    figures = "/home/sommema4/git/SiPM_simulator/data/figure"

    for filename in os.listdir(path):
        if not filename.endswith(".csv"):
            continue
        file_in = os.path.join(path, filename)
        print(filename)
        sipm, scintillator, energy, particle = strip_filename(filename)
        dir_name = sipm + "_" + scintillator
        dir_out = os.path.join(path_out, dir_name)
        make_directory(dir_out)
        df = read_csv(file_in)
        if fig:
            fig_dir = os.path.join(figures, dir_name)
            make_directory(fig_dir)
            plot_mean_pulse(df, fig_dir, energy, particle)

        # Column 0 (used as the time axis by plot_mean_pulse) is dropped
        # before padding.
        df_padded = pad_dataset(df.iloc[:,1:], pad_int)
        for cut_off in cut_off_frequency:
            # Filtering depends only on the cut-off frequency, so it is done
            # once per cut-off instead of once per (cut-off, sample) pair.
            df_filter = filter_dataset(df_padded, cut_off, acquisition_frequency)
            for sample in sample_frequency:
                filename_out = str(cut_off * 1e-06) + "_" + str(sample * 1e-06) + ".csv"
                file_out = os.path.join(dir_out, filename_out)
                df_sample = sample_dataset(df_filter, sample, acquisition_frequency)
                df_info = info_dataset(df_sample, energy, particle)
                # Append mode creates the file when it does not exist yet,
                # so no separate "first write" branch is needed.
                df_info.to_csv(file_out, mode='a', header=False)
    return 0
    
def strip_filename(filename):
    """Split a data file name into its four underscore-separated fields.

    The directory part and the extension are discarded; the remaining stem
    must consist of exactly four ``_``-separated fields, otherwise the
    unpacking raises ``ValueError``.

    Returns
    -------
    tuple of str
        ``(sipm, scintillator, energy, particle)``.
    """
    stem, _extension = os.path.splitext(os.path.basename(filename))
    fields = stem.split("_")
    sipm, scintillator, energy, particle = fields
    return sipm, scintillator, energy, particle

def make_directory(dir_name):
    """Create ``dir_name`` (including parents) unless it already exists.

    The outcome is reported on stdout; an ``OSError`` during creation is
    reported and swallowed, it is not re-raised.
    """
    if os.path.isdir(dir_name):
        # Nothing to do and nothing to report.
        return
    try:
        os.makedirs(dir_name)
    except OSError:
        print ("Creation of the directory %s failed" % dir_name)
        return
    print ("Successfully created the directory %s " % dir_name)

def read_csv(filename):
    """Load a header-less, space-separated file and return it transposed.

    The last parsed column is discarded before transposing (presumably it is
    an empty column caused by a trailing separator - TODO confirm), so each
    line of the file becomes one column of the returned DataFrame.
    """
    raw = pd.read_csv(filename, sep=' ', header=None)
    without_last_column = raw.iloc[:, :-1]
    return without_last_column.T

def nan(df):
    """Print, for every column that contains missing values, the column
    label followed by the index labels of the missing rows.

    Columns without missing values produce no output.
    """
    for column in df.columns:
        is_missing = pd.isnull(df[column])
        missing_rows = is_missing[is_missing].index.values
        # Comparing the array against ``[]`` broadcasts to an empty array
        # (falsy, and an error on recent NumPy), so a single NaN was never
        # reported; test the number of hits instead.
        if missing_rows.size > 0:
            print(column, missing_rows)

def plot_mean_pulse(df, fig_dir, energy, particle):
    """Plot the mean pulse of each current component and save it as a PNG.

    Column 0 of ``df`` is used as the x axis (scaled by 1e9 and labelled in
    ns). The remaining columns are read in repeating groups of four: light,
    afterpulse, crosstalk, dark current. Each component is averaged across
    its columns, as is their element-wise sum ("Total").

    The figure is written to ``<fig_dir>/<energy rounded to 2 decimals>_<particle>.png``
    and closed afterwards. The title uses the unrounded ``energy``.
    """
    time_axis = df.iloc[:, 0]

    # One column slice per component; the offset selects the position inside
    # each group of four.
    light = df.iloc[:, 1::4]
    afterpulse = df.iloc[:, 2::4]
    crosstalk = df.iloc[:, 3::4]
    dark_current = df.iloc[:, 4::4]
    total = pd.DataFrame(light.values + crosstalk.values + afterpulse.values + dark_current.values)

    # (data, line style, colour, legend label) in drawing/legend order.
    traces = (
        (total, '-', 'k', 'Total'),
        (light, '--', 'r', 'Light component'),
        (afterpulse, '--', 'g', 'Afterpulse'),
        (crosstalk, '--', 'b', 'Optical crosstalk'),
        (dark_current, '--', 'y', 'Dark current'),
    )

    fig = plt.figure(figsize=(18,9))
    ax = fig.add_subplot(1,1,1)
    for component, line_style, colour, label in traces:
        ax.plot(1e+09 * time_axis, component.mean(axis=1), line_style, color=colour, label=label)
    ax.set_xlabel('Time [ns]')
    ax.set_ylabel('Current through load resistor [A]')
    ax.set_title(str(energy) + '_' + str(particle))
    ax.set_xlim(0, 1000)
    ax.legend(loc='upper right')

    # Only the file name uses the rounded energy value.
    rounded_energy = round(float(energy), 2)
    fig_path = os.path.join(fig_dir, str(rounded_energy) + '_' + str(particle) + '.png')
    plt.savefig(fig_path)
    plt.close()
    
def pad_dataset(df, pad_int):
    """Prepend ``pad_int`` rows of zeros to every column of ``df``.

    The returned frame has its columns relabelled ``0 .. n_columns-1``. The
    row index is not reset: the padding rows keep ``0 .. pad_int-1`` and the
    data rows keep their original index labels.

    The input frame is left untouched (its column labels were previously
    overwritten in place), and ``pd.concat`` is used because
    ``DataFrame.append`` no longer exists in pandas 2.x.
    """
    n_columns = df.shape[1]
    positions = np.arange(n_columns)

    df_pad = pd.DataFrame(np.zeros((pad_int, n_columns)), columns=positions)

    # Relabel a copy so the caller's frame keeps its own column labels.
    df_data = df.copy()
    df_data.columns = positions

    return pd.concat([df_pad, df_data])

def noise_dataset(df):
    """Unimplemented placeholder (judging by its name, intended for a
    noise-related processing step - TODO confirm and implement).

    Raises
    ------
    NotImplementedError
        Always. The previous body was a bare reference to an undefined
        name, which failed with an unhelpful ``NameError`` when called.
    """
    raise NotImplementedError("noise_dataset is not implemented")

def filter_wrap(x, a):
    """Filter one signal with a second-order-sections (SOS) filter.

    Parameters
    ----------
    x : array-like
        Samples of a single signal.
    a : array-like
        SOS coefficients, passed straight to ``scipy.signal.sosfilt``.

    Returns
    -------
    pandas.Series
        The filtered samples with a fresh default index. A diagnostic is
        printed when the result contains NaN; the result is returned anyway.
    """
    filtered = np.asarray(signal.sosfilt(a, x))
    filt_x = pd.Series(filtered)
    # np.sum() on a Series delegates to Series.sum(), which skips NaN by
    # default, so a sum-based check misses them. Inspect the raw array.
    if np.isnan(filtered).any():
        print('Error detected: NaN value', filt_x)
    return filt_x

def filter_dataset(df, cut_off, freq_acq):
    """Low-pass filter every column of ``df``.

    A 2nd-order Butterworth low-pass filter is designed with the cut-off
    normalised to the Nyquist frequency (``freq_acq / 2``) and applied to
    each column through ``filter_wrap``.

    Parameters
    ----------
    df : pandas.DataFrame
        One signal per column; columns must be labelled ``0 .. n-1`` because
        they are looked up by label.
    cut_off : float
        Cut-off frequency, in the same unit as ``freq_acq``.
    freq_acq : float
        Acquisition (sampling) frequency of the input signals.

    Returns
    -------
    pandas.DataFrame
        Filtered signals, one per column, labelled ``0 .. n-1``. An empty
        DataFrame is returned when ``df`` has no columns.
    """
    sos = signal.butter(2, cut_off/(freq_acq/2.0), btype='low', output='sos')
    n_columns = df.shape[1]
    if n_columns == 0:
        return pd.DataFrame()

    # Collect all filtered columns first and concatenate once: growing the
    # frame inside the loop is quadratic and yields duplicate column labels.
    filtered_columns = [filter_wrap(df.loc[:, i], sos) for i in range(n_columns)]

    #df_out = df.apply(filter_wrap, args=(sos)) # for some stupid reason it gives me error
    return pd.concat(filtered_columns, axis=1)

def sample_wrap(x, rep):
    """Return every ``rep``-th element of ``x``, beginning at a random
    offset drawn uniformly from ``0 .. rep-1``.
    """
    offset = randint(0, rep - 1)
    decimated = x[offset::rep]
    return decimated

def sample_dataset(df, sample, freq_acq):
    """Decimate the signals in ``df`` from ``freq_acq`` down to ``sample``.

    Columns are processed in consecutive groups of four; every group gets
    its own random start offset in ``0 .. rep-1`` and is then reduced to
    every ``rep``-th row, with ``rep = round(freq_acq / sample)``. Trailing
    columns that do not fill a complete group of four are dropped. If groups
    end up with different lengths, the shorter ones are padded with NaN by
    the column-wise concatenation.

    Parameters
    ----------
    df : pandas.DataFrame
        Signals, one per column.
    sample : float
        Target sample frequency, same unit as ``freq_acq``.
    freq_acq : float
        Acquisition frequency of the input.

    Returns
    -------
    pandas.DataFrame
        Decimated signals with columns relabelled ``0 .. n-1``.

    Raises
    ------
    ValueError
        If the decimation factor rounds to less than 1.
    """
    rep = int(round(freq_acq / sample, 0))
    if rep < 1:
        raise ValueError(
            "sample frequency %s is too high for acquisition frequency %s" % (sample, freq_acq)
        )

    data = df.to_numpy()
    groups = []
    for i in range(df.shape[1]//4):
        start = randint(0, rep-1)
        groups.append(pd.DataFrame(data[start::rep, 4*i:4*i+4]))

    # One concatenation at the end instead of re-copying the growing frame
    # on every iteration.
    df_out = pd.concat(groups, axis=1) if groups else pd.DataFrame()
    #df_out[df_out < 1.0e-07] = 0
    df_out.columns = np.arange(len(df_out.columns))
    return df_out

def info_dataset(df, energy, particle):
    """Transpose ``df`` and tag every resulting row with its metadata.

    Each column of ``df`` becomes one row of the result; two extra columns,
    ``"energy"`` and ``"particle"``, hold the given values for every row.
    The result keeps the original column labels of ``df`` as its row index.

    The previous implementation relied on ``DataFrame.append`` (removed in
    pandas 2.0), wrote strings into a float-typed frame, and renamed by
    label, which also renamed the first two sample columns because the
    appended rows duplicated the index labels 0 and 1.
    """
    # assign() returns a new frame, so the caller's data is not modified.
    df_out = df.T.assign(energy=energy, particle=particle)
    return df_out
    
# Run the whole preprocessing pipeline when this cell is executed.
main()



Once deleted, variables cannot be recovered. Proceed (y/[n])?  y


MicroFC60035_EJ-301_1.000000_p.csv
Successfully created the directory /home/sommema4/git/SiPM_simulator/data/preproc/MicroFC60035_EJ-301 
Successfully created the directory /home/sommema4/git/SiPM_simulator/data/figure/MicroFC60035_EJ-301 
MicroFC60035_EJ-301_0.200000_n.csv
MicroFC60035_EJ-301_0.600000_n.csv
MicroFC60035_EJ-301_2.000000_n.csv
MicroFC60035_EJ-301_0.700000_n.csv
MicroFC60035_EJ-301_0.800000_p.csv
MicroFC60035_EJ-301_0.600000_p.csv
MicroFC60035_EJ-301_0.400000_p.csv
MicroFC60035_EJ-301_2.000000_p.csv
MicroFC60035_EJ-301_0.100000_n.csv
MicroFC60035_EJ-301_0.100000_p.csv
MicroFC60035_EJ-301_0.500000_n.csv
MicroFC60035_EJ-301_0.900000_n.csv
MicroFC60035_EJ-301_0.800000_n.csv
MicroFC60035_EJ-301_0.300000_n.csv
MicroFC60035_EJ-301_0.500000_p.csv
MicroFC60035_EJ-301_1.500000_n.csv
MicroFC60035_EJ-301_0.300000_p.csv
MicroFC60035_EJ-301_0.900000_p.csv
MicroFC60035_EJ-301_1.000000_n.csv
MicroFC60035_EJ-301_0.400000_n.csv
MicroFC60035_EJ-301_0.700000_p.csv
MicroFC60035_EJ-301_1.500

0