In [None]:
''' Creation: 2021.09.07
    Last update: 2021.09.17
    
    Useful functions for correctly parsing the aging data files
    '''

def data_parsing(df):
    '''Split a raw data dataframe into one dataframe per block of data.

    The rows whose value in the ' Voltage:    ' column is a string containing
    'Volt' or 'Ref Cell' are taken as block headers. The dataframe is cut at
    these rows: the first block starts at row 0, every other block starts on
    the row following its header and stops just before the next header
    (or at the end of the dataframe).

    Args:
        df (DataFrame): raw data containing a ' Voltage:    ' column.

    Returns:
        list_df (list of DataFrame): the successive blocks, in file order.
    '''
    # Row positions of the block headers. Non-string cells (e.g. NaN) cannot be
    # headers and would make the `in` test raise a TypeError.
    header_positions = [
        position
        for position, value in enumerate(df[' Voltage:    '])
        if isinstance(value, str) and ('Volt' in value or 'Ref Cell' in value)
    ]
    index_data_header = [0] + header_positions + [len(df)]
    print(index_data_header)

    list_df = []
    for rank, (start, stop) in enumerate(zip(index_data_header[:-1],
                                             index_data_header[1:])):
        # The first block has no header row to skip; the others do.
        first_row = start if rank == 0 else start + 1
        # Positional slicing: the header positions come from `enumerate`, so
        # they must not be matched against the index labels.
        list_df.append(df.iloc[first_row:stop])
    return list_df

def drop_wrong_values(df):
    '''Extract the valid (voltage, current) points of a dataframe.

    A point is kept when both its ' Voltage:    ' and ' Current:' values can
    be converted to float and the voltage is strictly positive.

    Args:
        df (DataFrame): data containing ' Voltage:    ' and ' Current:' columns.

    Returns:
        (x, y) (tuple of lists of float): voltages and currents of the kept
        points; both lists always have the same length.
    '''
    x = []
    y = []
    for raw_voltage, raw_current in zip(df[' Voltage:    '], df[' Current:']):
        # Convert both values before storing anything, so that an invalid
        # current cannot leave a voltage without its matching current.
        try:
            voltage = float(raw_voltage)
            current = float(raw_current)
        except (TypeError, ValueError):
            # Non-numeric cell (header text, missing value, ...): skip the point
            continue
        if voltage > 0.0:
            x.append(voltage)
            y.append(current)
    return (x, y)

def select_files():

    '''The function `select_files` interactively selects *.csv or *.txt files from
    a directory.

    A small window holding a 'Select Files' button is displayed. Each click on
    the button opens a file dialog; the selection made in the last dialog is
    the one returned. The function returns once the window is closed.

    Args:
       DEFAULT_DIR (Path, global): root directory used for the file selection.

    Returns:
       filenames (tuple of str): selected files; empty if no file was selected.
    '''

    # Standard library imports
    import tkinter as tk
    from tkinter import ttk
    from tkinter import filedialog as fd

    root = tk.Tk()
    root.title('File Dialog')
    root.resizable(False, False)
    root.geometry('300x150')

    filetypes = (
            ('csv files', '*.csv'),
            ('text files', '*.txt'),
            )

    # Latest selection. It starts empty so that closing the window without
    # selecting anything returns an empty tuple instead of raising a NameError
    # (or returning the result of a previous call).
    selected = []

    def select_files_():
        # Button callback: replace the stored selection by the new one
        selected[:] = fd.askopenfilenames(
            title='Select files',
            initialdir=DEFAULT_DIR,
            filetypes=filetypes)

    open_button = ttk.Button(
        root,
        text='Select Files',
        command=select_files_)

    open_button.pack(expand=True)

    # Blocks until the user closes the window
    root.mainloop()

    return tuple(selected)

In [None]:
''' Creation: 2021.09.12
    Last update: 2021.09.17
    
    User specific paths definition
    Based on information that should be available in the "pvu_user_config.json" file
    '''

# Standard library imports
from pathlib import Path

# Local imports 
import PVcharacterization_Utils as pvu

## Root directory proposed by the file dialog: the user's home directory
DEFAULT_DIR = Path.home()

# Interactive selection of the data files, stored as a sorted list
datafiles_list = sorted(select_files())

print('Files list:     \n', datafiles_list)

In [None]:
''' Creation: 2021.09.12
    Last update: 2021.09.12
    
    Selection by the user of the data files
    corresponding to the same irradiance
    '''

# Standard library imports
import os

# Local imports 
import PVcharacterization_Utils as pvu

title = 'Select the 3 aging rawdata files (T0, T1 and T2 )'
datafiles_select = pvu.Select_items(datafiles_list, title, 'multiple')

# For each selected file, the last two characters of its name (extension
# removed) are used as the aging time label, which is the key of `dic`.
dic = {}
times = []
for datafile in datafiles_select:
    file_stem = os.path.splitext(datafile)[0]
    time = file_stem[-2:]
    times.append(time)
    dic[time] = {'filename': file_stem}

print(times)
print(dic)

In [None]:
''' Creation: 2021.09.07
    Last update: 2021.09.12
    
    For each file (.csv):
    - Read the header of the data file as meta_data dataframe and save as part of dict "dic"
    - Read the data located in the data file after the header, as dataframe
    - Parse the dataframe to locate the I(V) data by searching for specific column headers
    - Remove the lines where data are equal to '0.0' in each of the parsed dataframes
    - Convert the I(V) dataframe of interest into x,y data to plot
    
    Compute item variations for 3 couples of time : (T0,T1), (T0,T2) and (T1,T2)
    
    '''

# Standard library imports
import os
import sys
from pathlib import Path

# 3rd party imports
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
# Items read from the metadata of each file, with their unit
item_list = [('IrrCorr','W/m2'),('Pmax','W'),('Voc','V'),('Isc','A'),('Fill Factor','')]

# Number of metadata lines at the top of each file
header_length = 83

# Number of rows to skip to reach the I(V) data. Every platform other than
# Windows uses the header length itself, so that `data_deb` is always defined.
# NOTE(review): the reason for the doubled value on Windows is not visible
# here (presumably related to line endings) - TODO confirm.
if sys.platform == 'win32':
    data_deb = header_length*2-1
else:
    data_deb = header_length

for time in times:

    # NOTE(review): `my_pvaging_path` is not defined in the cells shown here;
    # it must exist in the kernel before this cell is run.
    filepath = my_pvaging_path / Path(dic[time]['filename'] + '.csv')
    print('\n',filepath,'\n')

    # Read the metadata: first column holds the keys, second column the values
    meta_data = pd.read_csv(filepath,
                            sep=',',
                            nrows=header_length,
                            header=None
                            )
    meta_data = dict(zip(meta_data[0],meta_data[1]))
    meta_data = {key.split(':')[0]:val for key,val  in meta_data.items()}

    # The time label taken from the file name must match the end of the ID
    assert time == meta_data['ID'][-2:],'Aging time in datafile' + dic[time]['filename']+' schould be '+ time

    print('Time:',time)
    for item in item_list:
        dic[time][item[0]] = float(meta_data[item[0]])
        print(item[0]+':',dic[time][item[0]],item[1])

    # Read the I(V) data and plot the first block
    df_data = pd.read_csv(filepath,
                          sep=',',
                          skiprows=data_deb,
                          )
    list_df_data = data_parsing(df_data)
    x,y = drop_wrong_values(list_df_data[0])
    plt.plot(x,y)
    plt.xlabel('Voltage')
    plt.ylabel('Current')
    plt.title(time)
    plt.show()

# Compute item variations
t0 , t1 , t2 = times[0] , times[1] , times[2]
irr_key = item_list[0][0]
irradiance =  dic[t0][irr_key]
assert (dic[t1][irr_key] == irradiance and dic[t2][irr_key] == irradiance),\
'Irradiances of selected files should be the same'

print('Irradiance:',irradiance,item_list[0][1])
dic_var = {}
dic_var[irradiance]= {}
t_tuples = [(t0,t1),(t0,t2),(t1,t2)]
for init, aged in t_tuples:
    delta_t = aged + '-' + init
    print('\nVariations for',delta_t)
    dic_var[irradiance][delta_t] = {}
    for item in item_list[1:]:
        ditem_key = 'Delta '+ item[0]
        # Relative variation between the two times, in percent of the initial value
        dic_var[irradiance][delta_t][ditem_key]= ((dic[aged][item[0]] - dic[init][item[0]])\
                                                              /dic[init][item[0]])*100
        print(ditem_key + ':',round(float(dic_var[irradiance][delta_t][ditem_key]),4),'(%)')

In [None]:
def data_parsing_1(filepath):

    '''Parse a .csv data file into its metadata and its blocks of numerical data.

    The file is read without header. The rows whose first column starts with
    ' Volt' or contains 'Ref Cell' are the headers of the data blocks. The rows
    located before the first of these headers are the metadata. Each block is
    converted to float, takes its column names from its header row and only
    keeps the rows whose first column is strictly positive. The last three
    rows of the file are not part of the last block.

    Args:
        filepath (Path or str): name of the .csv file.

    Returns:
        meta_data (dict): metadata; keys are the text preceding the first ':'
            of the first column, values are taken from the second column.
        list_df (list of DataFrame): the data blocks, in file order.
    '''

    # 3rd party imports
    import numpy as np
    import pandas as pd

    df_data = pd.read_csv(filepath,
                          sep=',',
                          skiprows=0,
                          header=None
                          )

    # na=False: an empty cell must not be mistaken for a block header
    # (a NaN result would be counted as a match by np.where).
    is_header = df_data.iloc[:,0].str.contains('^ Volt|Ref Cell',
                                               case=True,
                                               regex=True,
                                               na=False)
    index_data_header = np.where(is_header.to_numpy())[0]

    # Add the boundaries: row 0 (start of the metadata) and the end of the
    # last block (the last three rows of the file are left out).
    index_data_header = np.insert(index_data_header,
                                  [0,len(index_data_header)],
                                  [0,len(df_data)-3])

    meta_data = df_data.iloc[index_data_header[0]:index_data_header[1]]
    meta_data = dict(zip(meta_data[0],meta_data[1]))
    # Rows with an empty first column have no usable key and are skipped
    meta_data = {key.split(':')[0]:val for key,val  in meta_data.items()
                 if isinstance(key, str)}

    list_df = []
    for i in range(1,len(index_data_header)-1):
        header_row = index_data_header[i]
        dg = df_data.iloc[header_row+1:index_data_header[i+1]].astype(float)
        col_name = df_data.iloc[header_row]
        dg.columns = col_name
        dg = dg.loc[dg[col_name.iloc[0]] > 0]
        list_df.append(dg)
    return meta_data,list_df

In [None]:
''' Creation: 2021.09.07
    Last update: 2021.09.12
    
    For each file (.csv):
    - Read the header of the data file as meta_data dataframe and save as part of dict "dic"
    - Read the data located in the data file after the header, as dataframe
    - Parse the dataframe to locate the I(V) data by searching for specific column headers
    - Remove the lines where data are equal to '0.0' in each of the parsed dataframes
    - Convert the I(V) dataframe of interest into x,y data to plot
    
    Compute item variations for 3 couples of time : (T0,T1), (T0,T2) and (T1,T2)
    
    '''

# Standard library imports
import os
import sys
from pathlib import Path

# 3rd party imports
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
# Items read from the metadata of each file, with their unit
item_list = [('IrrCorr','W/m2'),('Pmax','W'),('Voc','V'),('Isc','A'),('Fill Factor','')]

for time in times:

    # NOTE(review): `my_pvaging_path` is not defined in the cells shown here;
    # it must exist in the kernel before this cell is run.
    filepath = my_pvaging_path / Path(dic[time]['filename'] + '.csv')
    print('\n',filepath,'\n')

    meta_data,list_df_data = data_parsing_1(filepath)

    # The time label taken from the file name must match the end of the ID
    assert time == meta_data['ID'][-2:],'Aging time in datafile' + dic[time]['filename']+' schould be '+ time

    print('Time:',time)
    for item in item_list:
        dic[time][item[0]] = float(meta_data[item[0]])
        print(item[0]+':',dic[time][item[0]],item[1])

    # Plot the first parsed block: first column versus second column.
    # The blocks are in `list_df_data`; `list_df` only exists inside
    # `data_parsing_1`.
    col = list_df_data[0].columns
    list_df_data[0].plot(x=col[0], y=col[1])
    plt.show()

# Compute item variations
t0 , t1 , t2 = times[0] , times[1] , times[2]
irr_key = item_list[0][0]
irradiance =  dic[t0][irr_key]
assert (dic[t1][irr_key] == irradiance and dic[t2][irr_key] == irradiance),\
'Irradiances of selected files should be the same'

print('Irradiance:',irradiance,item_list[0][1])
dic_var = {}
dic_var[irradiance]= {}
t_tuples = [(t0,t1),(t0,t2),(t1,t2)]
for init, aged in t_tuples:
    delta_t = aged + '-' + init
    print('\nVariations for',delta_t)
    dic_var[irradiance][delta_t] = {}
    for item in item_list[1:]:
        ditem_key = 'Delta '+ item[0]
        # Relative variation between the two times, in percent of the initial value
        dic_var[irradiance][delta_t][ditem_key]= ((dic[aged][item[0]] - dic[init][item[0]])\
                                                              /dic[init][item[0]])*100
        print(ditem_key + ':',round(float(dic_var[irradiance][delta_t][ditem_key]),4),'(%)')

In [None]:
# NOTE(review): scratch cell; `x` is not read by any later cell shown here.
x = ['EEE',4,'DDD']


In [None]:
from pathlib import Path

# Root directory proposed by the file dialog
DEFAULT_DIR = Path.home()

# Interactive selection of the files, then listing of their names
# (text following the last '/') with their rank in the selection
filenames = select_files()
for index, filename in enumerate(filenames):
    string_val = filename.rsplit('/', 1)[-1]
    print(f'{index}: {string_val}')

In [None]:
def data_parsing_2(filepath):

    '''
    The function `data_parsing_2` reads a csv file organized as follows:

                ==========  =================================
                Title:       HET JNHM72 6x12 M2 0200W
                Comment:
                Op:          Util
                ID:          JINERGY3272023326035_0200W_T0
                Mod Type:    ModuleType1
                Date:        2/15/2021
                ...          ...
                Voltage:     Current:
                -0.740710    1.8377770
                -0.740387    1.8374640
                -0.734611    1.8376460
                ...          ....
                Ref Cell:   Lamp I:
                199.9875    200.0105
                199.9824    200.1674
                ...         ...
                Voltage1:   Current1:
                -0.740710   1.8377770
                -0.740387   1.8374640
                -0.734611   1.8376460
                ...         ....
                Ref Cell1:  Lamp I1:
                ...         ....
                Voltage2:   Current2:
                -0.740710   1.8377770
                -0.740387   1.8374640
                -0.734611   1.8376460
                ...         ....
                Ref Cell2:  Lamp I2:
                0.008593    1.823402
                0.043122    1.823085
                ...         ....
                DarkRsh:    0
                DarkV:      Dark I:
                ==========  =================================

    The `.csv` file is parsed in a namedtuple `data` where:

       - data.IV0, data.IV1, data.IV2 are dataframes containing the `IV` curves as:

                ======== ==========
                Voltage  Current
                ======== ==========
                0.008593  1.823402
                0.043122  1.823085
                0.070891  1.823253
                xxxx      xxxx
                50.0      1.823253
                ======== ==========

       - data.Ref_Cell0, data.Ref_Cell1, data.Ref_Cell2 are dataframes containing
         the irradiance curves as:

                ======== ==========
                Ref_Cell  Lamp_I
                ======== ==========
                199.9875  200.0105
                199.9824  200.1674
                xxxxx     xxxxx
                199.9824  200.0074
                ======== ==========

       - data.meta_data is a dict containing the header:

    .. code-block:: python

      data.meta_data = {
      "Title":"HET JNHM72 6x12 M2 0200W",
      "Comment":"",
      "Op":"Util",
      .... :.....,
      }

    In every block, only the rows whose first column is strictly positive are
    kept, and the rows are renumbered from 0. The blocks are expected in the
    order IV0, Ref_Cell0, IV1, Ref_Cell1, IV2, Ref_Cell2.

    Args:
        filepath (Path or str): name of the .csv file

    Returns:
        data (namedtuple): results of the file parsing (see summary)

    Raises:
        IndexError: if the file holds fewer than six blocks of data

    '''

    # Standard library imports
    from collections import namedtuple

    # 3rd party imports
    import numpy as np
    import pandas as pd


    data_struct = namedtuple("PV_module_test",
                             ["meta_data",
                              "IV0",
                              "IV1",
                              "IV2",
                              "Ref_Cell0",
                              "Ref_Cell1",
                              "Ref_Cell2"]
                             )

    df_data = pd.read_csv(filepath,
                          sep=',',
                          skiprows=0,
                          header=None
                          )

    # Find the indices of the headers of the IV and Ref Cell data.
    # na=False: an empty cell must not be mistaken for a header
    # (a NaN result would be counted as a match by np.where).
    is_header = df_data.iloc[:,0].str.contains('^ Volt|Ref Cell',
                                               case=True,
                                               regex=True,
                                               na=False)
    index_data_header = np.where(is_header.to_numpy())[0]

    # Insertion of index 0 (start of the metadata) and of the index closing the
    # last block (the last three rows of the file are left out).
    index_data_header = np.insert(index_data_header,
                                  [0,len(index_data_header)],
                                  [0,len(df_data)-3])

    meta_data = df_data.iloc[index_data_header[0]:index_data_header[1]]
    meta_data = dict(zip(meta_data[0],meta_data[1]))
    # Rows with an empty first column have no usable key and are skipped
    meta_data = {key.split(':')[0]:val for key,val  in meta_data.items()
                 if isinstance(key, str)}

    list_df = []
    for i in range(1,len(index_data_header)-1):
        header_row = index_data_header[i]
        dg = df_data.iloc[header_row+1:index_data_header[i+1]].astype(float)

        # Keep the rows with a strictly positive first column, renumbered from 0
        dg = dg.loc[dg[0] > 0].reset_index(drop=True)

        if 'Voltage' in  df_data.iloc[header_row, 0]:
            dg.columns = ['Voltage','Current']
        else:
            dg.columns = ['Ref_Cell','Lamp_I']

        list_df.append(dg)

    # IV blocks are at even ranks, Ref Cell blocks at odd ranks
    data = data_struct(meta_data=meta_data,
                       IV0=list_df[0],
                       IV1=list_df[2],
                       IV2=list_df[4],
                       Ref_Cell0=list_df[1],
                       Ref_Cell1=list_df[3],
                       Ref_Cell2=list_df[5],
                      )
    return data

def parse_filename(file):

    '''
    Extract the module name, the power and the aging time from a file name.

    The string "file" is expected to be structured as follows:
      '~/JINERGY<ddddddddddddd>_<dddd>W_T<d>.csv'
    where <> is a placeholder, d a digit and ~ the relative or absolute path of the file.

    parse_filename parses "file" in three chunks: JINERGY<ddddddddddddd>, <dddd>, T<d>
    and stores them in the namedtuple FileNameInfo.

    Args:
       file (str): filename to parse

    Returns:
        FileInfo (namedtuple): fields `power` (int), `time` (str) and `name` (str)

    Raises:
        IndexError: if one of the three chunks is not found in "file"

    Examples:
    let file = 'C:/Users/franc/PVcharacterization_files/JINERGY3272023326035_0200W_T2.csv'
    we obtain:
        FileInfo.power = 200
        FileInfo.time = "T2"
        FileInfo.name = "JINERGY3272023326035"

    '''
    #Standard library imports
    import re
    from collections import namedtuple

    FileNameInfo = namedtuple('FileNameInfo', ['power', 'time', 'name'])

    # Four digits enclosed between '_' and 'W_'
    power_found = re.compile(r'(?<=\_)\d{4}(?=W\_)').findall(file)
    power = int(power_found[0])

    # 'T' and one digit, enclosed between '_' and '.'
    time_found = re.compile(r'(?<=\_)T\d{1}(?=\.)').findall(file)
    time = time_found[0]

    # 'JINERGY' and thirteen digits, followed by '_'
    name_found = re.compile(r'JINERGY\d{13}(?=\_)').findall(file)
    name = name_found[0]

    return FileNameInfo(power, time, name)

In [None]:
# Example: parse a file name with `parse_filename` and print the resulting namedtuple
file = 'C:/Users/franc/PVcharacterization_files/JINERGY3272023326035_0200W_T2.csv'
result = parse_filename(file)
print(result)


In [None]:
from collections import namedtuple

# Labels accepted as aging times: 'T0' ... 'T9'
valid_times = ['T'+str(i) for i in range(10)]
DataPV = namedtuple('DataPV',['data','file', 'file_data'])

# Parsed results indexed by their aging time label
pv_data_by_time = {}
for filename in filenames:
    # `file` must be the file being parsed, not a variable left over from
    # another cell.
    result = DataPV(data=data_parsing_2(filename),
                    file=filename,
                    file_data=parse_filename(filename))
    time = result.file_data.time
    if time not in valid_times:
        raise ValueError(f'{time!r} is not a valid aging time for file {filename}')

    pv_data_by_time[time] = result
    # The result is also bound to a notebook-level variable named after the
    # aging time (T0, T1, ...).
    globals()[time] = result


In [None]:
# Display the current value of `filename`
filename