In [1]:
from joblib import Parallel, delayed

from matplotlib import pyplot as plt
from os import listdir
import pandas as pd

import tensorflow as tf
import numpy as np

from fsmodels import FrequencyScan

%matplotlib inline

In [2]:
def get_file_names(datasets_folder):
    """Return the paths of all entries of ``datasets_folder`` in sorted order.

    Parameters
    ----------
    datasets_folder : str
        Folder whose entries are listed.

    Returns
    -------
    list of str
        One ``datasets_folder + '/' + entry`` string per directory entry.
        The entries are sorted by name because ``os.listdir`` returns them in
        arbitrary order, which would make the processing order (and the order
        of the reported messages) differ between runs and platforms.
    """
    # The '/' separator is kept on purpose: batch_processing recovers the
    # file name with f_name.split('/').
    return [datasets_folder + '/' + entry
            for entry in sorted(listdir(datasets_folder))]

In [3]:
def read_datasets(file_names):
    """Read every CSV file in ``file_names`` into a DataFrame.

    The first line of each file is used as the header and the first column is
    parsed as dates.

    Parameters
    ----------
    file_names : iterable of str
        Paths of the CSV files to read.

    Returns
    -------
    list of [str, pandas.DataFrame]
        One ``[file_name, dataframe]`` pair per input path, in input order.
    """
    # `infer_datetime_format` is intentionally not passed: it is deprecated in
    # pandas 2.x (format inference is the default behaviour there) and only
    # produces a warning.
    return [[f_name, pd.read_csv(f_name, header=0, parse_dates=[0])]
            for f_name in file_names]

In [4]:
def fit_model(df):
    """Fit a ``FrequencyScan`` model to a single frequency-scan dataset.

    Before fitting, the DLTS signal is rescaled so that its extreme value
    (the sample with the largest magnitude) equals the largest
    ``log10(frequency)`` value. After fitting, the amplitude of the fitted
    model is converted back to the original DLTS units.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``frequency_hz``, ``dlts_pf`` and
        ``f_pulse``. The first ``f_pulse`` value is multiplied by 1e-6
        (presumably microseconds -> seconds; confirm against the dataset).

    Returns
    -------
    tuple
        ``(initial_model, final_model, history)`` where ``initial_model`` is
        an unfitted model built from the initial guess (amplitude in original
        units), ``final_model`` is the fitted model with its amplitude
        converted back to original units, and ``history`` is whatever
        ``final_model.fit`` returned.
    """
    freq_powers = np.log10(df.frequency_hz.to_numpy())
    raw_dlts = df.dlts_pf.to_numpy()

    # Positional access: the first row is wanted whatever the index labels are.
    f_pulse_value = float(df.f_pulse.iloc[0]) * 10 ** (-6)

    # The sample with the largest magnitude defines both the scaling factor
    # and the initial guess of the fit.
    peak_index = np.absolute(raw_dlts).argmax()
    extrem_val = raw_dlts[peak_index]
    new_max_dlts = freq_powers.max()

    def normalize(x):
        return x / extrem_val * new_max_dlts

    def denormalize(x):
        return x * extrem_val / new_max_dlts

    scaled_dlts = normalize(raw_dlts)

    initial_time_constant = -freq_powers[peak_index]
    initial_amplitude = scaled_dlts[peak_index]

    # Reference model: same starting point, amplitude in original units,
    # never fitted.
    initial_model = FrequencyScan(filling_pulse=f_pulse_value,
                                  time_constant_power=initial_time_constant,
                                  amplitude=denormalize(initial_amplitude),
                                  stop_val=10 ** -10,
                                  )

    # The model that is actually fitted works on the rescaled signal.
    final_model = FrequencyScan(filling_pulse=f_pulse_value,
                                time_constant_power=initial_time_constant,
                                amplitude=initial_amplitude,
                                stop_val=10 ** -10,
                                verbose=False
                                )

    history = final_model.fit(freq_powers, scaled_dlts)

    # Bring the fitted amplitude back to the units of the measured signal.
    final_model.amplitude = denormalize(final_model.amplitude)

    return initial_model, final_model, history

In [5]:
def get_additional_text(df, initial_model, final_model):
    """Build the annotation texts describing a dataset and its models.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``frequency_hz``, ``dlts_pf``, ``f_pulse``,
        ``specimen_name``, ``temperature_k``, ``u1`` and ``ur``. Per-dataset
        values are read from the first row.
    initial_model, final_model : callable
        Model objects exposing ``time_constant_power``, ``p_coef`` and
        ``amplitude`` and callable on an array of ``log10(frequency)`` values.

    Returns
    -------
    tuple of str
        ``(text_1, text_2, text_3)``: the measurement conditions, the
        parameters of ``initial_model`` and the parameters of ``final_model``.
    """

    def text_params(model, actual_dlts, frequency_powers):
        """Format the parameters and the fit error (MSE/RMSE) of ``model``."""
        time_constant_power = model.time_constant_power
        p = model.p_coef
        amp = model.amplitude
        mse = np.square(actual_dlts - model(frequency_powers)).mean()

        return '\n'.join(['$\\log_{10}(\\tau)$ = ' + f'{time_constant_power:.4f} ' + '$\\log_{10}$(с)',
                          f'$\\tau$ = {10**time_constant_power:.4e} с',
                          f'$A$ = {amp:.4e} пФ',
                          f'$p$ = {p:.4f}',
                          f'MSE = {mse:.4e} $пФ^2$',
                          f'RMSE = {np.sqrt(mse):.4e} пФ'
                          ])

    frequency_powers = np.log10(df.frequency_hz.to_numpy())
    dlts_values = df.dlts_pf.to_numpy()

    # Positional access (.iloc[0]) reads the first row whatever the index
    # labels are; label-based [0] fails when the index does not contain 0.
    f_pulse = df.f_pulse.iloc[0] * 10 ** (-6)

    text_1 = '\n'.join([f'Образец: {df.specimen_name.iloc[0]}',
                        f'$T$ = {df.temperature_k.mean():.1f} К',
                        f'$U_1$={df.u1.iloc[0]} В',
                        f'$U_R$={df.ur.iloc[0]} В',
                        f'$t_1$ = {f_pulse:.4e} с'
                        ])

    text_2 = '\n'.join(['Начальные значения:', text_params(initial_model, dlts_values, frequency_powers)])
    text_3 = '\n'.join(['Конечные значения:', text_params(final_model, dlts_values, frequency_powers)])

    return text_1, text_2, text_3


def print_results(df, initial_model, final_model, history):
    """Plot the measured data, both models and the training loss.

    The left axes show the experimental DLTS values together with the curves
    of ``initial_model`` and ``final_model`` against ``log10(frequency)``.
    The right axes show the loss history divided by its maximum, with a text
    box listing the measurement conditions and the final model parameters.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide ``frequency_hz``, ``dlts_pf``, ``specimen_name``,
        ``temperature_k``, ``u1`` and ``ur`` (plus whatever
        ``get_additional_text`` reads).
    initial_model, final_model : callable
        Models callable on an array of ``log10(frequency)`` values.
    history : mapping
        Must contain the key ``'loss'`` with the sequence of loss values.

    Returns
    -------
    tuple
        ``(fig, (ax0, ax1))`` - the figure and its two axes.
    """
    frequency_powers = np.log10(df.frequency_hz.to_numpy())
    actual_dlts = df.dlts_pf.to_numpy()

    fig, (ax0, ax1) = plt.subplots(1, 2, figsize=(15, 5))

    # Left panel: experiment versus the initial and the fitted model.
    ax0.plot(frequency_powers, actual_dlts, '.g', label="Эксперимент", alpha=0.7)
    ax0.plot(frequency_powers, initial_model(frequency_powers), '-.b', label="Начальная модель", alpha=0.5)
    ax0.plot(frequency_powers, final_model(frequency_powers), 'r', label="Конечная модель")

    ax0.set_xlabel(r'$\log_{10}(F_0), \log_{10}$(Гц)')
    ax0.set_ylabel('DLTS, пФ')
    ax0.legend()
    ax0.grid()

    # Positional access (.iloc[0]) reads the first row whatever the index is.
    title = (f'{df.specimen_name.iloc[0]} T={df.temperature_k.mean():.1f} K, '
             f'$U_1$={df.u1.iloc[0]} V, $U_R$={df.ur.iloc[0]} V')
    ax0.set_title(title)

    # Right panel: loss history scaled by its largest value.
    loss = np.array(history['loss'])
    norm_loss = loss / loss.max()

    ax1.plot(norm_loss)
    ax1.set_xlabel('Итерации')
    ax1.set_ylabel('Нормализованная MSE')
    # Address ax1 explicitly instead of relying on pyplot's "current axes".
    ax1.set_ylim(0, max(ax1.get_ylim()))
    ax1.grid()

    # Only the dataset description and the final parameters are displayed.
    text_1, _, text_3 = get_additional_text(df, initial_model, final_model)
    text = '\n\n'.join([text_1, text_3])

    # Anchor the text box at 40 % of the x range and 95 % of the y maximum.
    x_low, x_high = min(ax1.get_xlim()), max(ax1.get_xlim())
    x = 0.4 * (x_high - x_low) + x_low
    y = 0.95 * max(ax1.get_ylim())
    fontsize = 10
    bbox_dict = {'facecolor': 'white', 'alpha': 0.8, 'edgecolor': 'gray'}

    ax1.text(x, y, text, fontsize=fontsize, verticalalignment='top', bbox=bbox_dict)

    return fig, (ax0, ax1)

In [6]:
# Folder that is scanned for the input dataset files.
DATASET_PATH = '../dataset'
# Folder that receives the generated figures (.pdf and .jpg).
PLOTS_PATH = '../plots'
# Folder that receives the per-dataset model tables (.csv).
MODELS_PATH = '../models'

In [7]:
# Paths of every entry found in DATASET_PATH.
fnames = get_file_names(DATASET_PATH)
# One [file_name, DataFrame] pair per dataset file.
df_list = read_datasets(fnames)

In [8]:
def batch_processing(f_name, df):
    """Fit one dataset, then save the model table and the figures.

    Writes ``<name>_model.csv`` into ``MODELS_PATH`` and ``<name>_model.pdf``
    / ``<name>_model.jpg`` into ``PLOTS_PATH``, where ``<name>`` is the file
    name of ``f_name`` without its ``.csv`` suffix.

    Parameters
    ----------
    f_name : str
        Path of the source dataset; components are separated by ``'/'``.
    df : pandas.DataFrame
        Dataset contents (must provide ``frequency_hz`` and ``dlts_pf`` plus
        the columns required by ``fit_model`` and ``print_results``).

    Returns
    -------
    str
        ``'<MODELS_PATH>/<name>_model - OK'``, or the same text ending in
        ``'ERROR'`` when the fitted ``p_coef`` is NaN.
    """
    initial_model, final_model, history = fit_model(df)

    fig, _ = print_results(df=df,
                           initial_model=initial_model,
                           final_model=final_model,
                           history=history
                           )

    # Strip the literal '.csv' suffix. str.rstrip('.csv') must not be used:
    # it removes any trailing run of the characters '.', 'c', 's', 'v', so
    # e.g. 'specs.csv' would become 'spe'.
    base_name = f_name.split('/')[-1]
    if base_name.endswith('.csv'):
        base_name = base_name[:-len('.csv')]
    stem = base_name + '_model'

    frequency_powers = np.log10(df.frequency_hz.to_numpy())
    # Evaluate the fitted model once and reuse it for the table and the RMSE.
    model_dlts = final_model(frequency_powers)

    model_df = df.copy()
    model_df['dlts_pf_model'] = model_dlts
    model_df['p_coef_model'] = final_model.p_coef
    model_df['time_constant_power_model'] = final_model.time_constant_power
    model_df['time_constant_model'] = 10 ** final_model.time_constant_power
    model_df['amplitude_model'] = final_model.amplitude

    mse = np.square(df.dlts_pf.to_numpy() - model_dlts).mean()
    model_df['rmse_model'] = np.sqrt(mse)

    # A NaN p_coef is reported as a failed fit; the outputs are still written.
    status = 'ERROR' if model_df['p_coef_model'].isna().any() else 'OK'
    message = MODELS_PATH + '/' + stem + ' - ' + status

    model_df.to_csv(MODELS_PATH + '/' + stem + '.csv', index=False)

    # Save through the figure object rather than pyplot's "current figure".
    for extension in ('.pdf', '.jpg'):
        fig.savefig(PLOTS_PATH + '/' + stem + extension, bbox_inches='tight')

    # Close every open figure so that repeated calls do not accumulate them.
    plt.close('all')

    return message
    
    
# Process every (file name, DataFrame) pair with joblib (n_jobs=-1) and
# collect the status message returned for each dataset.
messages = Parallel(n_jobs=-1)(
    delayed(batch_processing)(f_name, df)
    for f_name, df in df_list
)
messages

['../models/КТ117№1_п1(база 2)_2500Гц-1Гц_1пФ_+10С_-1В-2В_100мВ_20мкс_шаг_0,1_model - OK',
 '../models/КТ117№1_п1(база 2)_2500Гц-1Гц_1пФ_+10С_-1В-3В_200мВ_20мкс_шаг_0,1_model - OK',
 '../models/КТ117№1_п1(база 2)_2500Гц-1Гц_1пФ_+10С_-1В-4В_200мВ_20мкс_шаг_0,1_model - OK']