# Lib import

In [45]:
import pandas as pd
import numpy as np
import scipy.stats as sps
import seaborn as sns

import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import axes3d  
from matplotlib import cm

# Загрузка данных и просмотр их первоначальной структуры

Загружаем датасет с GitHub:

In [46]:
import zipfile
import os

!wget --no-check-certificate \
    "https://github.com/PolMix/nems_ai/archive/refs/heads/main.zip" \
    -O "/tmp/data.zip"


# Extract the downloaded archive into /tmp. The context manager closes the
# archive even when extraction raises.
with zipfile.ZipFile('/tmp/data.zip', 'r') as zip_ref:
    zip_ref.extractall('/tmp')

--2023-05-13 11:15:22--  https://github.com/PolMix/nems_ai/archive/refs/heads/main.zip
Resolving github.com (github.com)... 140.82.112.3
Connecting to github.com (github.com)|140.82.112.3|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://codeload.github.com/PolMix/nems_ai/zip/refs/heads/main [following]
--2023-05-13 11:15:23--  https://codeload.github.com/PolMix/nems_ai/zip/refs/heads/main
Resolving codeload.github.com (codeload.github.com)... 140.82.114.9
Connecting to codeload.github.com (codeload.github.com)|140.82.114.9|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [application/zip]
Saving to: ‘/tmp/data.zip’

/tmp/data.zip           [      <=>           ]  12.69M  7.74MB/s    in 1.6s    

2023-05-13 11:15:24 (7.74 MB/s) - ‘/tmp/data.zip’ saved [13310665]



In [47]:
%cd /tmp/nems_ai-main/Data
# Name of the raw dataset file, resolved relative to the current directory.
filename = 'Dataset.txt'
# read_fwf parses the file as a table of fixed-width formatted lines.
df = pd.read_fwf(filename)

/tmp/nems_ai-main/Data


"Читаем" датасет:

In [48]:
df.head()

Unnamed: 0,Beam length (um),Beam width (nm),Thickness_1 (nm),Thickness_2 (nm),Temperature (K),Distance (nm),Gate voltage (V),Pretension (Pa),Eigenfrequency (Hz),Quality factor (1),Effective mass,Noise (kg^2 / s^3),TED (W)
0,10.666186,759.454713,192.911166,46.497924,19.089801,465.822644,28.323179,900356700.0,6.415102308860925E7+2400.623397410659i,13361.33,2.362616e-15,1.0511710000000001e-22,2.602258e-12
1,10.666186,759.454713,192.911166,46.497924,19.089801,465.822644,28.323179,900356700.0,1.6011837483039114E8+6638.939975583381i,12059.03,2.589984e-15,3.186781e-22,2.991534e-12
2,10.666186,759.454713,192.911166,46.497924,19.089801,465.822644,28.323179,900356700.0,2.909667084628352E8+12724.292499287933i,11433.51,2.560363e-15,6.037978e-22,3.40943e-12
3,10.666186,759.454713,192.911166,46.497924,19.089801,465.822644,28.323179,900356700.0,3.96451977228644E8+19548.92557221195i,10139.99,8.780197e-16,3.181145e-22,4.959458e-12
4,105.195552,890.696186,209.252268,37.407275,3.716553,314.636835,30.305477,509084900.0,2569354.395333146+0.17898817427683494i,7177442.0,3.216064e-14,2.1353440000000002e-28,7.976759e-12


# Функции для обработки датасета

Функция, реализующая трансформацию датасета 4row x 1col --> 1row x 4col:

In [49]:
def process_data(df, len_common, n_modes=4):
    '''
    Merges every group of n_modes consecutive rows of df into one wide row.
    - df - initial DataFrame in which each object occupies n_modes consecutive rows
    - len_common - number of leading columns that are identical for all rows of a group
    - n_modes - number of consecutive rows that form one group (4 by default)

    Returns a list of rows. Each row holds all columns of the group's first
    row followed by the columns from position len_common onwards of every
    remaining row of the group. Trailing rows that do not fill a complete
    group are discarded.
    '''
    if n_modes < 1:
        raise ValueError('n_modes must be a positive integer')

    data = []
    n_groups = df.shape[0] // n_modes
    for group_index in range(n_groups):
        first = group_index * n_modes
        # The first row of a group contributes the common and its own columns.
        row = df.iloc[first].values.flatten().tolist()
        # The remaining rows contribute only their mode-specific columns.
        for offset in range(1, n_modes):
            row += df.iloc[first + offset, len_common:].values.flatten().tolist()
        data.append(row)
    return data

Функция, заменяющая числовые (индексные) имена колонок на текстовые:

In [50]:
def name_data_columns(df, data, len_common):
    '''
    Names columns in data DataFrame.
    - df - initial DataFrame of 4row1col format that has named columns
    - data - new DataFrame of 1row4col format which columns are to be named
    - len_common - number of columns that are common for all resonant modes

    The number of modes is inferred from the width of data, so the function
    is not limited to four modes. The columns of data are renamed in place
    and the same DataFrame is returned.

    Raises ValueError if the width of data cannot be split into the common
    columns plus a whole number of per-mode column groups.
    '''
    source_columns = list(df.columns)
    cols_common = source_columns[:len_common]
    cols_differ = source_columns[len_common:]
    len_differ = len(cols_differ)  # number of mode-specific columns

    # Diagnostic output kept from the original implementation: both numbers
    # are expected to be equal.
    print(df.shape[1] - len_common, len_differ)

    if len_differ == 0:
        n_modes = 0
    else:
        n_modes, remainder = divmod(data.shape[1] - len(cols_common), len_differ)
        if remainder or n_modes < 0:
            raise ValueError(
                f'data has {data.shape[1]} columns, which does not match '
                f'{len(cols_common)} common columns plus groups of {len_differ} mode columns'
            )

    # Common columns keep their names; mode-specific ones get an 'M<n> ' prefix.
    columns = list(cols_common)
    for mode_index in range(1, n_modes + 1):
        columns.extend(f'M{mode_index} ' + col for col in cols_differ)

    # All names are assigned in a single step.
    data.columns = columns

    return data

Функция, возвращающая индексы строк (и столбцов, если рассматривать разные моды колебаний), в которых детектирована аномалия $ Im\left(f_0\right) < 0$:

In [51]:
def _has_negative_imaginary_part(value):
    # True when value, written like '1.5E7-2.0i', has a negative imaginary part.
    text = str(value).strip().replace(' ', '')
    if text.endswith('i'):
        # Python spells the imaginary unit 'j'.
        text = text[:-1] + 'j'
    try:
        return complex(text).imag < 0
    except ValueError:
        # Unparsable cell: fall back to the plain minus-sign heuristic.
        return '-' in text


def return_neg_frequencies_index(df, freq_indices):
    '''
    Returns indices of the objects having negative values of 
    imaginary part of resonant frequency which results in negative quality factor.
    - df - The DataFrame to be checked
    - freq_indices - indices of resonant frequencies in df

    Each cell is parsed as a complex number, so a minus sign that belongs to
    an exponent (e.g. '1.0E7+5.0E-4i') or to the real part is not reported.

    Returns two lists of equal length: positional row indices and the
    matching column indices, grouped by column in the order of freq_indices.
    '''
    list_i = []
    list_j = []
    for j in freq_indices:
        column = df.iloc[:, j]
        for i, value in enumerate(column):
            if _has_negative_imaginary_part(value):
                list_i.append(i)
                list_j.append(j)
    return list_i, list_j

Функция, реализующая удаление описанных выше аномалий:

In [52]:
def del_neg_frequencies(df, freq_indices):
    '''
    Deletes objects that have negative imaginary part of resonant frequency.
    - df - DataFrame to be processed
    - freq_indices - indices of resonant frequencies in df

    The rows are removed from df in place and df is returned.
    '''
    # Scan the DataFrame that was passed in, not a global one.
    positions, _ = return_neg_frequencies_index(df, freq_indices)
    if positions:
        # The scan yields positional indices while drop() expects index
        # labels, so the positions are translated to labels first.
        labels = df.index[sorted(set(positions))]
        df.drop(index=labels, inplace=True)
    return df

Функция, реализующая удаление мнимой части резонансной частоты для всех элементов датасета:

In [53]:
def _real_part_of(value):
    # Real part of a value written like '1.5E7+2.0i' or '1.5E7-2.0i'.
    text = str(value).strip().replace(' ', '')
    candidate = text[:-1] + 'j' if text.endswith('i') else text
    try:
        return complex(candidate).real
    except ValueError:
        # Keep the previous behaviour for cells that are not valid complex
        # numbers: take everything before the first '+'.
        return float(text.split('+', 1)[0])


def del_im_frequency(df, freq_indices):
    '''
    Replaces complex resonant frequencies by their real parts.
    - df - DataFrame to be processed
    - freq_indices - indices of resonant frequencies in df

    Both 'a+bi' and 'a-bi' notations are supported. The cells are
    overwritten in place with float values and df is returned.
    Raises ValueError if a cell cannot be interpreted as a number.
    '''
    for j in freq_indices:
        for i in range(0, df.shape[0]):
            df.iloc[i, j] = _real_part_of(df.iloc[i, j])
    return df

Функция, реализующая округление чисел до `decimals` знаков после запятой (потому что метод конечных элементов иногда, например, вместо числа `200` записывает `200.0000000111`):

In [54]:
def round_data_float(data, decimals):
    '''
    Returns data rounded to the given number of decimal places.
    - data - DataFrame to be rounded (left unchanged)
    - decimals - number of decimal places to keep
    '''
    rounded = data.round(decimals)
    return rounded

Функция, приводящая к типу `int` значения, которые метод конечных элементов должен был записать как `int`, но записал как `float`:

In [55]:
def round_data_int(data, cols_indices_to_round):
    '''
    Converts the selected columns of data to integers, in place.
    - data - DataFrame to be processed
    - cols_indices_to_round - positional indices of the columns to convert

    Values are rounded to the nearest integer before the cast, so that
    199.9999999 becomes 200 instead of being truncated to 199.
    Returns data.
    '''
    for col_index in cols_indices_to_round:
        column_name = data.columns[col_index]
        # Assigning by name replaces the column, so its dtype really becomes
        # an integer one instead of the values being cast back to float.
        data[column_name] = data.iloc[:, col_index].astype(float).round().astype(int)
    return data

# Обработка датасета

Преобразование 4row x 1col --> 1row x 4col:

In [56]:
# Merge every group of four consecutive rows of df into one wide row; the
# first 8 columns are treated as common to the rows of a group.
data = pd.DataFrame(process_data(df, len_common=8))
data.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,18,19,20,21,22,23,24,25,26,27
0,10.666186,759.454713,192.911166,46.497924,19.089801,465.822644,28.323179,900356700.0,6.415102308860925E7+2400.623397410659i,13361.33,...,2.909667084628352E8+12724.292499287933i,11433.51,2.560363e-15,6.037978e-22,3.40943e-12,3.96451977228644E8+19548.92557221195i,10139.99,8.780197e-16,3.181145e-22,4.959458e-12
1,105.195552,890.696186,209.252268,37.407275,3.716553,314.636835,30.305477,509084900.0,2569354.395333146+0.17898817427683494i,7177442.0,...,8224138.945881522+0.7163319699598331i,5740452.0,3.281744e-14,8.720428e-28,1.367533e-11,1.1522259804500692E7+1.1514823841560045i,5003229.0,3.291971e-14,1.406151e-27,1.583936e-11
2,92.051879,213.886957,78.837771,23.255635,0.017793,266.965778,0.0,981196300.0,3534070.7381136324+0.014998583641082577i,117813500.0,...,1.0633838931726312E7+0.048525947723057454i,109568600.0,2.955338e-15,3.326099e-32,9.50356e-13,1.4215267434397759E7-1.7026735618501154i,4174396.0,2.95539e-15,1.167079e-30,1.919962e-12
3,10.310316,221.113532,195.903798,37.079461,0.027053,251.500693,26.675832,635136300.0,3.3531096619837992E7+6.265918625790593i,2675673.0,...,1.2676424894250964E8+33.51392465609216i,1891218.0,7.20607e-16,6.788349e-28,2.254001e-12,1.9236968865737534E8+56.254152580472244i,1709827.0,7.147747e-16,1.130223e-27,2.477637e-12
4,18.683573,282.662136,207.614117,18.750307,0.496423,172.245797,2.647445,0.0,1.0490776466649445E7+0.5979256153162141i,8772643.0,...,4.475570525582115E7+2.685519572702211i,8332783.0,1.623836e-15,6.8497690000000005e-28,2.943955e-12,7.057575996260074E7+4.410434723934928i,8000998.0,1.616668e-15,1.1199730000000001e-27,3.081568e-12


Форматирование названий колонок:

In [57]:
# Label the columns of the wide frame: common names first, then the
# 'M<n> '-prefixed per-mode names taken from df.
data = name_data_columns(df, data, len_common=8)
data.head()

5 5


Unnamed: 0,Beam length (um),Beam width (nm),Thickness_1 (nm),Thickness_2 (nm),Temperature (K),Distance (nm),Gate voltage (V),Pretension (Pa),M1 Eigenfrequency (Hz),M1 Quality factor (1),...,M3 Eigenfrequency (Hz),M3 Quality factor (1),M3 Effective mass,M3 Noise (kg^2 / s^3),M3 TED (W),M4 Eigenfrequency (Hz),M4 Quality factor (1),M4 Effective mass,M4 Noise (kg^2 / s^3),M4 TED (W)
0,10.666186,759.454713,192.911166,46.497924,19.089801,465.822644,28.323179,900356700.0,6.415102308860925E7+2400.623397410659i,13361.33,...,2.909667084628352E8+12724.292499287933i,11433.51,2.560363e-15,6.037978e-22,3.40943e-12,3.96451977228644E8+19548.92557221195i,10139.99,8.780197e-16,3.181145e-22,4.959458e-12
1,105.195552,890.696186,209.252268,37.407275,3.716553,314.636835,30.305477,509084900.0,2569354.395333146+0.17898817427683494i,7177442.0,...,8224138.945881522+0.7163319699598331i,5740452.0,3.281744e-14,8.720428e-28,1.367533e-11,1.1522259804500692E7+1.1514823841560045i,5003229.0,3.291971e-14,1.406151e-27,1.583936e-11
2,92.051879,213.886957,78.837771,23.255635,0.017793,266.965778,0.0,981196300.0,3534070.7381136324+0.014998583641082577i,117813500.0,...,1.0633838931726312E7+0.048525947723057454i,109568600.0,2.955338e-15,3.326099e-32,9.50356e-13,1.4215267434397759E7-1.7026735618501154i,4174396.0,2.95539e-15,1.167079e-30,1.919962e-12
3,10.310316,221.113532,195.903798,37.079461,0.027053,251.500693,26.675832,635136300.0,3.3531096619837992E7+6.265918625790593i,2675673.0,...,1.2676424894250964E8+33.51392465609216i,1891218.0,7.20607e-16,6.788349e-28,2.254001e-12,1.9236968865737534E8+56.254152580472244i,1709827.0,7.147747e-16,1.130223e-27,2.477637e-12
4,18.683573,282.662136,207.614117,18.750307,0.496423,172.245797,2.647445,0.0,1.0490776466649445E7+0.5979256153162141i,8772643.0,...,4.475570525582115E7+2.685519572702211i,8332783.0,1.623836e-15,6.8497690000000005e-28,2.943955e-12,7.057575996260074E7+4.410434723934928i,8000998.0,1.616668e-15,1.1199730000000001e-27,3.081568e-12


Вывод информации об аномалиях мнимой части резонансной частоты, их удаление:

In [58]:
frequency_indices = [8, 13, 18, 23] # positions of the columns that hold the resonant frequencies

# Anomaly detection:
lst_i, lst_j = return_neg_frequencies_index(data, frequency_indices)
print("Row and column indices of detected anomalies of imaginary part of resonant frequency:")
print(lst_i, lst_j)
print(f"The length of the dataset is {data.shape[0]}")

# Anomaly removal:
data = del_neg_frequencies(data, frequency_indices)
# NOTE(review): the result of this second scan is neither printed nor used later in this cell.
lst_i, lst_j = return_neg_frequencies_index(data, frequency_indices)
print(f"Anomalies have been deleted, the length of the dataset is {data.shape[0]}")

Row and column indices of detected anomalies of imaginary part of resonant frequency:
[128, 176, 193, 198, 268, 301, 325, 343, 345, 350, 431, 463, 468, 476, 496, 498, 503, 507, 529, 592, 593, 630, 682, 692, 744, 755, 808, 827, 832, 833, 865, 894, 896, 972, 1007, 1052, 1080, 1217, 1313, 1315, 1322, 1336, 1346, 1367, 1380, 1406, 1476, 1491, 1529, 1539, 1545, 1603, 1610, 1611, 1613, 1652, 1663, 1673, 1693, 1721, 1729, 1743, 1789, 1824, 1830, 1844, 1845, 1871, 1877, 1956, 1984, 2071, 2081, 2083, 2100, 2115, 2124, 2181, 2189, 2277, 2312, 2316, 2343, 2360, 2411, 2430, 2506, 2544, 2598, 2654, 2888, 2903, 2909, 2947, 2990, 3078, 3111, 3232, 3252, 3362, 3421, 3539, 3555, 3569, 3642, 3655, 3673, 3694, 3701, 3715, 3797, 3898, 4013, 4027, 4075, 4079, 4159, 4185, 4220, 4236, 4240, 4313, 4319, 4323, 4336, 4393, 4410, 4434, 4447, 4487, 4504, 4510, 4520, 4529, 4537, 4542, 4584, 4611, 4633, 4667, 4701, 4752, 4840, 4893, 4948, 4953, 5000, 5095, 5101, 5140, 5153, 5243, 5277, 5283, 5287, 5297, 128, 193, 1

Удаление мнимой части всех элементов датасета:

In [59]:
# Strip the imaginary part from every resonant-frequency column, reusing the
# column positions already defined for the anomaly check instead of
# repeating the literal list.
data = del_im_frequency(data, frequency_indices)
data.head()

Unnamed: 0,Beam length (um),Beam width (nm),Thickness_1 (nm),Thickness_2 (nm),Temperature (K),Distance (nm),Gate voltage (V),Pretension (Pa),M1 Eigenfrequency (Hz),M1 Quality factor (1),...,M3 Eigenfrequency (Hz),M3 Quality factor (1),M3 Effective mass,M3 Noise (kg^2 / s^3),M3 TED (W),M4 Eigenfrequency (Hz),M4 Quality factor (1),M4 Effective mass,M4 Noise (kg^2 / s^3),M4 TED (W)
0,10.666186,759.454713,192.911166,46.497924,19.089801,465.822644,28.323179,900356700.0,64151023.088609,13361.33,...,290966708.462835,11433.51,2.560363e-15,6.037978e-22,3.40943e-12,396451977.228644,10139.99,8.780197e-16,3.181145e-22,4.959458e-12
1,105.195552,890.696186,209.252268,37.407275,3.716553,314.636835,30.305477,509084900.0,2569354.395333,7177442.0,...,8224138.945882,5740452.0,3.281744e-14,8.720428e-28,1.367533e-11,11522259.804501,5003229.0,3.291971e-14,1.406151e-27,1.583936e-11
3,10.310316,221.113532,195.903798,37.079461,0.027053,251.500693,26.675832,635136300.0,33531096.619838,2675673.0,...,126764248.94251,1891218.0,7.20607e-16,6.788349e-28,2.254001e-12,192369688.657375,1709827.0,7.147747e-16,1.130223e-27,2.477637e-12
4,18.683573,282.662136,207.614117,18.750307,0.496423,172.245797,2.647445,0.0,10490776.466649,8772643.0,...,44755705.255821,8332783.0,1.623836e-15,6.8497690000000005e-28,2.943955e-12,70575759.962601,8000998.0,1.616668e-15,1.1199730000000001e-27,3.081568e-12
5,121.747842,991.550766,102.506945,29.297485,2.037235,415.471216,18.419812,837421300.0,2649738.745159,7314592.0,...,8269130.657285,6082613.0,2.259615e-14,2.331664e-28,2.51865e-11,11381108.823312,5405428.0,2.267905e-14,3.624442e-28,2.982471e-11


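Дополнительно удалим строку с индексом `2`, потому что в ячейке `[2, 'M1 TED (W)']` содержится аномалия в данных (неопознанное вторжение символа `s` в `float`-значение) (поймано на графиках распределений). Сейчас код удаления закомментирован: строка с индексом `2` уже была удалена выше как аномалия с отрицательной мнимой частью резонансной частоты: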

In [60]:
#data = data.drop(2)
#print(df.iloc[0:3, 17])

In [61]:
# Summary statistics; with default arguments describe() reports the numeric columns only.
data.describe()

Unnamed: 0,Beam length (um),Beam width (nm),Thickness_1 (nm),Thickness_2 (nm),Temperature (K),Distance (nm),Gate voltage (V),Pretension (Pa),M1 Quality factor (1),M1 Effective mass,...,M2 Noise (kg^2 / s^3),M2 TED (W),M3 Quality factor (1),M3 Effective mass,M3 Noise (kg^2 / s^3),M3 TED (W),M4 Quality factor (1),M4 Effective mass,M4 Noise (kg^2 / s^3),M4 TED (W)
count,4909.0,4909.0,4909.0,4909.0,4909.0,4909.0,4909.0,4909.0,4909.0,4909.0,...,4909.0,4909.0,4909.0,4909.0,4909.0,4909.0,4909.0,4909.0,4909.0,4909.0
mean,195.469415,529.097516,150.819856,30.40434,23.742353,349.708484,17.227796,471234600.0,25026430.0,2.791775e-14,...,1.034791e-23,5.672434e-12,22199080.0,2.808115e-14,2.0142070000000003e-23,7.943504e-12,21157280.0,2.806848e-14,1.691331e-23,1.017113e-11
std,238.66689,269.281493,41.982835,11.712403,49.683629,145.560699,12.786909,319086300.0,83291480.0,4.391482e-14,...,2.0476380000000001e-22,5.494667e-12,73447450.0,4.388424e-14,3.8430140000000003e-22,7.561086e-12,86664400.0,4.39117e-14,2.5457230000000003e-22,9.70403e-12
min,10.006357,50.114701,66.096716,10.05581,0.010004,100.222269,0.0,0.0,1409.866,1.256945e-16,...,5.8652509999999995e-34,1.153546e-14,1409.853,1.263636e-16,8.801085e-34,3.347321e-14,1409.842,1.266484e-16,9.123322e-34,7.378453e-14
25%,36.605352,298.766024,107.242205,20.117549,0.309281,220.216016,5.140889,236618000.0,294304.5,3.492639e-15,...,6.930178e-30,1.884886e-12,253763.7,3.638416e-15,1.125043e-29,2.898015e-12,229653.7,3.614451e-15,1.9982180000000002e-29,3.819665e-12
50%,97.149246,526.46181,150.53747,30.610468,2.657895,349.938323,17.00521,477799200.0,1872628.0,1.0328e-14,...,5.438185e-28,4.082376e-12,1620164.0,1.061271e-14,9.200261e-28,5.52139e-12,1356222.0,1.062533e-14,1.5035640000000001e-27,7.042181e-12
75%,257.772109,758.297463,193.682337,40.815722,19.179475,476.299101,28.608417,747759500.0,12212930.0,3.247702e-14,...,3.186328e-26,7.780666e-12,10713710.0,3.272321e-14,5.545136e-26,1.055391e-11,9048577.0,3.280904e-14,8.259448e-26,1.33468e-11
max,1195.787212,999.773967,234.883842,49.992126,299.880837,599.954225,39.995581,999494600.0,1467439000.0,4.32697e-13,...,1.062324e-20,6.391638e-11,1207506000.0,4.327311e-13,1.952995e-20,7.927833e-11,2734598000.0,4.327028e-13,9.359624e-21,1.112834e-10


Задаем ограничения на значения параметров:

In [62]:
def catch_parameter_overflow(df, param_name, lower_limit, upper_limit):
    '''
    Returns the rows of df whose param_name value lies strictly between the limits.
    - df - DataFrame to be filtered
    - param_name - name of the column to be checked (its values are cast to float)
    - lower_limit - exclusive lower bound
    - upper_limit - exclusive upper bound
    '''
    values = df[param_name].astype(float)
    above_lower = values.gt(lower_limit)
    below_upper = values.lt(upper_limit)
    return df[above_lower & below_upper]

def catch_overflow(df, param_limits, n_modes=4):
    '''
    Keeps only the objects whose per-mode parameters lie inside the given limits.
    - df - DataFrame with 'M<n> '-prefixed per-mode columns
    - param_limits - dict that maps a column name without the 'M<n> ' prefix
      to a [lower_limit, upper_limit] pair; both limits are exclusive
    - n_modes - number of resonant modes stored in df (4 by default)

    The 'M<n> Eigenfrequency (Hz)' columns of df are always converted to
    float, whether or not a limit is set for them. The conversion is done on
    df itself, before any filtering. Returns the filtered DataFrame (df
    itself when param_limits is empty).
    '''
    mode_prefixes = [f'M{mode_number} ' for mode_number in range(1, n_modes + 1)]

    # Convert once, on the unfiltered frame, so that no assignment is ever
    # made to a filtered slice.
    for prefix in mode_prefixes:
        column = prefix + 'Eigenfrequency (Hz)'
        df[column] = df[column].astype(float)

    # Apply the limit masks one after another.
    for param, limits in param_limits.items():
        for prefix in mode_prefixes:
            df = catch_parameter_overflow(df, param_name=prefix + param,
                                          lower_limit=limits[0],
                                          upper_limit=limits[1])
    return df

In [63]:
# Limits are keyed by the column name without its 'M<n> ' prefix; each value is
# [lower_limit, upper_limit]. Every entry is currently commented out, so no
# filtering is applied.
# NOTE(review): at this point the columns are named like 'M1 Quality factor (1)',
# so the 'Quality factor' key would presumably need the ' (1)' suffix - confirm before enabling.
param_limits = {#'Eigenfrequency (Hz)': [0, 3e7], 
                #'Quality factor': [0, 5e8], 
                #'TED (W)': [1e-20, 1]
                }

print('Dataset length before anomaly check is {length}'.format(length=data.shape[0]))
data_catched = catch_overflow(data, param_limits)
print('Dataset length after anomaly check is {length}'.format(length=data_catched.shape[0]))
data_catched.describe()

Dataset length before anomaly check is 4909
Dataset length after anomaly check is 4909


  df.loc[:, f'M{mode_number} ' + 'Eigenfrequency (Hz)'] = df.loc[:, f'M{mode_number} ' + 'Eigenfrequency (Hz)'].astype(float)


Unnamed: 0,Beam length (um),Beam width (nm),Thickness_1 (nm),Thickness_2 (nm),Temperature (K),Distance (nm),Gate voltage (V),Pretension (Pa),M1 Eigenfrequency (Hz),M1 Quality factor (1),...,M3 Eigenfrequency (Hz),M3 Quality factor (1),M3 Effective mass,M3 Noise (kg^2 / s^3),M3 TED (W),M4 Eigenfrequency (Hz),M4 Quality factor (1),M4 Effective mass,M4 Noise (kg^2 / s^3),M4 TED (W)
count,4909.0,4909.0,4909.0,4909.0,4909.0,4909.0,4909.0,4909.0,4909.0,4909.0,...,4909.0,4909.0,4909.0,4909.0,4909.0,4909.0,4909.0,4909.0,4909.0,4909.0
mean,195.469415,529.097516,150.819856,30.40434,23.742353,349.708484,17.227796,471234600.0,6988914.0,25026430.0,...,28046850.0,22199080.0,2.808115e-14,2.0142070000000003e-23,7.943504e-12,41733750.0,21157280.0,2.806848e-14,1.691331e-23,1.017113e-11
std,238.66689,269.281493,41.982835,11.712403,49.683629,145.560699,12.786909,319086300.0,10984010.0,83291480.0,...,50091510.0,73447450.0,4.388424e-14,3.8430140000000003e-22,7.561086e-12,74181090.0,86664400.0,4.39117e-14,2.5457230000000003e-22,9.70403e-12
min,10.006357,50.114701,66.096716,10.05581,0.010004,100.222269,0.0,0.0,38402.54,1409.866,...,115310.9,1409.853,1.263636e-16,8.801085e-34,3.347321e-14,153871.0,1409.842,1.266484e-16,9.123322e-34,7.378453e-14
25%,36.605352,298.766024,107.242205,20.117549,0.309281,220.216016,5.140889,236618000.0,873070.0,294304.5,...,2650417.0,253763.7,3.638416e-15,1.125043e-29,2.898015e-12,3557429.0,229653.7,3.614451e-15,1.9982180000000002e-29,3.819665e-12
50%,97.149246,526.46181,150.53747,30.610468,2.657895,349.938323,17.00521,477799200.0,2470389.0,1872628.0,...,7744980.0,1620164.0,1.061271e-14,9.200261e-28,5.52139e-12,10597020.0,1356222.0,1.062533e-14,1.5035640000000001e-27,7.042181e-12
75%,257.772109,758.297463,193.682337,40.815722,19.179475,476.299101,28.608417,747759500.0,7668586.0,12212930.0,...,26935890.0,10713710.0,3.272321e-14,5.545136e-26,1.055391e-11,39326190.0,9048577.0,3.280904e-14,8.259448e-26,1.33468e-11
max,1195.787212,999.773967,234.883842,49.992126,299.880837,599.954225,39.995581,999494600.0,81156600.0,1467439000.0,...,367539200.0,1207506000.0,4.327311e-13,1.952995e-20,7.927833e-11,439933600.0,2734598000.0,4.327028e-13,9.359624e-21,1.112834e-10


Дополнительно: еще раз изменим тип всех данных на `float`:

In [64]:
# Convert every column to float in one step; astype() returns a frame whose
# columns really have the float dtype.
# The row-by-row search for the malformed 'Gate voltage (V)' string is not
# kept: it ran after that column had been converted to float, so the
# comparison with a string could never succeed.
data_catched = data_catched.astype(float)

# Сохраним датасет на диск:

In [65]:
# Final column names. The per-mode quantities are listed in the order in
# which the data columns are stored (Eigenfrequency, Quality factor,
# Effective mass, Noise, TED), so that the Noise and TED labels stay attached
# to their own values.
common_columns = ['Beam length (um)', 'Beam width (nm)', 'Thickness_1 (nm)', 'Thickness_2 (nm)',
                  'Temperature (K)', 'Distance (nm)', 'Gate voltage (V)', 'Pretension (Pa)']
mode_quantities = ['Eigenfrequency (Hz)', 'Quality factor', 'Effective mass (kg)',
                   'Noise (kg^2/s^3)', 'TED (W)']

data_catched.columns = common_columns + [f'M{mode_number} {quantity}'
                                         for mode_number in range(1, 5)
                                         for quantity in mode_quantities]

In [66]:
# Write the processed dataset to the current working directory; with default
# arguments to_csv also writes the row index as the first column.
data_catched.to_csv('Dataset_Processed.csv')

# Добавление вклада термоупругих потерь

Подмешаем к добротности в датасете добротность согласно приближению Roukes:

In [22]:
# Fundamental properties:
rho = [3100, 2700] # density of matter
E = [250e9, 70e9] # Young's modulus
k = [20, 237] # Thermal conductivity constant
alpha = [2.3e-6, 23.1e-6] # Thermal expansion coefficient

# Calculated properties:
Cp = [700, 904] # heat capacity at constant pressure

In [23]:
def _thickness_weighted_average(pair, thickness_1, thickness_2):
    # Average of a two-layer property, weighted by the two layer thicknesses.
    return (pair[0] * thickness_1 + pair[1] * thickness_2) / (thickness_1 + thickness_2)


def calculate_Q_TED(df, materials=None, n_modes=4):
    '''
    Adds the thermoelastic-damping contribution to the quality factors of df.
    - df - DataFrame whose columns at positions 1, 2, 3 and 4 hold the beam
      width (nm), the two layer thicknesses and the temperature, and which has
      'M<n> Eigenfrequency (Hz)' and 'M<n> Quality factor' columns
    - materials - optional dict with the keys 'Cp', 'E', 'rho', 'k' and 'alpha',
      each holding a pair of layer values; when omitted, the module-level
      lists Cp, E, rho, k and alpha are used
    - n_modes - number of resonant modes stored in df (4 by default)

    For every object the layer properties are averaged with the layer
    thicknesses as weights. For every mode the damping term delta_ted is
    computed and the stored quality factor Q is replaced by
    1 / (1 / Q + delta_ted). The 'M<n> Quality factor' columns are
    overwritten in df itself.

    Returns (df, param_history). param_history holds one value per object for
    the averaged properties, and one value per object and mode for 'chi' and
    'Q_ted', ordered object by object.

    NOTE(review): the length scale used for chi is the beam width column
    (position 1) - confirm that the width, not a thickness, is intended.
    '''
    if materials is None:
        materials = {'Cp': Cp, 'E': E, 'rho': rho, 'k': k, 'alpha': alpha}

    # Geometry and temperature as float arrays (one entry per object).
    width = df.iloc[:, 1].to_numpy(dtype=float) * 1e-9
    thickness_1 = df.iloc[:, 2].to_numpy(dtype=float)
    thickness_2 = df.iloc[:, 3].to_numpy(dtype=float)
    temperature = df.iloc[:, 4].to_numpy(dtype=float)

    # Average properties of the beam:
    Cp_av = _thickness_weighted_average(materials['Cp'], thickness_1, thickness_2)
    E_av = _thickness_weighted_average(materials['E'], thickness_1, thickness_2)
    rho_av = _thickness_weighted_average(materials['rho'], thickness_1, thickness_2)
    k_av = _thickness_weighted_average(materials['k'], thickness_1, thickness_2)
    alpha_av = _thickness_weighted_average(materials['alpha'], thickness_1, thickness_2)
    xi_av = k_av / (rho_av * Cp_av)

    param_history = {'Cp_av': Cp_av.tolist(), 'E_av': E_av.tolist(), 'rho_av': rho_av.tolist(),
                     'k_av': k_av.tolist(), 'alpha_av': alpha_av.tolist(), 'xi_av': xi_av.tolist(),
                     'chi': [], 'Q_ted': []}

    chi_by_mode = []
    q_ted_by_mode = []
    for mode_number in range(1, n_modes + 1):
        frequency = df[f'M{mode_number} Eigenfrequency (Hz)'].to_numpy(dtype=float)
        quality_column = f'M{mode_number} Quality factor'
        quality = df[quality_column].to_numpy(dtype=float)

        # TED quality factor; np.pi replaces the truncated constant 3.1415.
        chi = width * np.sqrt(2 * np.pi * frequency / (2 * xi_av))
        delta_ted = (E_av * alpha_av ** 2 * temperature) / (Cp_av * rho_av) * (
            6 / (chi ** 2)
            - 6 * (np.sinh(chi) + np.sin(chi)) / (chi ** 3 * (np.cosh(chi) + np.cos(chi)))
        )
        df[quality_column] = 1 / (1 / quality + delta_ted)

        chi_by_mode.append(chi)
        q_ted_by_mode.append(1 / delta_ted)

    if chi_by_mode:
        # One column per mode, flattened row by row: object 0 modes 1..n, object 1 ...
        param_history['chi'] = np.column_stack(chi_by_mode).ravel().tolist()
        param_history['Q_ted'] = np.column_stack(q_ted_by_mode).ravel().tolist()

    return df, param_history

In [24]:
print(data_catched.shape[0])
# calculate_Q_TED writes the updated quality factors into the frame it is
# given and returns that same frame together with the parameter history.
data_ted, history = calculate_Q_TED(data_catched)

4909


KeyError: ignored

In [None]:
# Print all recorded TED quality factors in ascending order.
sorted_history = sorted(history['Q_ted'])
print(sorted_history)

In [None]:
# Show the first rows of the frame returned by calculate_Q_TED.
data_ted.head()