# Lib import

In [83]:
import pandas as pd
import numpy as np
import scipy.stats as sps
import seaborn as sns

import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import axes3d  
from matplotlib import cm

# Загрузка данных и просмотр их первоначальной структуры

Монтируем гугл-диск, чтобы загрузить датасет:

In [84]:
# Mount Google Drive into the Colab filesystem at /content/drive/.
from google.colab import drive
drive.mount('/content/drive/')

# IPython magic: change the working directory to the data folder, so that
# files there can be opened by bare name (e.g. 'Data.txt').
%cd /content/drive/My Drive/Data/

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).
/content/drive/My Drive/Data


"Читаем" датасет:

In [85]:
# Name of the raw dataset file, resolved relative to the current working directory.
filename = 'Data.txt'
# Parse the file as a table of fixed-width formatted lines.
df = pd.read_fwf(filename)

In [86]:
# Preview the first rows of the raw table.
df.head()

Unnamed: 0,Beam length (um),Beam width (nm),Thickness_1 (nm),Thickness_2 (nm),Temperature (K),Distance (nm),Voltage (V),Eigenfrequency (Hz),Quality factor,Displacement mag (nm),Displacement RMS (nm),Vel mag (m/s),Vel mag RMS (m/s),Acc mag (m/s^2),Acc RMS (m/s^2),Effective mass (kg),Noise (kg^2/s^3),TED (W)
0,551.3921,206.029817,100.0,20.468396,27.337469,809.410905,1.433346,244516.16250606137+17.045496236034406i,7172.456557,0.006348,0.004489,1.965658e-08,7e-06,14.9843087307209,10.595529,2.019349e-14,3.4184680000000004e-27,9.933654107011143e-15
1,551.3921,206.029817,100.0,20.468396,27.337469,809.410905,1.433346,489152.1226144065+34.09921444709965i,7172.483744,0.003167,0.002239,1.019002e-09,7e-06,29.91527936888289,21.153297,2.019677e-14,6.839695e-27,2.6178511809986413e-14
2,551.3921,206.029817,100.0,20.468396,27.337469,809.410905,1.433346,734034.4588814733+51.17015509062039i,7172.486165,0.002104,0.001488,5.643049e-08,7e-06,44.75080303177741,31.644144,2.018266e-14,1.0256649999999999e-26,4.9678389727478896e-14
3,551.3921,206.029817,100.0,20.468396,27.337469,809.410905,1.433346,979304.3930766938+68.28011931669104i,7171.226434,0.001533,0.001111,2.132262e-06,7e-06,58.0291792188436,42.067906,2.016413e-14,1.3673639999999998e-26,8.073928834201975e-14
4,1172.102478,379.651846,200.0,18.53307,3.39314,592.749304,9.807538,112805.816662079+4.305307573811354i,13100.784872,0.043501,0.030765,5.311418e-07,2.2e-05,21.853639607416174,15.45516,1.451218e-13,1.704437e-28,6.023990208675844e-14


# Функции для обработки датасета

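Функция, объединяющая каждые 4 последовательные строки датасета (по одной строке на резонансную моду) в одну строку: общие параметры берутся из первой строки группы, а модо-зависимые колонки остальных строк дописываются справа (преобразование 4 строки × 1 набор колонок → 1 строка × 4 набора колонок):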

In [87]:
def process_data(df, n_modes=4, len_common=7):
    """Merge each group of consecutive per-mode rows into a single wide row.

    The table is read in groups of ``n_modes`` consecutive rows. The first
    row of a group contributes all of its columns; every following row of the
    group contributes only its columns from position ``len_common`` onward,
    appended to the right.

    Args:
        df: Source DataFrame with one row per mode.
        n_modes: Number of consecutive rows that form one group (default 4).
        len_common: Number of leading columns that are skipped in the second
            and later rows of a group (default 7).

    Returns:
        A list of lists, one inner list per complete group. Trailing rows
        that do not fill a complete group are dropped.

    Raises:
        ValueError: If ``n_modes`` is smaller than 1.
    """
    if n_modes < 1:
        raise ValueError("n_modes must be at least 1")

    n_rows = df.shape[0]
    # Only complete groups are emitted; an incomplete tail is ignored.
    n_complete = n_rows - n_rows % n_modes

    data = []
    for start in range(0, n_complete, n_modes):
        # First row of the group: keep every column.
        row = df.iloc[start].values.flatten().tolist()
        # Remaining rows of the group: keep only the per-mode columns.
        for offset in range(1, n_modes):
            row += df.iloc[start + offset, len_common:].values.flatten().tolist()
        data.append(row)
    return data

Функция, заменяющая числовые (индексные) имена колонок на текстовые названия:

In [88]:
def name_data_columns(df, data, len_common, n_modes=4):
    """Assign descriptive column names to the reshaped table ``data``.

    The first ``len_common`` column names of ``df`` are used unchanged. The
    remaining column names of ``df`` are repeated once per mode, each prefixed
    with ``'M<k> '`` for k = 1..n_modes.

    Args:
        df: DataFrame whose column names are the source of the new names.
        data: DataFrame whose ``columns`` attribute is overwritten in place.
        len_common: Number of leading columns shared by all resonant modes.
        n_modes: Number of modes to generate prefixed names for (default 4).

    Returns:
        The same ``data`` object, with its columns renamed.

    Raises:
        IndexError: If ``len_common`` is outside ``0..df.shape[1]``.
        ValueError: Raised by pandas if the number of generated names does
            not match the number of columns in ``data``.
    """
    if not 0 <= len_common <= df.shape[1]:
        raise IndexError("len_common is out of range for the columns of df")

    source_names = list(df.columns)
    cols_common = source_names[:len_common]
    cols_differ = source_names[len_common:]
    len_differ = df.shape[1] - len_common  # number of per-mode columns

    # Kept from the original: prints the per-mode column count twice
    # (computed two ways) as a sanity check.
    print(len_differ, len(cols_differ))

    # Build the complete list first and assign it once at the end.
    columns = list(cols_common)
    for mode_index in range(1, n_modes + 1):
        columns.extend(f'M{mode_index} ' + name for name in cols_differ)
    data.columns = columns

    return data

Функция, возвращающая индексы строк и столбцов (разные столбцы соответствуют разным модам колебаний), в которых обнаружена аномалия $\operatorname{Im}\left(f_0\right) < 0$:

In [89]:
def _imag_of_frequency(value):
    """Return the imaginary part of a frequency cell such as '2445.1+17.0i'."""
    text = str(value).strip()
    # Python's complex() expects the imaginary unit to be written as 'j'.
    if text.endswith(('i', 'I')):
        text = text[:-1] + 'j'
    return complex(text).imag


def return_neg_frequencies_index(df, freq_indices):
    """Locate cells whose complex frequency has a negative imaginary part.

    Each cell is parsed as a complex number, so a minus sign in an exponent
    (``'1.5e-03+4e-05i'``) or in the real part is not mistaken for a negative
    imaginary part, as a plain ``'-' in cell`` substring test would be.

    Args:
        df: DataFrame holding the frequency cells.
        freq_indices: Positional indices of the columns to scan.

    Returns:
        A pair ``(list_i, list_j)`` of equal length: positional row indices
        and positional column indices of the flagged cells, ordered column by
        column and, within a column, top to bottom.

    Raises:
        ValueError: If a cell cannot be parsed as a complex number.
    """
    list_i = []
    list_j = []
    for j in freq_indices:
        for i, cell in enumerate(df.iloc[:, j]):
            if _imag_of_frequency(cell) < 0:
                list_i.append(i)
                list_j.append(j)
    return list_i, list_j

Функция, реализующая удаление описанных выше аномалий:

In [90]:
def del_neg_frequencies(df, freq_indices):
    """Drop, in place, every row flagged by ``return_neg_frequencies_index``.

    Args:
        df: DataFrame to clean; it is modified in place.
        freq_indices: Positional indices of the frequency columns to check.

    Returns:
        The same ``df`` object with the flagged rows removed.
    """
    # Scan the DataFrame that was passed in, not a module-level variable.
    lst_i, _ = return_neg_frequencies_index(df, freq_indices)

    # A row flagged in several columns appears several times; drop it once.
    positions = sorted(set(lst_i))

    # The detector reports positions, while drop() expects index labels, so
    # translate before dropping. This matters once the index is no longer
    # 0..n-1, e.g. after an earlier deletion.
    df.drop(index=df.index[positions], inplace=True)
    return df

Функция, реализующая удаление мнимой части резонансной частоты для всех элементов датасета:

In [91]:
def _real_of_frequency(value):
    """Return the real part of a frequency cell such as '2445.1+17.0i'."""
    text = str(value).strip()
    # Python's complex() expects the imaginary unit to be written as 'j'.
    if text.endswith(('i', 'I')):
        text = text[:-1] + 'j'
    return complex(text).real


def del_im_frequency(df, freq_indices):
    """Replace each complex frequency cell by its real part (a float).

    Cells are parsed as complex numbers rather than split on the first '+',
    so values with a signed exponent (``'1e+05+3i'``) and cells that are
    already plain numbers are handled too.

    Args:
        df: DataFrame to convert; it is modified in place.
        freq_indices: Positional indices of the frequency columns.

    Returns:
        The same ``df`` object.

    Raises:
        ValueError: If a cell cannot be parsed as a complex number.
    """
    # NOTE(review): values are written back cell by cell, as in the original;
    # the dtype of the column itself is not converted here — confirm whether
    # a numeric dtype is wanted downstream.
    for j in freq_indices:
        for i in range(df.shape[0]):
            df.iloc[i, j] = _real_of_frequency(df.iloc[i, j])
    return df

Функция, реализующая округление чисел до `decimals` знаков после запятой (потому что метод конечных элементов иногда, например, вместо числа `200` записывает `200.0000000111`):

In [96]:
def round_data_float(data, decimals):
    """Return the result of ``data.round`` with the given number of decimals.

    Args:
        data: Object exposing a ``round(decimals=...)`` method (a DataFrame
            in this notebook).
        decimals: Number of decimal places to keep.

    Returns:
        Whatever ``data.round(decimals=decimals)`` returns.
    """
    rounded = data.round(decimals=decimals)
    return rounded

Функция, приводящая к типу `int` те значения, которые метод конечных элементов должен был записать как `int`, но записал как `float`:

In [97]:
def round_data_int(data, cols_indices_to_round):
    """Convert the selected columns to integers, rounding to the nearest one.

    Values are rounded before the cast, so ``199.9999999`` becomes ``200``
    rather than being truncated to ``199``.

    Args:
        data: DataFrame to convert; it is modified in place.
        cols_indices_to_round: Positional indices of the columns to convert.

    Returns:
        The same ``data`` object, consistent with the other helpers here.

    Raises:
        ValueError: Propagated from the cast if a value cannot be converted
            (for example NaN cannot be cast to ``int``).
    """
    for col_index in cols_indices_to_round:
        as_int = data.iloc[:, col_index].astype(float).round().astype(int)
        # Replace the column itself rather than writing into it through
        # ``iloc``, so that the integer dtype is what ends up stored.
        if hasattr(data, 'isetitem'):
            data.isetitem(col_index, as_int)
        else:
            # Fallback for pandas versions without DataFrame.isetitem.
            data[data.columns[col_index]] = as_int
    return data

# Обработка датасета

Преобразование 4row x 1col --> 1row x 4col:

In [92]:
# Merge each group of per-mode rows into one wide row and wrap the result in
# a DataFrame; its columns are numbered 0..N-1 at this point.
data = pd.DataFrame(process_data(df))
# Preview the reshaped table.
data.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,41,42,43,44,45,46,47,48,49,50
0,551.3921,206.029817,100.0,20.468396,27.337469,809.410905,1.433346,244516.16250606137+17.045496236034406i,7172.456557,0.006348,...,7171.226434,0.001533,0.001111,2.132262e-06,7e-06,58.0291792188436,42.067906,2.016413e-14,1.3673639999999998e-26,8.073928834201975e-14
1,1172.102478,379.651846,200.0,18.53307,3.39314,592.749304,9.807538,112805.816662079+4.305307573811354i,13100.784872,0.043501,...,13100.050846,0.025595,0.018105,1.866958e-06,5.1e-05,202.6858673339763,143.368548,1.45077e-13,6.765369000000001e-28,1.8993438467451285e-13
2,162.989741,883.213702,100.0,48.746775,19.573123,467.657697,29.607379,1029052.7937603326+72.17835812291919i,7128.541162,0.001068,...,7128.317477,0.000216,0.000153,3.63494e-10,4e-06,180.55187844824437,127.669459,2.975456e-14,7.811807e-25,3.2009892122408813e-12
3,1125.934896,849.829193,100.0,21.777872,1.963606,459.408015,10.608546,128330.29472177783+8.80380227743946i,7288.344892,0.043401,...,7287.185252,0.043312,0.030649,5.359753e-06,9.9e-05,449.8835468543956,318.349281,1.710741e-13,1.0300600000000001e-27,7.165893734957774e-13
4,506.53647,493.370771,200.0,48.956604,2.728436,836.414502,22.231017,287791.0660363386+9.901594610489772i,14532.56154,0.021416,...,14534.964747,0.004839,0.003422,3.790371e-08,2.5e-05,254.0624259180044,179.649363,9.095386e-14,4.2378400000000004e-27,3.817207692599109e-13


Форматирование названий колонок:

In [93]:
# Replace the numeric column labels with descriptive names; the first 7
# columns are shared by all modes, the rest get an 'M<k> ' prefix.
data = name_data_columns(df, data, len_common=7)
# Preview the renamed table.
data.head()

11 11


Unnamed: 0,Beam length (um),Beam width (nm),Thickness_1 (nm),Thickness_2 (nm),Temperature (K),Distance (nm),Voltage (V),M1 Eigenfrequency (Hz),M1 Quality factor,M1 Displacement mag (nm),...,M4 Quality factor,M4 Displacement mag (nm),M4 Displacement RMS (nm),M4 Vel mag (m/s),M4 Vel mag RMS (m/s),M4 Acc mag (m/s^2),M4 Acc RMS (m/s^2),M4 Effective mass (kg),M4 Noise (kg^2/s^3),M4 TED (W)
0,551.3921,206.029817,100.0,20.468396,27.337469,809.410905,1.433346,244516.16250606137+17.045496236034406i,7172.456557,0.006348,...,7171.226434,0.001533,0.001111,2.132262e-06,7e-06,58.0291792188436,42.067906,2.016413e-14,1.3673639999999998e-26,8.073928834201975e-14
1,1172.102478,379.651846,200.0,18.53307,3.39314,592.749304,9.807538,112805.816662079+4.305307573811354i,13100.784872,0.043501,...,13100.050846,0.025595,0.018105,1.866958e-06,5.1e-05,202.6858673339763,143.368548,1.45077e-13,6.765369000000001e-28,1.8993438467451285e-13
2,162.989741,883.213702,100.0,48.746775,19.573123,467.657697,29.607379,1029052.7937603326+72.17835812291919i,7128.541162,0.001068,...,7128.317477,0.000216,0.000153,3.63494e-10,4e-06,180.55187844824437,127.669459,2.975456e-14,7.811807e-25,3.2009892122408813e-12
3,1125.934896,849.829193,100.0,21.777872,1.963606,459.408015,10.608546,128330.29472177783+8.80380227743946i,7288.344892,0.043401,...,7287.185252,0.043312,0.030649,5.359753e-06,9.9e-05,449.8835468543956,318.349281,1.710741e-13,1.0300600000000001e-27,7.165893734957774e-13
4,506.53647,493.370771,200.0,48.956604,2.728436,836.414502,22.231017,287791.0660363386+9.901594610489772i,14532.56154,0.021416,...,14534.964747,0.004839,0.003422,3.790371e-08,2.5e-05,254.0624259180044,179.649363,9.095386e-14,4.2378400000000004e-27,3.817207692599109e-13


Вывод информации об аномалиях мнимой части резонансной частоты, их удаление:

In [94]:
# 7 shared columns followed by 11 columns per mode put the frequency columns
# at positions 7, 18, 29 and 40.
frequency_indices = [7, 18, 29, 40] # positional indices of the columns holding the resonant-frequency values

# Detect anomalies:
lst_i, lst_j = return_neg_frequencies_index(data, frequency_indices)
print("Row and column indices of detected anomalies of imaginary part of resonant frequency:")
print(lst_i, lst_j)
print(f"The length of the dataset is {data.shape[0]}")

# Remove anomalies:
data = del_neg_frequencies(data, frequency_indices)
# NOTE(review): the detection is re-run on the cleaned table, but its result
# is not printed or checked in this cell — confirm whether an explicit
# "no anomalies left" check was intended.
lst_i, lst_j = return_neg_frequencies_index(data, frequency_indices)
print(f"Anomalies have been deleted, the length of the dataset is {data.shape[0]}")

Row and column indices of detected anomalies of imaginary part of resonant frequency:
[3571, 3800, 805, 2460] [29, 29, 40, 40]
The length of the dataset is 3999
Anomalies have been deleted, the length of the dataset is 3995


Удаление мнимой части резонансной частоты во всех строках датасета:

In [95]:
# Keep only the real part of the resonant frequency in every mode column.
# Reuse frequency_indices (defined in the anomaly-removal cell) instead of
# repeating the literal list, so the two steps cannot drift apart.
data = del_im_frequency(data, frequency_indices)
# Preview the converted table.
data.head()

Unnamed: 0,Beam length (um),Beam width (nm),Thickness_1 (nm),Thickness_2 (nm),Temperature (K),Distance (nm),Voltage (V),M1 Eigenfrequency (Hz),M1 Quality factor,M1 Displacement mag (nm),...,M4 Quality factor,M4 Displacement mag (nm),M4 Displacement RMS (nm),M4 Vel mag (m/s),M4 Vel mag RMS (m/s),M4 Acc mag (m/s^2),M4 Acc RMS (m/s^2),M4 Effective mass (kg),M4 Noise (kg^2/s^3),M4 TED (W)
0,551.3921,206.029817,100.0,20.468396,27.337469,809.410905,1.433346,244516.162506,7172.456557,0.006348,...,7171.226434,0.001533,0.001111,2.132262e-06,7e-06,58.0291792188436,42.067906,2.016413e-14,1.3673639999999998e-26,8.073928834201975e-14
1,1172.102478,379.651846,200.0,18.53307,3.39314,592.749304,9.807538,112805.816662,13100.784872,0.043501,...,13100.050846,0.025595,0.018105,1.866958e-06,5.1e-05,202.6858673339763,143.368548,1.45077e-13,6.765369000000001e-28,1.8993438467451285e-13
2,162.989741,883.213702,100.0,48.746775,19.573123,467.657697,29.607379,1029052.79376,7128.541162,0.001068,...,7128.317477,0.000216,0.000153,3.63494e-10,4e-06,180.55187844824437,127.669459,2.975456e-14,7.811807e-25,3.2009892122408813e-12
3,1125.934896,849.829193,100.0,21.777872,1.963606,459.408015,10.608546,128330.294722,7288.344892,0.043401,...,7287.185252,0.043312,0.030649,5.359753e-06,9.9e-05,449.8835468543956,318.349281,1.710741e-13,1.0300600000000001e-27,7.165893734957774e-13
4,506.53647,493.370771,200.0,48.956604,2.728436,836.414502,22.231017,287791.066036,14532.56154,0.021416,...,14534.964747,0.004839,0.003422,3.790371e-08,2.5e-05,254.0624259180044,179.649363,9.095386e-14,4.2378400000000004e-27,3.817207692599109e-13


Выполняем округление чисел до 6 знаков после запятой:

In [99]:
# Round the table to 6 decimal places.
# NOTE(review): rounding to 6 decimals turns any magnitude below 5e-7 into 0,
# and the earlier previews show values of order 1e-14 and 1e-27 (effective
# mass, noise) — confirm that those columns are meant to go through this step.
data = round_data_float(data=data, decimals=6)
# Spot-check a single cell (row 1, column 1) after rounding.
print(data.iloc[1, 1])

379.651846


Сохраним датасет на гугл-диск:

In [105]:
# Render the table to text before opening the output file: opening with 'w'
# truncates the file immediately, so a failure inside to_string() would
# otherwise leave an empty file behind.
dataAsString = data.to_string(header=True, index=False)
# An explicit encoding keeps the output independent of the platform default.
with open('/content/drive/My Drive/Data/Data_Processed.txt', 'w', encoding='utf-8') as f:
    f.write(dataAsString)