In [17]:
import numpy as np
import pandas as pd
import seaborn as sns
import random
import matplotlib.pyplot as plt

from sklearn.linear_model import LinearRegression
from sklearn.feature_selection import mutual_info_regression, f_regression

1. Исходные данные

In [18]:
# Seed the NumPy global random number generator so the generated dataset is reproducible
random_seed = 1900  # dataset_voltage_1: 1900    dataset_voltage_2: 2022    dataset_voltage_3: 1900
np.random.seed(random_seed)

# Number of power transmission lines
N_powerlines = 5

# Number of features per power line (6 in the general case, 3 in special cases)
N_feat = 6

# Number of rows (number of measurements)
N_rows = 1000

# Mutual impedances of the power lines (the exact regression coefficients);
# one tuple of six complex values per line
z_powerlines = {
    'z(1)': (0.40 + 0.060j, 0.38 + 0.055j, 0.36 + 0.050j, 0.35 + 0.048j, 0.33 + 0.045j, 0.30 + 0.044j),
    'z(2)': (0.20 + 0.025j, 0.18 + 0.022j, 0.16 + 0.020j, 0.15 + 0.019j, 0.14 + 0.016j, 0.12 + 0.014j),
    'z(3)': (0.18 + 0.022j, 0.16 + 0.020j, 0.15 + 0.018j, 0.13 + 0.016j, 0.12 + 0.015j, 0.11 + 0.014j),
    'z(4)': (0.16 + 0.020j, 0.14 + 0.018j, 0.13 + 0.016j, 0.10 + 0.015j, 0.09 + 0.014j, 0.08 + 0.012j),
    'z(5)': (0.15 + 0.018j, 0.14 + 0.016j, 0.13 + 0.015j, 0.11 + 0.012j, 0.10 + 0.012j, 0.09 + 0.010j,),
}

# Initial phase-A currents (at the start of the measurements);
# two complex values per line, expanded to phases B and C in a later cell
I_0 = {
    'A(1)': (400 + 150j, 380 + 140j),
    'A(2)': (200 + 120j, 185 + 110j),
    'A(3)': (450 + 200j, 440 + 185j),
    'A(4)': (600 + 250j, 580 + 230j),
    'A(5)': (200 + 100j, 190 + 100j),
}

# Growth coefficients of the initial phase-A currents by the end of the measurements
k_current = {
    'k(1)': 1.5,
    'k(2)': 1.5,
    'k(3)': 1.5,
    'k(4)': 1.5,
    'k(5)': 1.5,
}

# Relative measurement errors of the currents (elements of matrix A) and of the voltages (elements of vector b)
k_A = 0.01
k_b = 0.05

2. Запись точных коэффициентов регрессии z_powerlines в файл

In [19]:
# Store the exact regression coefficients (mutual impedances) in an Excel file.
df_z_powerlines = pd.DataFrame(z_powerlines)
# Raw string: '\D' is not a valid escape sequence, so the plain literal triggers a
# SyntaxWarning on recent Python versions. The resulting path is unchanged.
file_path = r'C:\Datasets/dataset_z_powerlines.xlsx'
df_z_powerlines.to_excel(file_path, index=False, float_format='%.20f')

3. Преобразование форматов исходных данных

In [20]:
# Complex rotation operators used to derive the phase B and phase C currents
# from the phase A current (unit phasors at +120 and -120 degrees).
b_rot = complex(-0.5, np.sqrt(3) / 2)
c_rot = complex(-0.5, -np.sqrt(3) / 2)

# I_full_0: for every line, the two phase-A currents from I_0 expanded to
# [A, B, C, A, B, C] by applying the rotation operators.
I_full_0 = {
    line_key: [
        first_a,
        first_a * b_rot,
        first_a * c_rot,
        second_a,
        second_a * b_rot,
        second_a * c_rot,
    ]
    for line_key, (first_a, second_a) in I_0.items()
}

# Flat tuple of the initial phase currents of all lines (dictionary order).
A_0 = tuple(current for currents in I_full_0.values() for current in currents)

# Flat tuple of the mutual impedances of all lines (dictionary order).
z = tuple(impedance for impedances in z_powerlines.values() for impedance in impedances)

# k_current_full: the growth coefficient of each line repeated for its six
# phase conductors. (An optional per-conductor random factor from
# np.random.uniform(0.995, 1.005) was left disabled in the original code.)
k_current_full = {line_key: [growth] * 6 for line_key, growth in k_current.items()}

# Flat tuple of growth coefficients, aligned element-by-element with A_0.
k_curr = tuple(growth for growths in k_current_full.values() for growth in growths)

4. Формирование матрицы A и вектора b

In [21]:
# Complex matrix A: one row per measurement, N_powerlines * N_feat current columns.
A = np.zeros((N_rows, N_powerlines * N_feat), dtype=complex)
# Row 0 holds the initial phase currents.
A[0] = A_0
# Every later row is the previous row with each element multiplied by its own
# random factor drawn from U(1, 1 + k_curr[j] / N_rows), so values never shrink.
# The draws are made one at a time in (row, column) order; keep this order,
# otherwise the same seed would produce a different dataset.
for i in range(1, N_rows):
    A[i] = A[i-1]
    for j in range(N_powerlines * N_feat):
        A[i, j] *= np.random.uniform(1, 1 + k_curr[j] / N_rows)

# Exact right-hand side: b[i] = sum_j A[i, j] * z[j]
b = np.sum(A * z, axis=1)

# Column names: a{j}_{i}, b{j}_{i}, c{j}_{i} for line i and three-phase group j
column_names_0 = [
    f"{phase}{j}_{i}"
    for i in range(1, N_powerlines + 1)
    for j in range(1, N_feat // 3 + 1)
    for phase in "abc"
]

df_0 = pd.DataFrame(A, columns=column_names_0)
df_0["b"] = b
# Raw string: '\D' is not a valid escape sequence, so the plain literal triggers a
# SyntaxWarning on recent Python versions. The resulting path is unchanged.
file_path = r'C:\Datasets/dataset_voltage_0.xlsx'
df_0.to_excel(file_path, index=False, float_format='%.20f')
df_0

Unnamed: 0,a1_1,b1_1,c1_1,a2_1,b2_1,c2_1,a1_2,b1_2,c1_2,a2_2,...,a2_4,b2_4,c2_4,a1_5,b1_5,c1_5,a2_5,b2_5,c2_5,b
0,400.000000+150.000000j,-329.903811+271.410162j,-70.096189-421.410162j,380.000000+140.000000j,-311.243557+259.089653j,-68.756443-399.089653j,200.000000+120.000000j,-203.923048+113.205081j,3.923048-233.205081j,185.000000+110.000000j,...,580.000000+230.000000j,-489.185843+387.294734j,-90.814157-0617.2947340j,200.000000+100.000000j,-186.602540+123.205081j,-13.397460-223.205081j,190.000000+100.000000j,-181.602540+114.544827j,-8.397460-214.544827j,51.550661+89.211562j
1,400.199348+150.074755j,-330.257274+271.700954j,-70.135971-421.649321j,380.141977+140.052307j,-311.633850+259.414547j,-68.758512-399.101661j,200.098011+120.058807j,-203.940198+113.214601j,3.925407-233.345291j,185.036876+110.021926j,...,580.571742+230.226725j,-489.293337+387.379839j,-90.906402-0617.9217550j,200.142276+100.071138j,-186.822224+123.350128j,-13.414573-223.490200j,190.241559+100.127136j,-181.716996+114.617019j,-8.400104-214.612380j,51.365043+89.404792j
2,400.377853+150.141695j,-330.554884+271.945796j,-70.159124-421.788515j,380.669449+140.246639j,-311.752943+259.513684j,-68.859214-399.686174j,200.181675+120.109005j,-204.070545+113.286961j,3.930518-233.649102j,185.158841+110.094446j,...,580.591603+230.234601j,-489.325577+387.405364j,-90.949052-0618.2116580j,200.335655+100.167828j,-186.862084+123.376445j,-13.431284-223.768602j,190.323220+100.170116j,-181.844256+114.697288j,-8.411311-214.898722j,51.420577+89.381060j
3,400.420190+150.157571j,-330.797469+272.145370j,-70.206610-422.073995j,381.002524+140.369351j,-312.106075+259.807643j,-68.868405-399.739525j,200.325944+120.195567j,-204.322407+113.426779j,3.932579-233.771620j,185.257118+110.152881j,...,580.862809+230.342148j,-489.561763+387.592355j,-91.018861-0618.6861730j,200.588837+100.294418j,-186.893687+123.397312j,-13.436167-223.849951j,190.561812+100.295691j,-181.844517+114.697452j,-8.413680-214.959248j,51.459560+89.512780j
4,400.904527+150.339198j,-331.173002+272.454319j,-70.268058-422.443417j,381.484400+140.546884j,-312.136820+259.833236j,-68.882413-399.820829j,200.556935+120.334161j,-204.469893+113.508654j,3.932621-233.774143j,185.501324+110.298084j,...,580.954806+230.378630j,-490.252920+388.139553j,-91.126062-0619.4148580j,200.866832+100.433416j,-187.029549+123.487015j,-13.447170-224.033261j,190.815490+100.429205j,-181.911546+114.739730j,-8.416476-215.030661j,51.684748+89.670429j
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,831.906880+311.965080j,-706.832299+581.507283j,-149.350699-897.879084j,796.877672+293.586511j,-677.716720+564.154298j,-143.882438-835.150708j,410.357594+246.214557j,-425.698718+236.320799j,8.324210-494.831522j,390.249545+232.040270j,...,1234.939547+489.717407j,-1032.421377+817.381305j,-190.677393-1296.099134j,417.519028+208.759514j,-402.101977+265.489454j,-28.111896-468.351327j,393.763237+207.243809j,-386.266011+243.635212j,-17.885687-456.957447j,99.059404+184.649999j
996,832.550847+312.206568j,-707.326779+581.914089j,-149.353545-897.896191j,797.233894+293.717750j,-677.953024+564.351005j,-144.087858-836.343043j,410.677196+246.406317j,-425.766156+236.358236j,8.336125-495.539830j,390.398704+232.128959j,...,1235.768604+490.046170j,-1032.702758+817.604078j,-190.801148-1296.940343j,418.076816+209.038408j,-402.374854+265.669622j,-28.127971-468.619149j,394.096081+207.418990j,-386.346479+243.685966j,-17.890998-457.093113j,99.624005+184.331976j
997,832.719469+312.269801j,-707.454338+582.019031j,-149.495879-898.751886j,797.873561+293.953417j,-678.877547+565.120609j,-144.144582-836.672296j,410.887844+246.532707j,-426.220905+236.610684j,8.345892-496.120432j,390.902573+232.428557j,...,1237.393394+490.690484j,-1033.148926+817.957315j,-191.002849-1298.311369j,418.191498+209.095749j,-402.529799+265.771925j,-28.153697-469.047742j,394.234861+207.492032j,-386.867429+244.014552j,-17.908828-457.548658j,99.473471+184.353148j
998,833.249890+312.468709j,-708.477474+582.860760j,-149.615537-899.471259j,797.938929+293.977500j,-679.014176+565.234344j,-144.354286-837.889500j,411.224614+246.734769j,-426.395265+236.707478j,8.356922-496.776078j,391.174047+232.589974j,...,1237.464771+490.718788j,-1033.711385+818.402621j,-191.163786-1299.405312j,418.536550+209.268275j,-402.829877+265.970053j,-28.164243-469.223439j,394.605419+207.687063j,-387.247111+244.254035j,-17.918173-457.787425j,99.466647+184.199842j


5. Датасет тока небаланса для матрицы A и вектора b

In [22]:
# Unbalance currents for the exact data: sum of the three phase currents
# (a + b + c) of every consecutive column triple of df_0.
df_0_sum = pd.DataFrame()

# Iterate over the current columns only. The bound is derived from the
# configuration instead of a hard-coded 30, so that with N_feat = 3 the target
# column 'b' is not summed as an extra group.
for i in range(0, N_powerlines * N_feat, 3):
    feature_group_0 = df_0.iloc[:, i:i+3]
    sum_features_0 = feature_group_0.sum(axis=1)
    df_0_sum[f'Jo_{i//3 + 1}'] = sum_features_0

df_0_sum['b'] = df_0['b']
df_0_sum

Unnamed: 0,Jo_1,Jo_2,Jo_3,Jo_4,Jo_5,Jo_6,Jo_7,Jo_8,Jo_9,Jo_10,b
0,0.000000+0.000000j,2.842171e-14+0.000000e+00j,-1.421085e-14+0.000000e+00j,0.000000+0.000000j,2.842171e-14+0.000000e+00j,-2.842171e-14+0.000000e+00j,-5.684342e-14+0.000000e+00j,2.842171e-14+0.000000e+00j,1.421085e-14+0.000000e+00j,1.421085e-14+0.000000e+00j,51.550661+89.211562j
1,-0.193897+0.126389j,-2.503848e-01+3.651927e-01j,8.321985e-02-7.188293e-02j,-0.212332+0.111927j,-6.064195e-01-1.203150e-02j,9.391051e-02+1.252994e-01j,2.347150e-01+8.129603e-02j,3.720032e-01-3.151904e-01j,-9.452115e-02-6.893398e-02j,1.244593e-01+1.317757e-01j,51.365043+89.404792j
2,-0.336154+0.298976j,5.729166e-02+7.414870e-02j,4.164786e-02-2.531360e-01j,-0.150289+0.078194j,-5.621811e-01+3.869122e-01j,-8.931367e-02-2.134684e-01j,2.626671e-02+1.931710e-01j,3.169740e-01-5.716935e-01j,4.228759e-02-2.243285e-01j,6.765178e-02-3.131880e-02j,51.420577+89.381060j
3,-0.583889+0.228946j,2.804412e-02+4.374687e-01j,-6.388370e-02-1.492746e-01j,-0.190092+0.019631j,-4.499677e-01+7.218067e-02j,1.153200e-01-1.521820e-01j,3.492654e-01+3.037977e-01j,2.821852e-01-7.516697e-01j,2.589828e-01-1.582212e-01j,3.036148e-01+3.389519e-02j,51.459560+89.512780j
4,-0.536533+0.350100j,4.651671e-01+5.592921e-01j,1.966283e-02+6.867218e-02j,-0.044283-0.028641j,-7.801765e-01-2.350933e-01j,7.066789e-03-2.555255e-02j,9.655988e-01+1.644804e-01j,-4.241762e-01-8.966750e-01j,3.901137e-01-1.128297e-01j,4.874689e-01+1.382741e-01j,51.684748+89.670429j
...,...,...,...,...,...,...,...,...,...,...,...
995,-24.276119-4.406721j,-2.472149e+01+2.259010e+01j,-7.016914e+00-1.229617e+01j,-1.123046-4.098023j,1.419064e+01+1.714850e+00j,-4.767864e-01-4.964001e+01j,6.001957e+01-6.617030e+00j,1.184078e+01+1.099958e+01j,-1.269484e+01+5.897641e+00j,-1.038846e+01-6.078427e+00j,99.059404+184.649999j
996,-24.129478-3.775535j,-2.480699e+01+2.172571e+01j,-6.752835e+00-1.277528e+01j,-1.117037-4.332126j,1.459066e+01+7.598354e-01j,-2.658615e-01-5.027171e+01j,6.107476e+01-7.704812e+00j,1.226470e+01+1.070991e+01j,-1.242601e+01+6.088881e+00j,-1.014139e+01-5.988157e+00j,99.624005+184.331976j
997,-24.230748-4.463054j,-2.514857e+01+2.240173e+01j,-6.987169e+00-1.297704e+01j,-0.955479-3.928644j,1.424699e+01+4.960376e-01j,-2.331901e-01-5.035883e+01j,6.063724e+01-7.349577e+00j,1.324162e+01+1.033643e+01j,-1.249200e+01+5.819933e+00j,-1.054140e+01-6.042073e+00j,99.473471+184.353148j
998,-24.843122-4.141791j,-2.542953e+01+2.132234e+01j,-6.813729e+00-1.333383e+01j,-0.892611-3.996782j,1.475590e+01+3.368389e-01j,2.978971e-01-5.060441e+01j,6.109217e+01-6.818254e+00j,1.258960e+01+9.716098e+00j,-1.245757e+01+6.014889e+00j,-1.055986e+01-5.846328e+00j,99.466647+184.199842j


In [23]:
# Element-wise real part, imaginary part and modulus of the exact complex dataset.
df_0_real = df_0.applymap(np.real)
df_0_imag = df_0.applymap(np.imag)
df_0_modul = df_0.applymap(abs)


In [24]:
# sns.pairplot(df_0_modul, kind="reg")
# plt.show()

6. Генерация матрицы A_h и вектора b_d, заданных с погрешностями k_A и k_b

In [25]:
# Фиксация генератора случайных чисел для генерации матрицы A_h и вектора b_d
random_seed = 1986  # для dataset_voltage_1: 1986    для dataset_voltage_2: 1986    для dataset_voltage_3: 2017  
np.random.seed(random_seed)

In [26]:
# Multiplicative measurement noise: every element of A is scaled by a factor from
# U(1 - k_A, 1 + k_A) and every element of b by a factor from U(1 - k_b, 1 + k_b).
# The factors for A are drawn first, then those for b (order matters for the seed).
random_values_1 = np.random.uniform(1 - k_A, 1 + k_A, size=A.shape)
random_values_2 = np.random.uniform(1 - k_b, 1 + k_b, size=b.shape)
A_h = A * random_values_1
b_d = b * random_values_2

# Column names: a{j}_{i}_h, b{j}_{i}_h, c{j}_{i}_h for line i and three-phase group j
column_names = [
    f"{phase}{j}_{i}_h"
    for i in range(1, N_powerlines + 1)
    for j in range(1, N_feat // 3 + 1)
    for phase in "abc"
]

df_complex = pd.DataFrame(A_h, columns=column_names)
df_complex["b_d"] = b_d
# Raw string: '\D' is not a valid escape sequence, so the plain literal triggers a
# SyntaxWarning on recent Python versions. The resulting path is unchanged.
file_path = r'C:\Datasets/dataset_voltage_1.xlsx'
df_complex.to_excel(file_path, index=False, float_format='%.20f')
df_complex

Unnamed: 0,a1_1_h,b1_1_h,c1_1_h,a2_1_h,b2_1_h,c2_1_h,a1_2_h,b1_2_h,c1_2_h,a2_2_h,...,a2_4_h,b2_4_h,c2_4_h,a1_5_h,b1_5_h,c1_5_h,a2_5_h,b2_5_h,c2_5_h,b_d
0,400.551003+150.206626j,-327.115056+269.115868j,-70.248269-422.324448j,383.020983+141.112994j,-311.523095+259.322351j,-69.035212-400.707738j,199.544548+119.726729j,-204.541582+113.548451j,3.937960-234.091501j,185.808949+110.480996j,...,579.711772+229.885703j,-492.695897+390.073689j,-90.409219-0614.5422340j,199.857361+99.928680j,-185.225242+122.295714j,-13.444449-223.987941j,189.876204+99.934844j,-181.668176+114.586226j,-8.323296-212.650035j,52.5736280+90.9818700j
1,402.897073+151.086402j,-330.721252+272.082667j,-70.152577-421.749159j,376.509043+138.713858j,-313.805916+261.222648j,-69.414095-402.906926j,198.676489+119.205894j,-204.628024+113.596438j,3.951636-234.904457j,185.748356+110.444968j,...,579.267648+229.709585j,-486.857554+385.451398j,-90.631903-0616.0558890j,198.900355+99.450177j,-186.688274+123.261687j,-13.377447-222.871660j,190.091421+100.048116j,-182.077586+114.844459j,-8.319270-212.547166j,52.0700980+90.6319940j
2,400.306319+150.114870j,-328.644700+270.374298j,-70.684486-424.946931j,383.170748+141.168170j,-314.358308+261.682478j,-68.995364-400.476440j,198.650539+119.190323j,-204.883330+113.738168j,3.960615-235.438243j,183.454454+109.081026j,...,580.538717+230.213629j,-487.131737+385.668472j,-91.010959-0618.6324630j,199.756671+99.878335j,-188.129102+124.212999j,-13.471689-224.441756j,189.110577+99.531883j,-183.283265+115.604935j,-8.417774-215.063845j,53.0669500+92.2428440j
3,403.836807+151.438803j,-332.855436+273.838448j,-70.582240-424.332241j,378.222061+139.344970j,-310.974773+258.865909j,-69.367150-402.634436j,201.758683+121.055210j,-203.157133+112.779893j,3.960431-235.427278j,183.853475+109.318282j,...,583.565870+231.414052j,-492.400176+389.839563j,-91.503133-0621.9779350j,200.494753+100.247376j,-186.879282+123.387801j,-13.534287-225.484654j,189.352013+99.658954j,-181.480592+114.467909j,-8.353137-213.412435j,51.8071840+90.1174660j
4,399.009638+149.628614j,-328.077618+269.907762j,-70.004927-420.861503j,380.630202+140.232180j,-313.281860+260.786406j,-68.942244-400.168113j,200.749613+120.449768j,-204.947354+113.773710j,3.917201-232.857471j,186.664029+110.989422j,...,576.199073+228.492736j,-486.211869+384.940201j,-91.182409-0619.7978690j,199.432065+99.716033j,-185.353794+122.380591j,-13.536987-225.529635j,189.411923+99.690486j,-182.769676+115.280992j,-8.460957-216.167114j,50.2841900+87.2405320j
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,823.897178+308.961442j,-701.482206+577.105789j,-150.761509-906.360705j,795.798866+293.189056j,-680.337269+566.335732j,-144.303345-837.593817j,407.933011+244.759806j,-424.759897+235.799626j,8.264801-491.299968j,392.869372+233.598005j,...,1230.156666+487.820747j,-1033.186847+817.987338j,-192.362883-1307.555988j,419.614478+209.807239j,-404.612891+267.147295j,-28.276893-471.100228j,390.295748+205.418815j,-389.646344+245.767338j,-17.876219-456.715534j,103.731474+193.358890j
996,832.201246+312.075467j,-702.373858+577.839346j,-149.925376-901.333973j,793.122792+292.203134j,-675.161705+562.027417j,-145.412287-844.030555j,407.748837+244.649302j,-425.956425+236.463862j,8.257573-490.870321j,392.132298+233.159745j,...,1224.999386+485.775618j,-1042.461502+825.330201j,-191.446353-1301.326016j,419.786755+209.893378j,-404.553219+267.107897j,-27.967804-465.950717j,390.822486+205.696045j,-384.244159+242.359939j,-18.030577-460.659203j,100.711558+186.344249j
997,833.086681+312.407506j,-709.793681+583.943596j,-150.041396-902.031469j,797.558742+293.837431j,-680.334779+566.333659j,-144.835953-840.685283j,410.349122+246.209473j,-425.174303+236.029677j,8.404710-499.616831j,388.561630+231.036645j,...,1239.560328+491.549785j,-1029.016480+814.685604j,-189.398991-1287.409404j,416.985042+208.492521j,-405.890277+267.990694j,-28.308397-471.625091j,392.223120+206.433221j,-385.384418+243.079151j,-17.741701-453.278766j,100.588959+186.420469j
998,833.154233+312.432837j,-713.576937+587.056061j,-149.420641-898.299566j,797.378686+293.771095j,-676.885819+563.462628j,-143.634920-833.714014j,414.637590+248.782554j,-429.892956+238.649173j,8.356647-496.759727j,393.097917+233.733897j,...,1239.477982+491.517131j,-1035.438716+819.770172j,-192.776964-1310.370633j,419.058123+209.529062j,-400.015025+264.111536j,-28.129494-468.644509j,397.366218+209.140115j,-387.356297+244.322903j,-17.876844-456.731516j,100.462828+186.044645j


7. Датасет тока небаланса для матрицы A_h и вектора b_d, заданных с погрешностями k_A и k_b

In [27]:
# Unbalance currents for the noisy data: sum of the three phase currents
# (a + b + c) of every consecutive column triple of df_complex.
df_sum = pd.DataFrame()

# Iterate over the current columns only. The bound is derived from the
# configuration instead of a hard-coded 30, so that with N_feat = 3 the target
# column 'b_d' is not summed as an extra group.
for i in range(0, N_powerlines * N_feat, 3):
    feature_group = df_complex.iloc[:, i:i+3]  # current triple of phase columns
    sum_features = feature_group.sum(axis=1)  # row-wise sum over the triple
    df_sum[f'Jo_{i//3 + 1}'] = sum_features

df_sum['b_d'] = df_complex['b_d']
df_sum

Unnamed: 0,Jo_1,Jo_2,Jo_3,Jo_4,Jo_5,Jo_6,Jo_7,Jo_8,Jo_9,Jo_10,b_d
0,3.1876770-3.0019550j,2.4626750-0.2723940j,-1.059073-0.816321j,0.111703-0.439371j,-3.799098+2.512716j,-0.866859+0.710248j,-2.6768340+6.9059390j,-3.3933450+5.4171570j,1.1876690-1.7635470j,-0.1152680+1.8710350j,52.5736280+90.9818700j
1,2.0232430+1.4199100j,-6.7109690-2.9704200j,-1.999899-2.102125j,-1.266750+3.588963j,1.590800+1.771755j,1.627361+1.304350j,4.2369730-4.1235020j,1.7781910-0.8949060j,-1.1653660-0.1597960j,-0.3054350+2.3454090j,52.0700980+90.6319940j
2,0.9771330-4.4577630j,-0.1829230+2.3742080j,-2.272175-2.509752j,-1.327977-1.740935j,0.284784+1.455962j,-1.632498-3.496465j,-3.1148220+2.1442680j,2.3960220-2.7503610j,-1.8441190-0.3504210j,-2.5904620+0.0729720j,53.0669500+92.2428440j
3,0.3991310+0.9450100j,-2.1198620-4.4235570j,2.561981-1.592175j,-1.367344-1.722319j,-3.973869+0.423768j,-2.809529-5.235775j,-2.7177760+6.9387840j,-0.3374390-0.7243200j,0.0811840-1.8494770j,-0.4817160+0.7144280j,51.8071840+90.1174660j
4,0.9270940-1.3251270j,-1.5939020+0.8504730j,-0.280541+1.366006j,2.571541-1.583086j,4.508617-0.266279j,6.119870+1.706147j,6.0099890+2.7658050j,-1.1952050-6.3649320j,0.5412850-3.4330110j,-1.8187110-1.1956370j,50.2841900+87.2405320j
...,...,...,...,...,...,...,...,...,...,...,...
995,-28.346536-20.293475j,-28.841748+21.930971j,-8.562086-10.740536j,3.859776-1.984726j,10.120740+6.083929j,3.645775-47.937859j,79.659619-8.334312j,4.6069350-1.7479040j,-13.275307+5.854306j,-17.226815-5.529381j,103.731474+193.358890j
996,-20.097988-11.419159j,-27.451200+10.199996j,-9.950015-9.757158j,1.819811-4.148363j,28.077197+3.086645j,-3.298265-57.510712j,48.167946-18.562613j,-8.9084680+9.7798040j,-12.734268+11.050557j,-11.452250-12.603218j,100.711558+186.344249j
997,-26.748395-5.680368j,-27.611989+19.485807j,-6.420471-17.377681j,-6.565514-0.441312j,2.927801+0.030256j,-5.087079-50.644098j,64.080761-1.526511j,21.144857+18.825985j,-17.213632+4.858124j,-10.902999-3.766393j,100.588959+186.420469j
998,-29.843345+1.189333j,-23.142053+23.519709j,-6.898720-9.328001j,-2.608223-1.391753j,16.741741+5.994539j,2.929316-57.369928j,52.238563-23.967733j,11.262302+0.916670j,-9.0863950+4.9960890j,-7.8669230-3.2684980j,100.462828+186.044645j


In [28]:
# Element-wise real part, imaginary part and modulus of the noisy complex dataset.
df_real = df_complex.applymap(np.real)
df_imag = df_complex.applymap(np.imag)
df_modul = df_complex.applymap(abs)

In [29]:
# sns.pairplot(df_modul, kind="reg")
# plt.show()

In [30]:
# plt.figure(figsize=(10, 8))
# sns.heatmap(df_modul.corr(method='pearson').round(2), annot=True, annot_kws={"fontsize": 9}, cmap='coolwarm', linewidth=.5)
# sns.set(rc={"xtick.labelsize":10, "ytick.labelsize":10})
# plt.xticks(rotation=35, ha='right')
# plt.title('Pearson')
# plt.show()

8. Анализ значимости признаков

In [31]:
def vif(df, features):
    """Compute the variance inflation factor (VIF) and tolerance of each feature.

    Every feature is regressed on all the other listed features with
    ``LinearRegression`` (default settings); with R^2 the score of that fit,
    ``Tolerance = 1 - R^2`` and ``VIF = 1 / Tolerance``.

    Parameters
    ----------
    df : pandas.DataFrame
        Table containing the columns named in ``features``.
    features : iterable of column labels
        Columns to analyse; each is used in turn as the regression target.

    Returns
    -------
    pandas.DataFrame
        Indexed by feature, with columns ``'VIF'`` and ``'Tolerance'``.
    """
    # Materialise once: the labels are iterated repeatedly below.
    features = list(features)
    # Named vif_values so the local dict does not shadow the function itself.
    vif_values, tolerance = {}, {}
    for feature in features:
        others = [f for f in features if f != feature]
        if others:
            X, y = df[others], df[feature]
            r2 = LinearRegression().fit(X, y).score(X, y)
        else:
            # A single feature has nothing to be collinear with.
            r2 = 0.0
        tolerance[feature] = 1 - r2
        # Perfect collinearity (R^2 == 1) gives zero tolerance; report an
        # infinite VIF instead of dividing by zero.
        vif_values[feature] = 1 / tolerance[feature] if tolerance[feature] != 0 else float('inf')
    return pd.DataFrame({'VIF': vif_values, 'Tolerance': tolerance})
# VIF / tolerance computed on the moduli of the noisy dataset.
# NOTE(review): df_complex.columns also contains the target column 'b_d', so the
# target is treated as one of the features here - confirm this is intended.
vif(df = df_modul, features = df_complex.columns)

Unnamed: 0,VIF,Tolerance
a1_1_h,1031.972325,0.000969
b1_1_h,1203.356412,0.000831
c1_1_h,1182.011536,0.000846
a2_1_h,1223.844314,0.000817
b2_1_h,1304.452086,0.000767
c2_1_h,1047.655952,0.000955
a1_2_h,1010.757865,0.000989
b1_2_h,1000.61056,0.000999
c1_2_h,1220.503568,0.000819
a2_2_h,1051.252512,0.000951


MI - показатель значимости любого (линейного и нелинейного) влияния признака (чем MI больше, тем больше признак влияет на целевую переменную);            
corr - коэф. корреляции Пирсона;                                                                                                                            
f - показатель значимости линейного влияния признака (чем f больше, тем больше признак влияет на целевую переменную);                                       
p - p-значение F-теста: вероятность получить значение f не меньше наблюдаемого при условии, что верна гипотеза H0: "признак не влияет (линейно) на целевую переменную"; чем меньше p, тем больше оснований отвергнуть H0.

In [32]:
# Split the modulus dataset into the target (b_d) and the feature matrix.
y = df_modul['b_d']
X = df_modul.drop(columns='b_d')

# Mutual information and univariate F-test of every feature against the target.
mi_full = mutual_info_regression(X, y, random_state=2023)
f_full = f_regression(X, y)

# One row per feature: MI, Pearson correlation, F statistic and its p-value.
f_p_full = pd.DataFrame(
    {
        'MI': mi_full,
        'corr': X.corrwith(y),
        'f': f_full[0],
        'p': f_full[1],
    },
    index=X.columns,
)
f_p_full.round(3)

Unnamed: 0,MI,corr,f,p
a1_1_h,1.937,0.987,38409.13,0.0
b1_1_h,1.913,0.987,37480.957,0.0
c1_1_h,1.916,0.984,29594.858,0.0
a2_1_h,1.952,0.986,35690.024,0.0
b2_1_h,1.892,0.987,36458.279,0.0
c2_1_h,1.904,0.986,35003.638,0.0
a1_2_h,1.921,0.985,31484.378,0.0
b1_2_h,1.908,0.987,37663.752,0.0
c1_2_h,1.95,0.986,33966.114,0.0
a2_2_h,1.944,0.984,31170.07,0.0
