In [None]:
import struct
import math
import pandas as pd


def read_binary(file_path):
    """Read simulated EAS (extensive air shower) events from a binary file.

    The file is parsed as a sequence of fixed-size records of 1762 native-order
    4-byte floats (the last slot of each record is an end-of-record marker).
    Only a subset of the slots is used; everything else is skipped.

    Parameters
    ----------
    file_path : str or path-like
        Path of the binary file to read.

    Returns
    -------
    pandas.DataFrame
        One row per event with the columns, in this order:
        ``power`` (log10 of the shower size field), ``age``, ``x``, ``y``
        (shower axis coordinates), ``tetta`` (zenith angle), ``phi`` (azimuth
        angle), ``energy`` (tuple of the 144 EdepDetNE values),
        ``threshold_time`` (tuple of 144 values: every 4th TimDetNE value),
        ``primary_particle`` (PART0: 14 - p, 5626 - Fe), ``primary_energy``
        (E0, GeV), ``n_muons`` (NMU), ``n_hadrons`` (NHADR) and
        ``first_interaction_height`` (H1INT, cm).
        An empty file yields an empty frame with the same columns.

    Raises
    ------
    OSError
        If the file cannot be opened.
    ValueError
        If the shower size field of an event is not positive (``math.log10``).

    Notes
    -----
    A trailing record that ends before all of the fields used here are present
    is dropped as a whole (with a warning), so every column always has the
    same length. Previously such a file left the columns with different
    lengths and the DataFrame construction failed.
    """
    import warnings

    float_size = 4
    floats_per_record = 1762
    record_size = floats_per_record * float_size

    # Slots 0-2 (N_event, NRUN, NEVENT) are skipped; slots 3-15 are scalars:
    # PART0, E0, theta, phi, X0, Y0, H1INT, NGAM, NEL, NHADR, NMU, power, age.
    scalar_fields = struct.Struct('13f')
    scalar_offset = 3 * float_size
    # Slots 16-860 are skipped: 7 NVD/muon fields, 12 "new 2021" fields,
    # AmplKSM[7][4][4][6] (672), 64 more "new 2021" fields, EdepCntSCT[9][5][2] (90).
    edep_det_ne_fields = struct.Struct('144f')   # EdepDetNE[9][4][4]
    edep_det_ne_offset = 861 * float_size
    tim_det_ne_fields = struct.Struct('576f')    # TimDetNE[9][4][4][4]
    tim_det_ne_offset = 1005 * float_size
    # Slots 1581-1760 (EdepStNE, TimStNE) and the marker in slot 1761 are not
    # used, so a record is usable as soon as TimDetNE is complete.
    required_size = tim_det_ne_offset + tim_det_ne_fields.size

    columns = {
        'power': [],
        'age': [],
        'x': [],
        'y': [],
        'tetta': [],
        'phi': [],
        'energy': [],
        'threshold_time': [],
        'primary_particle': [],
        'primary_energy': [],
        'n_muons': [],
        'n_hadrons': [],
        'first_interaction_height': [],
    }

    with open(file_path, 'rb') as binary_file:
        while True:
            raw = binary_file.read(record_size)
            if len(raw) < required_size:
                if raw:
                    warnings.warn(
                        f"{file_path}: ignoring {len(raw)} trailing bytes that do "
                        f"not form a complete record",
                        stacklevel=2,
                    )
                # End of file (or truncated tail): stop without touching the columns.
                break

            (part0, e0, tetta, phi, x0, y0, h1int,
             _ngam, _nel, nhadr, nmu, power_eas, age_eas) = scalar_fields.unpack_from(raw, scalar_offset)
            edep_det_ne = edep_det_ne_fields.unpack_from(raw, edep_det_ne_offset)
            tim_det_ne = tim_det_ne_fields.unpack_from(raw, tim_det_ne_offset)

            # Compute the derived value first so that a failure cannot leave
            # the columns half-updated.
            log_power = math.log10(power_eas)

            columns['power'].append(log_power)
            columns['age'].append(age_eas)
            columns['x'].append(x0)
            columns['y'].append(y0)
            columns['tetta'].append(tetta)
            columns['phi'].append(phi)
            columns['energy'].append(edep_det_ne)
            # Element 0 of the innermost TimDetNE axis for each of the 144 detectors.
            # NOTE(review): the original comments call this both the "threshold"
            # and the "max" time — confirm which one index 0 actually is.
            columns['threshold_time'].append(tim_det_ne[::4])
            columns['primary_particle'].append(part0)
            columns['primary_energy'].append(e0)
            columns['n_muons'].append(nmu)
            columns['n_hadrons'].append(nhadr)
            columns['first_interaction_height'].append(h1int)

    return pd.DataFrame(columns)

In [None]:
# Binary input file to parse. The path is relative, so it is resolved against
# the current working directory.
# NOTE(review): it lines up with the '/content/drive' mount point only when the
# working directory is /content — confirm.
path = 'drive/MyDrive/Nuclear/spe27p_100k_2022_correct.dat'

In [None]:
# Mount Google Drive at /content/drive (Colab-only helper).
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Parse the binary event file into a DataFrame (one row per event read).
data = read_binary(path)

In [None]:
# Display the parsed frame as the cell output.
data

Unnamed: 0,power,age,x,y,tetta,phi,energy,threshold_time,primary_particle,primary_energy,n_muons,n_hadrons,first_interaction_height
0,4.166507,1.444716,13.572407,37.022316,37.367474,47.313324,"(0.0, 0.0, 0.10191129148006439, 0.0, 10.222193...","(0.0, 0.0, 91820.75, 0.0, 91830.7734375, 91847...",14.0,1.024144e+06,10006.0,815.0,2200590.750
1,4.933104,1.430547,40.330677,-61.980999,28.912228,236.400421,"(0.0, 6.218760013580322, 0.0, 4.34439992904663...","(0.0, 61243.91796875, 0.0, 61296.953125, 0.0, ...",14.0,1.670369e+06,14978.0,1854.0,1621342.125
2,4.133581,1.459046,-1.046695,-63.925430,37.267426,182.086060,"(0.0, 0.0, 0.0, 0.0, 0.34477588534355164, 0.0,...","(0.0, 0.0, 0.0, 0.0, 78687.359375, 0.0, 0.0, 0...",14.0,1.141731e+06,10802.0,725.0,1894253.375
3,5.190620,1.326983,40.293152,-21.836197,3.205503,201.268890,"(10.767822265625, 8.262124061584473, 2.4767401...","(52158.63671875, 52172.0703125, 52175.68359375...",14.0,1.102080e+06,16449.0,3024.0,1576046.875
4,5.021614,1.336776,15.542248,10.037846,6.299241,108.097626,"(10.822569847106934, 22.756126403808594, 28.51...","(88056.4765625, 88057.125, 88068.453125, 88101...",14.0,1.272720e+06,10435.0,1372.0,2638823.500
...,...,...,...,...,...,...,...,...,...,...,...,...,...
99995,4.503942,1.295468,36.338993,32.139332,44.537529,31.058533,"(0.015013078227639198, 0.0, 0.4512841403484344...","(96602.171875, 0.0, 96608.375, 96601.6875, 0.0...",14.0,1.014815e+06,9927.0,1028.0,2076419.375
99996,5.301322,1.302831,-29.983866,-38.277843,16.121550,158.978851,"(22.493345260620117, 21.379228591918945, 42.03...","(74215.28125, 74196.3203125, 74214.0, 74217.07...",14.0,2.281790e+06,24806.0,3705.0,2151935.500
99997,3.617597,1.473757,18.329506,-13.626085,46.820038,13.862854,"(0.0, 0.0, 0.0, 0.0, 0.06985451281070709, 0.16...","(0.0, 0.0, 0.0, 0.0, 111050.9375, 111021.875, ...",14.0,1.331032e+06,4704.0,216.0,2290491.000
99998,4.017301,1.510002,-36.200687,-53.555149,33.087467,157.593292,"(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",14.0,1.509401e+06,6118.0,275.0,4194023.750


In [None]:
# Spread the two per-event 144-value columns into 144 scalar columns each
# (energy_0..energy_143, threshold_time_0..threshold_time_143), append them
# after the existing columns and drop the original tuple-valued columns.
wide_parts = [data]
for list_col in ('energy', 'threshold_time'):
    wide_parts.append(
        pd.DataFrame(
            data[list_col].to_list(),
            columns=[f"{list_col}_{k}" for k in range(144)],
        )
    )

data = pd.concat(wide_parts, axis=1).drop(columns=['energy', 'threshold_time'])

In [None]:
from numpy import sin, cos, atan2

In [None]:
import numpy as np

# Encode the azimuth angle as (sin, cos) so the 0/360 wrap-around is continuous.
# phi is stored in degrees (the values span 0-360), whereas numpy's
# trigonometric functions take radians, so convert before encoding; feeding
# degrees directly yields sin/cos pairs that do not map back to the original angle.
phi_rad = np.deg2rad(data.phi)
data = data.assign(sin=np.sin(phi_rad), cos=np.cos(phi_rad)).drop(columns='phi')

In [None]:
# Inspect the resulting column names.
data.columns

Index(['power', 'age', 'x', 'y', 'tetta', 'primary_particle', 'primary_energy',
       'n_muons', 'n_hadrons', 'first_interaction_height',
       ...
       'threshold_time_136', 'threshold_time_137', 'threshold_time_138',
       'threshold_time_139', 'threshold_time_140', 'threshold_time_141',
       'threshold_time_142', 'threshold_time_143', 'sin', 'cos'],
      dtype='object', length=300)

In [None]:
!pip install catboost

Collecting catboost
  Downloading catboost-1.2.8-cp311-cp311-manylinux2014_x86_64.whl.metadata (1.2 kB)
Downloading catboost-1.2.8-cp311-cp311-manylinux2014_x86_64.whl (99.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m99.2/99.2 MB[0m [31m9.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: catboost
Successfully installed catboost-1.2.8


In [None]:
import numpy as np


def recover_phi(sin_phi, cos_phi):
    """Rebuild an angle in degrees from its sine and cosine.

    The angle is obtained with ``arctan2`` and converted to degrees; negative
    results are shifted up by 360 so that they are no longer negative.

    Parameters
    ----------
    sin_phi, cos_phi : scalar or array-like
        Sine and cosine components (any common positive scale works, since
        only their ratio and signs matter to ``arctan2``).

    Returns
    -------
    numpy.ndarray
        Angle(s) in degrees, as produced by ``np.where``.
    """
    angle = np.degrees(np.arctan2(sin_phi, cos_phi))
    wrapped = angle + 360
    return np.where(angle < 0, wrapped, angle)


def custom_metric_rmse(y_pred, y_true):
    """Root-mean-square error between angles in degrees, measured on the circle.

    For every pair the error is the shorter of the two arcs between the
    angles, so 350 vs 10 counts as 20 degrees rather than 340.

    Parameters
    ----------
    y_pred, y_true : numpy.ndarray (or objects supporting ``-`` and numpy ufuncs)
        Angles in degrees. The metric is symmetric in its two arguments.

    Returns
    -------
    float
        RMSE of the circular differences, in degrees.
    """
    # Reduce the raw difference modulo a full turn first: without this, inputs
    # that differ by more than 360 degrees make `360 - diff` negative and the
    # minimum below would pick that bogus value.
    diff = np.abs(y_pred - y_true) % 360
    shortest_arc = np.minimum(diff, 360 - diff)
    return np.sqrt(np.mean(shortest_arc ** 2))


In [None]:
import numpy as np
import pandas as pd
from catboost import CatBoostRegressor, Pool
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.datasets import make_regression


# Train a single multi-output CatBoost regressor for the shower parameters and
# report per-target metrics on a held-out split.
df = data


# Regression targets; the last two entries are the sin/cos encoding of phi and
# are evaluated together as an angle further below.
target_cols = ['power', 'age', 'x', 'y', 'tetta', 'sin', 'cos']


# Features are every column that is not a target; 20% of the rows are held out.
# NOTE(review): this keeps primary_particle, primary_energy, n_muons, n_hadrons
# and first_interaction_height as model inputs — confirm that is intended.
X_train, X_test, y_train, y_test = train_test_split(
    df.drop(target_cols, axis=1),
    df[target_cols],
    test_size=0.2,
    random_state=42
)


# No column is treated as categorical.
cat_features = []

train_pool = Pool(
    data=X_train,
    label=y_train,
    cat_features=cat_features
)

test_pool = Pool(
    data=X_test,
    label=y_test,
    cat_features=cat_features
)

# NOTE(review): task_type='GPU' presumably requires a GPU runtime — confirm.
model = CatBoostRegressor(
    iterations=1000,
    learning_rate=0.03,
    depth=6,
    loss_function='MultiRMSE',
    eval_metric='MultiRMSE',
    random_seed=42,
    task_type='GPU',
    verbose=100
)


# NOTE(review): the test pool is also the eval set used for early stopping, so
# the metrics printed below are measured on data that influenced the stopping
# iteration. Consider a separate validation split.
model.fit(
    train_pool,
    eval_set=test_pool,
    early_stopping_rounds=50,
    plot=True
)


y_pred = model.predict(X_test)

# Per-target RMSE and R² for every target except the trailing sin/cos pair.
# The prediction columns are indexed positionally, i.e. assumed to follow the
# order of target_cols.
for i in range(y_test.shape[1] - 2):
    rmse = np.sqrt(mean_squared_error(y_test.iloc[:, i], y_pred[:, i]))
    r2 = r2_score(y_test.iloc[:, i], y_pred[:, i])

    print(f"{y_test.iloc[:, i].name} - RMSE: {rmse:.4f}, R²: {r2:.4f}")

# Angular error for phi: rebuild the true and predicted angles from their
# sin/cos columns (the last two targets) and compare them.
print("phi - ", custom_metric_rmse(recover_phi(y_test.iloc[:, -2], y_test.iloc[:, -1]), recover_phi(y_pred[:, -2], y_pred[:, -1])))

# Print the features sorted by importance score, highest first.
feature_importance = model.get_feature_importance()
feature_names = X_train.columns
for score, name in sorted(zip(feature_importance, feature_names), reverse=True):
    print(f"{name}: {score}")

# Save the trained model to a file in the current working directory.
model.save_model('catboost_multi_regression.cbm')


MetricVisualizer(layout=Layout(align_self='stretch', height='500px'))

0:	learn: 46.2736577	test: 46.3312033	best: 46.3312033 (0)	total: 139ms	remaining: 2m 19s
100:	learn: 23.9734593	test: 24.2489886	best: 24.2489886 (100)	total: 9.13s	remaining: 1m 21s
200:	learn: 20.6062503	test: 20.9009390	best: 20.9009390 (200)	total: 11.3s	remaining: 44.7s
300:	learn: 19.2844328	test: 19.6151727	best: 19.6151727 (300)	total: 13.4s	remaining: 31.1s
400:	learn: 18.5039246	test: 18.8726528	best: 18.8726528 (400)	total: 17.5s	remaining: 26.1s
500:	learn: 17.9459564	test: 18.3584504	best: 18.3584504 (500)	total: 19.8s	remaining: 19.7s
600:	learn: 17.5233986	test: 17.9766258	best: 17.9766258 (600)	total: 21.9s	remaining: 14.5s
700:	learn: 17.1915372	test: 17.6898099	best: 17.6898099 (700)	total: 24.6s	remaining: 10.5s
800:	learn: 16.9107296	test: 17.4519139	best: 17.4519139 (800)	total: 26.8s	remaining: 6.65s
900:	learn: 16.6747826	test: 17.2621262	best: 17.2621262 (900)	total: 30.6s	remaining: 3.36s
999:	learn: 16.4804733	test: 17.1127679	best: 17.1127679 (999)	total: 33