In [1]:
# Подключение библиотек
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pyproj import Transformer
import xarray as xr
import datetime as dt
import os

In [2]:
# Input data and output locations
# Lightning-strike archive (.npz) that is loaded and converted below.
file_name_npz_in = 'C:/Users/Maks/Desktop/Jupyter/2012_thunderbolts_clastered.npz'
# HDF5 file the strike table is written to (relative to the working directory).
file_name_hdf_out = '2012_thunderbolts_clastered.h5'
# Path prefix for PNG plots (not referenced by the cells visible here).
file_name_png_out = 'C:/Users/Maks/Desktop/Jupyter/plot/thunder_plot_clnb_'
# Directory whose entries are opened one by one as satellite data files.
satellite_directory = 'C:/Users/Maks/Desktop/Jupyter/satellite data'
# Half-width, in minutes, of the time window used when matching flashes to satellite samples.
interval = 15

In [3]:
# Convert the .npz archive to HDF5: drop the unneeded 'tail' column and
# use the strike date as the index.
# The archive is opened in a `with` block so its file handle is released
# as soon as the 'strikes' array has been copied into the DataFrame.
with np.load(file_name_npz_in) as data:
    data_npz = pd.DataFrame(data['strikes']).drop('tail', axis=1).set_index('date')
data_npz.to_hdf(file_name_hdf_out, key='strikes', mode='w', complevel=9)
# Read back from the very file that was just written (via the config variable,
# not a second hard-coded copy of its name) so the two can never drift apart.
data_hdf = pd.read_hdf(file_name_hdf_out, 'strikes')

In [4]:
# Match lightning clusters against satellite samples, one satellite file at a time.
# For every file, the flashes of the file's day are grouped by cluster number (clnb > 0).
# The first flash of a cluster that has a satellite sample inside the cluster's
# lat/lon bounding box within +/- `interval` minutes yields one result row and one
# trimmed HDF file; the remaining flashes of that cluster are then skipped.
output_dir = "C:/Users/Maks/Desktop/Jupyter/output_directory"
window = dt.timedelta(minutes=interval)
result_columns = [
    'clnb', 'satellite_file_name', 'flash_time', 'matched_time', 'time_dif',
    'lat_sat', 'lon_sat', 'lat_min', 'lat_max', 'lon_min', 'lon_max',
]

results = []
files_processed = 0
# sorted() makes the processing order independent of the file system's listing order.
for filename in sorted(os.listdir(satellite_directory)):
    file_path = os.path.join(satellite_directory, filename)
    if not os.path.isfile(file_path):
        continue  # sub-directories cannot be opened as datasets
    files_processed += 1

    # The acquisition day is the third '_'-separated token of the file name
    # ('YYYYDDD', e.g. TIDI_PB_2012197_...). Parsing the name instead of fixed
    # offsets into the absolute path keeps this working when the directory changes.
    try:
        time_start = dt.datetime.strptime(filename.split('_')[2], '%Y%j')
    except (IndexError, ValueError) as exc:
        raise ValueError(f"Cannot parse acquisition day from file name: {filename!r}") from exc
    time_end = time_start + dt.timedelta(1)

    # Copy the needed variables into a DataFrame, then release the file handle.
    with xr.open_dataset(file_path, decode_timedelta=True) as satellite_data:
        dataframe_satellite = pd.DataFrame({
            'date': satellite_data['time'],
            'lat': satellite_data['lat'],
            'lon': satellite_data['lon'],
        })
    # 'time' is an offset from 1980-01-06 00:00:00 (presumably the GPS epoch - confirm).
    dataframe_satellite['date'] += dt.datetime(1980, 1, 6, 0, 0, 0)
    dataframe_satellite = dataframe_satellite.set_index('date')

    # Flashes that fall on this file's day (both ends inclusive).
    day_mask = (data_hdf.index >= time_start) & (data_hdf.index <= time_end)
    filtered_hdf = data_hdf[day_mask]
    if filtered_hdf.empty:
        continue

    # Only clusters with clnb > 0 are processed.
    clusters = filtered_hdf[filtered_hdf['clnb'] > 0].groupby('clnb')

    for clnb, group in clusters:
        # Bounding box of the cluster.
        lat_min, lat_max = group['lat'].min(), group['lat'].max()
        lon_min, lon_max = group['lon'].min(), group['lon'].max()

        # The spatial test does not depend on the flash, so build it once per
        # cluster. A plain boolean array avoids any index alignment (the
        # satellite index contains duplicate timestamps).
        box_mask = (
            (dataframe_satellite['lat'] >= lat_min) &
            (dataframe_satellite['lat'] <= lat_max) &
            (dataframe_satellite['lon'] >= lon_min) &
            (dataframe_satellite['lon'] <= lon_max)
        ).to_numpy()
        if not box_mask.any():
            continue  # the satellite never enters this cluster's box

        # Check every flash of the cluster.
        for flash_time in group.index:
            # Time window of +/- `interval` minutes around the flash.
            time_mask = (
                (dataframe_satellite.index >= flash_time - window) &
                (dataframe_satellite.index <= flash_time + window)
            )
            matched_data = dataframe_satellite[time_mask & box_mask]

            # Test for matching ROWS. The previous `.any().any()` tested whether
            # any matched VALUE was truthy and so missed a sample at lat=0/lon=0.
            if matched_data.empty:
                continue

            first_match = matched_data.iloc[0]
            matched_time = matched_data.index[0]
            results.append({
                'clnb': clnb,
                'satellite_file_name': filename,
                'flash_time': flash_time,
                'matched_time': matched_time,
                'time_dif': abs(flash_time - matched_time),
                'lat_sat': first_match['lat'],
                'lon_sat': first_match['lon'],
                'lat_min': lat_min,
                'lat_max': lat_max,
                'lon_min': lon_min,
                'lon_max': lon_max,
            })

            # Save the satellite samples within +/- `interval` minutes of the
            # matched sample to a per-cluster HDF file.
            output_filename = 'trimmed_' + os.path.splitext(filename)[0] + '_clnb_' + str(clnb) + '.h5'
            os.makedirs(output_dir, exist_ok=True)  # create the directory if missing
            output_path = os.path.join(output_dir, output_filename)

            trim_mask = (
                (dataframe_satellite.index >= matched_time - window) &
                (dataframe_satellite.index <= matched_time + window)
            )
            dataframe_satellite[trim_mask].to_hdf(output_path, key='satellite_data', mode='w')
            break  # stop after the first match for this cluster

# Build the final table. The empty case has the same columns and index as the
# non-empty one, so downstream code does not have to special-case it.
if results:
    satellite_overpass_matching = pd.DataFrame(results).set_index('clnb').sort_index()
else:
    satellite_overpass_matching = pd.DataFrame(columns=result_columns).set_index('clnb')
    print("Совпадений не найдено")

# Summary statistics
print("\nРезультаты сопоставления:")
print(f"Обработано файлов: {files_processed}")
print(f"Найдено совпадений: {len(satellite_overpass_matching)}")

satellite_overpass_matching


Результаты сопоставления:
Обработано файлов: 3
Найдено совпадений: 6


Unnamed: 0_level_0,satellite_file_name,flash_time,matched_time,time_dif,lat_sat,lon_sat,lat_min,lat_max,lon_min,lon_max
clnb,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
73,TIDI_PB_2012197_P0100_S0450_D011_R01.VEC,2012-07-15 14:14:55,2012-07-15 14:28:47,0 days 00:13:52,57.345863,130.564255,56.904167,59.435,126.382222,132.48
134,TIDI_PB_2012197_P0100_S0450_D011_R01.VEC,2012-07-15 19:04:48,2012-07-15 19:19:48,0 days 00:15:00,57.34565,57.610893,56.368611,62.949167,52.549444,60.376667
154,TIDI_PB_2012197_P0100_S0450_D011_R01.VEC,2012-07-15 22:35:31,2012-07-15 22:42:27,0 days 00:06:56,58.045193,98.05867,57.643889,59.572222,97.761389,98.841667
158,TIDI_PB_2012198_P0100_S0450_D011_R01.VEC,2012-07-16 00:09:07,2012-07-16 00:19:28,0 days 00:10:21,58.045288,71.757469,57.608889,60.4875,68.001667,75.574167
286,TIDI_PB_2012198_P0100_S0450_D011_R01.VEC,2012-07-16 17:45:05,2012-07-16 18:00:05,0 days 00:15:00,65.017097,75.77346,58.589167,68.377778,64.264444,78.740278
303,TIDI_PB_2012198_P0100_S0450_D011_R01.VEC,2012-07-16 19:20:50,2012-07-16 19:34:56,0 days 00:14:06,57.345642,50.8507,56.678611,58.676389,46.381389,51.32


In [5]:
# Load the trimmed satellite track stored under the clnb_286 file name and display it.
trimmed_path = 'C:/Users/Maks/Desktop/Jupyter/output_directory\\trimmed_TIDI_PB_2012198_P0100_S0450_D011_R01_clnb_286.h5'
matched_data_hdf = pd.read_hdf(trimmed_path, key='satellite_data')
matched_data_hdf

Unnamed: 0_level_0,lat,lon
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2012-07-16 17:47:10,10.331592,102.800842
2012-07-16 17:49:19,17.490479,105.159172
2012-07-16 17:51:28,24.483486,108.051453
2012-07-16 17:53:37,31.250389,111.651077
2012-07-16 17:55:46,37.702774,116.196648
2012-07-16 17:57:56,43.707241,122.003403
2012-07-16 18:00:05,49.065937,129.477646
2012-07-16 18:02:14,53.492699,139.014664
2012-07-16 18:04:24,56.614212,150.768768
2012-07-16 18:06:33,58.04528,164.256088


In [6]:
# Re-load a single satellite file on its own to inspect the complete, untrimmed track.
# The dataset is opened in a `with` block so the file handle is released once the
# three variables have been copied into the DataFrame.
with xr.open_dataset('C:/Users/Maks/Desktop/Jupyter/satellite data\\TIDI_PB_2012198_P0100_S0450_D011_R01.VEC', decode_timedelta=True) as satellite_data:
    dataframe_satellite = pd.DataFrame({'date': satellite_data['time'], 'lat': satellite_data['lat'], 'lon': satellite_data['lon']})
# 'time' is an offset from 1980-01-06 00:00:00 (presumably the GPS epoch - confirm).
dataframe_satellite['date'] += dt.datetime(1980,1,6,0,0,0)
dataframe_satellite = dataframe_satellite.set_index('date')

dataframe_satellite

Unnamed: 0_level_0,lat,lon
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2012-07-16 00:08:41,37.702625,23.697750
2012-07-16 00:10:51,43.707142,29.504448
2012-07-16 00:13:00,49.065880,36.978859
2012-07-16 00:15:09,53.492668,46.515827
2012-07-16 00:17:19,56.614212,58.270035
...,...,...
2012-07-16 23:55:49,-57.345760,355.219116
2012-07-16 23:58:00,-49.688038,355.874023
2012-07-17 00:00:12,-42.047932,356.591187
2012-07-17 00:02:23,-34.430378,357.393768


In [7]:
# Select every row stamped 2012-07-16 18:00:05; the output below shows two rows
# with different lat/lon, i.e. the date index is not unique.
dataframe_satellite.loc['2012-07-16 18:00:05']

Unnamed: 0_level_0,lat,lon
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2012-07-16 18:00:05,49.065937,129.477646
2012-07-16 18:00:05,65.017097,75.77346


In [8]:
# Show the rows from position 1061 onward; the output below shows the timestamps
# jumping backwards there, i.e. the date index is not sorted.
dataframe_satellite.iloc[1061:]

Unnamed: 0_level_0,lat,lon
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2012-07-16 23:32:03,-38.236019,168.274673
2012-07-16 23:34:12,-45.865524,169.042877
2012-07-16 23:36:22,-53.514999,169.733612
2012-07-16 15:24:50,-21.010954,323.018280
2012-07-16 15:27:00,-27.899931,326.234772
...,...,...
2012-07-16 23:55:49,-57.345760,355.219116
2012-07-16 23:58:00,-49.688038,355.874023
2012-07-17 00:00:12,-42.047932,356.591187
2012-07-17 00:02:23,-34.430378,357.393768
