In [1]:
# Подключение библиотек
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pyproj import Transformer

In [2]:
# Входные данные
file_name_npz_in = 'C:/Users/Maks/Desktop/Jupyter/2012_thunderbolts_clastered.npz'
file_name_hdf_out = '2012_thunderbolts_clastered.h5'
file_name_png_out = 'C:/Users/Maks/Desktop/Jupyter/plot/thunder_plot_clnb_'

In [3]:
# Преобразование из формата .npz в .hdf, удаление лишних данных, установление даты в качестве индекса
data = np.load(file_name_npz_in)
data_npz = pd.DataFrame(data['strikes']).drop('tail', axis=1).set_index('date')
data_npz.to_hdf(file_name_hdf_out, key='strikes', mode='w', complevel=9)
data_hdf = pd.read_hdf('2012_thunderbolts_clastered.h5', 'strikes')

In [4]:
# Создание DataFrame "clusters" с информацией о номере кластера, его длительности, количестве разрядов и покрываемой площади
clusters = pd.DataFrame(columns=['clnb', 'durn', 'amnt', 'area'])
for i in range(1, data_hdf.clnb.max() + 1):
    clst = data_hdf.loc[data_hdf['clnb'] == i] 
    amount = len(clst) # Кол-во разрядов в кластере
    if amount > 0:
        date_min = clst.index.min()
        date_max = clst.index.max()
        date_diff = date_max - date_min # Длительность кластера
        min_lat = clst.lat.min()
        min_lon = clst.lon.min()
        max_lat = clst.lat.max()
        max_lon = clst.lon.max()
        average_lat = (min_lat + max_lat) / 2
        average_lon = (min_lon + max_lon) / 2
        zone = round((average_lon + 180) / 6) # Определение UTM-зоны
        if average_lat >= 0:
            epsg_code = 32600 + zone  # Северное полушарие
        else:
            epsg_code = 32700 + zone  # Южное полушарие
        convert_coordinates = Transformer.from_crs('EPSG:4326', 'EPSG:' + str(epsg_code)) # Преобразование координат из WGS84 (широта, долгота) в UTM (метры)
        y1, x1 = convert_coordinates.transform(min_lat, min_lon)
        y2, x2 = convert_coordinates.transform(max_lat, max_lon)
        width = np.abs(x2 - x1)
        height = np.abs(y2 - y1)
        s = width * height # Покрываемая кластером площадь
        clusters_1 = pd.DataFrame({'clnb': [i], 'durn': [date_diff], 'amnt': [amount], 'area': [s]})
        clusters_sum = pd.concat([clusters, clusters_1])
        clusters = clusters_sum
clusters = clusters.set_index('clnb')
clusters

  clusters_sum = pd.concat([clusters, clusters_1])


Unnamed: 0_level_0,durn,amnt,area
clnb,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2,0 days 01:11:06,24,6.941394e+09
3,0 days 01:42:43,875,3.714398e+10
4,0 days 03:48:16,280,3.669214e+10
6,0 days 00:22:47,6,3.436433e+08
7,0 days 03:47:47,717,7.568382e+10
...,...,...,...
5432,0 days 00:58:11,23,1.879951e+09
5433,0 days 01:01:27,9,5.428085e+08
5434,0 days 00:19:06,6,5.705392e+08
5435,0 days 00:56:32,8,2.560993e+09


In [None]:
# Построение графиков
amp_max = data_hdf.amp.max()
for i in range(1, data_hdf.clnb.max() + 1):  
    clst = data_hdf[data_hdf['clnb'] == i]
    if len(clst) > 0:
        data_lon = clst['lon']
        data_lat = clst['lat']
        data_amp = np.abs(clst['amp'])
        plt.scatter(data_lon, data_lat, c=data_amp, vmin=0, vmax=amp_max)
        plt.title('clnb ' + str(i))
        plt.xlabel('lon')
        plt.ylabel('lat')
        plt.grid()
        cb = plt.colorbar()
        cb.set_label('amp')
        plt.savefig(file_name_png_out + str(i) + '.png')
        plt.close()

In [5]:
data_hdf

Unnamed: 0_level_0,lat,lon,amp,clnb,dist
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2012-07-15 00:00:24,60.687778,58.898611,0.0,3,50.777037
2012-07-15 00:00:27,60.752778,58.819444,0.0,3,50.549646
2012-07-15 00:00:30,58.230556,62.963333,2355.0,2,1312.423298
2012-07-15 00:00:31,60.703333,57.167778,4205.0,3,922.544758
2012-07-15 00:00:33,60.677778,59.304444,1899.0,3,975.460100
...,...,...,...,...,...
2012-10-29 15:50:36,53.761667,39.712500,1076.0,0,20015.086796
2012-10-29 17:16:11,54.129722,39.569722,2664.0,0,20015.086796
2012-10-29 17:42:42,49.312222,35.049167,-23683.0,0,20015.086796
2012-10-29 19:06:50,53.819167,39.735000,2641.0,0,20015.086796
