# Převod dat o jaderných testech do GPKG souboru

In [82]:
import geopandas as gpd
import pandas as pd
import seaborn as sns

Načtení dat z CSV

In [69]:
# Read the SIPRI explosion report from the CSV file into the main DataFrame.
df = pd.read_csv(filepath_or_buffer="../data/sipri-report-explosions.csv")

In [142]:
# Print a summary of the loaded frame: column names, non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2051 entries, 0 to 2050
Data columns (total 18 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   date       2051 non-null   datetime64[ns]
 1   origin_t   2051 non-null   float64       
 2   id_no      2051 non-null   int64         
 3   country    2051 non-null   object        
 4   region     2051 non-null   object        
 5   source     2051 non-null   object        
 6   latitude   2051 non-null   float64       
 7   longitude  2051 non-null   float64       
 8   mb         2051 non-null   float64       
 9   Ms         2051 non-null   float64       
 10  depth      2051 non-null   float64       
 11  yield_1    2048 non-null   float64       
 12  yield_u    2046 non-null   float64       
 13  purpose    2050 non-null   object        
 14  name       1386 non-null   object        
 15  type       2051 non-null   object        
 16  date_long  2051 non-null   int64         


Vytvoření nového dataframe obsahujícího pouze informace o datu a času

In [71]:
# Keep only the two columns needed to rebuild a full timestamp.
# The explicit .copy() makes NewDF independent of df, so the in-place
# corrections applied to NewDF in the next step modify a real frame instead of
# a slice of df (which would raise pandas' SettingWithCopyWarning).
NewDF = df[["date_long","origin_t"]].copy()
NewDF.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2051 entries, 0 to 2050
Data columns (total 2 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   date_long  2051 non-null   int64  
 1   origin_t   2051 non-null   float64
dtypes: float64(1), int64(1)
memory usage: 32.2 KB


Nahrazení špatně formátovaných časů platnými hodnotami a převod času a data do vhodného formátu. Vytvoření jednoho řetězce pro vložení dat zpět do původního dataframe.

In [72]:
# Work on an independent copy so the assignments below never act on a slice
# of df (avoids SettingWithCopyWarning) and the cell can be re-run safely.
NewDF = NewDF.copy()

# Manual corrections for two specific rows, addressed by index label.
# NOTE(review): presumably the raw values were not valid HHMMSS times — confirm
# against the source CSV. The hard-coded labels break if the row order changes.
NewDF.at[221, 'origin_t'] = 235959.0
NewDF.at[1292, 'origin_t'] = 65003.0

# origin_t is a float that is later parsed with "%H%M%S.%f", i.e. HHMMSS.fraction.
# Pad only the integer part to six digits. Padding the whole string to a fixed
# width is correct only for exactly one fractional digit: "12345.67" would stay
# unpadded and be read as 12:34:05 instead of 01:23:45.
time_parts = NewDF["origin_t"].astype(str).str.partition(".")
OriginTIME = time_parts[0].str.zfill(6) + time_parts[1] + time_parts[2]
Datum = NewDF["date_long"].astype(str)

# Single "YYYYMMDD HHMMSS.fraction" string per row, ready for datetime parsing.
DATE = Datum + " " + OriginTIME

Převod na typ datetime a vložení do původního dataframe jako sloupec date.

In [73]:
# Parse the combined date/time strings into timestamps and store them in the
# `date` column. Strings that do not match the format become NaT (errors="coerce")
# instead of raising.
timestamp_format = "%Y%m%d %H%M%S.%f"
df["date"] = pd.to_datetime(DATE, errors="coerce", format=timestamp_format)

In [140]:
# Lookup table of purpose codes, indexed by the "purpose" column so it can be
# joined onto the main frame by code.
purpose_df = pd.read_csv("purpose.txt").set_index("purpose")
purpose_df

Unnamed: 0_level_0,purpose_long
purpose,Unnamed: 1_level_1
WR,weapons related
COMBAT,Used in war combat
WE,(used for British French and US tests) to eva...
ME,Test conducted in the context of a military ex...
SE,(used for French and US tests) tests to determ...
FMS,(used for Soviet tests) To study the phenomena...
SB,unknown
SAM,(used for Soviet tests) Tests to study acciden...
PNE:PLO,Peaceful nuclear explosion Plowshare Programme
TRANSP,Transportation-storage purposes


In [143]:
# Attach the columns of purpose_df to every record by looking up the record's
# `purpose` code in purpose_df's index. Rows whose code has no match get NaN.
# Columns added by a previous run of this cell are dropped first, so re-running
# it does not fail with a "columns overlap" error.
df = (
    df.drop(columns=purpose_df.columns, errors="ignore")
      .join(purpose_df, on="purpose")
)

In [145]:
# Build one point per record (x = longitude, y = latitude), tagged with the
# EPSG:4326 coordinate reference system.
geometry = gpd.points_from_xy(x=df["longitude"], y=df["latitude"], crs="EPSG:4326")

In [146]:
# Combine the attribute table with the point geometries into a GeoDataFrame.
GDF = gpd.GeoDataFrame(df, geometry=geometry)

In [147]:
# Print a summary of the GeoDataFrame: column names, non-null counts and dtypes.
GDF.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 2051 entries, 0 to 2050
Data columns (total 20 columns):
 #   Column         Non-Null Count  Dtype         
---  ------         --------------  -----         
 0   date           2051 non-null   datetime64[ns]
 1   origin_t       2051 non-null   float64       
 2   id_no          2051 non-null   int64         
 3   country        2051 non-null   object        
 4   region         2051 non-null   object        
 5   source         2051 non-null   object        
 6   latitude       2051 non-null   float64       
 7   longitude      2051 non-null   float64       
 8   mb             2051 non-null   float64       
 9   Ms             2051 non-null   float64       
 10  depth          2051 non-null   float64       
 11  yield_1        2048 non-null   float64       
 12  yield_u        2046 non-null   float64       
 13  purpose        2050 non-null   object        
 14  name           1386 non-null   object        
 15  type         

In [148]:
# TODO: a seaborn displot of country vs. year was sketched here but left disabled.
# List the distinct purpose codes present in the data (NaN is reported as well).
GDF["purpose"].unique()

array(['WR', 'COMBAT', 'WE', 'ME', 'SE', 'FMS', 'SB', 'SAM', 'PNE:PLO',
       'TRANSP', 'PNE:V', nan, 'PNE', 'WR/SE', 'WR/WE', 'WR/PNE',
       'WR/SAM', 'PNE/WR', 'SE/WR', 'WR/P/SA', 'WE/SAM', 'WE/WR',
       'WR/F/SA', 'WR/FMS', 'FMS/WR', 'WR/P/S', 'WR/F/S', 'WR/WE/S'],
      dtype=object)

In [149]:
# Write the GeoDataFrame to a GeoPackage file (path is relative to the
# current working directory).
GDF.to_file(filename="NuclearExplosions.gpkg", driver="GPKG")