## Data processing for supplementary models

In [1]:
import os
# Work from the project directory.
# NOTE(review): presumably required so that the local `imgs_processing` module
# imported below can be resolved — confirm before moving or reordering this line.
os.chdir('/compass/Shared/Users/bogdanov/vyzkumny_ukol/')
from pathlib import Path
import matplotlib.pyplot as plt
from datetime import datetime
import time 
import re
from cdb_extras import xarray_support as cdbxr   # loading data from the COMPASS database
from pyCDB import client
import pandas as pd
import matplotlib.pyplot as plt  # NOTE(review): duplicate of the pyplot import above
import imgs_processing as imgs
import numpy as np
from tqdm.auto import tqdm
# CDB client shared by the cells below to fetch signals via `cdb.get_signal`.
cdb = client.CDBClient()

# Project root; the data paths used in later cells are built from it.
path = Path('/compass/Shared/Users/bogdanov/vyzkumny_ukol')

In [2]:
shot = 18463

# Load the three raw diagnostic signals of this shot from CDB.
h_alpha_signal = cdb.get_signal(f"H_alpha/SPECTROMETRY_RAW:{shot}")
mc_signal = cdb.get_signal(f"Mirnov_coil_A_theta_13_RAW/MAGNETICS_RAW:{shot}")
divlp_signal = cdb.get_signal(f"DIVLPB01/STRATUS:{shot}")

# One (time, value) frame per signal.
h_alpha_df = pd.DataFrame({'time': h_alpha_signal.time_axis.data, 'h_alpha': h_alpha_signal.data})
mc_df = pd.DataFrame({'time': mc_signal.time_axis.data, 'mc': mc_signal.data})
div_df = pd.DataFrame({'time': divlp_signal.time_axis.data, 'divlp': divlp_signal.data})

# Inner-join on exact time stamps: only samples whose time value is present
# in all three signals survive the merge.
signals_df = pd.merge(div_df, pd.merge(h_alpha_df, mc_df, on='time', how='inner'), on='time', how='inner')

# Downsample: keep every 10th merged sample, then use time as the index.
signals_df = signals_df.iloc[::10]
signals_df = signals_df.set_index('time')

# Flip the sign of the H-alpha signal.
signals_df['h_alpha'] *= -1

# Remove data with no plasma: keep only samples strictly inside the discharge window.
# NOTE(review): the meaning of the second argument (4e4) is defined by
# imgs.discharge_duration and is not visible in this notebook — confirm.
discharge_start, discharge_end = imgs.discharge_duration(shot, 4e4)
in_discharge = (signals_df.index > discharge_start) & (signals_df.index < discharge_end)
# .copy() detaches the result from the unfiltered frame, so adding or modifying
# columns later does not raise pandas' SettingWithCopyWarning.
signals_df = signals_df[in_discharge].copy()

In [3]:
variant = 'seidl_2023'

# Add a column with mode labels; every sample is L-mode by default.
# .assign returns a new frame, so this neither writes into a frame that pandas
# may still treat as a slice of an earlier one nor breaks when the cell is re-run.
signals_df = signals_df.assign(mode='L-mode')

# Load the label time stamps from CDB.
t_ELM_start = cdb.get_signal(f"t_ELM_start/SYNTHETIC_DIAGNOSTICS:{shot}:{variant}")
t_ELM_end = cdb.get_signal(f"t_ELM_end/SYNTHETIC_DIAGNOSTICS:{shot}:{variant}")
t_H_mode_start = cdb.get_signal(f"t_H_mode_start/SYNTHETIC_DIAGNOSTICS:{shot}:{variant}")
t_H_mode_end = cdb.get_signal(f"t_H_mode_end/SYNTHETIC_DIAGNOSTICS:{shot}:{variant}")

# A shot with a single interval may deliver its start/end as scalars, which
# pd.DataFrame cannot take without an explicit index. np.atleast_1d promotes
# scalars to length-1 arrays and leaves arrays untouched, so both cases go
# through the same constructor and no blanket try/except is needed.
t_ELM = pd.DataFrame({
    'start': np.atleast_1d(t_ELM_start.data),
    'end': np.atleast_1d(t_ELM_end.data),
})
t_H_mode = pd.DataFrame({
    'start': np.atleast_1d(t_H_mode_start.data),
    'end': np.atleast_1d(t_H_mode_end.data),
})

# Label H-mode first and ELMs second, so ELM intervals overwrite the H-mode
# label wherever the two overlap.
for interval in t_H_mode.itertuples(index=False):
    signals_df.loc[interval.start:interval.end, 'mode'] = 'H-mode'

for interval in t_ELM.itertuples(index=False):
    signals_df.loc[interval.start:interval.end, 'mode'] = 'ELM'

# Save data
signals_df.to_csv(f'{path}/data/LP_MC_H_alpha/LP_MC_H_alpha_shot_{shot}.csv')

### Process multiple discharges

In [6]:
def _shot_numbers_from(file_names):
    """Extract the shot numbers encoded as ``shot_<digits>`` in ``file_names``.

    Names without such a tag (hidden files, checkpoint folders, ...) are skipped
    instead of raising ``AttributeError`` on a failed regex match. The input
    order is preserved.
    """
    matches = (re.search(r'shot_(\d+)', file_name) for file_name in file_names)
    return [int(match.group(1)) for match in matches if match]


def _event_intervals(shot, event, variant):
    """Load the start/end times of ``event`` ('ELM' or 'H_mode') for ``shot`` from CDB.

    Returns a DataFrame with columns 'start' and 'end', one row per interval.
    np.atleast_1d promotes scalar data (a single interval) to a length-1 array,
    so single and multiple intervals are handled by the same constructor.
    """
    start = cdb.get_signal(f"t_{event}_start/SYNTHETIC_DIAGNOSTICS:{shot}:{variant}")
    end = cdb.get_signal(f"t_{event}_end/SYNTHETIC_DIAGNOSTICS:{shot}:{variant}")
    return pd.DataFrame({'start': np.atleast_1d(start.data), 'end': np.atleast_1d(end.data)})


def process_shot(shot, variant='seidl_2023'):
    """Build the labelled, time-indexed signal table for one shot.

    Loads the H-alpha, Mirnov-coil and divertor-probe signals from CDB, inner-joins
    them on time, keeps every 10th sample, flips the sign of H-alpha, trims to the
    discharge window and adds a 'mode' column ('L-mode', 'H-mode' or 'ELM').

    Parameters
    ----------
    shot : int
        Shot number.
    variant : str
        CDB variant of the synthetic-diagnostics label signals.

    Returns
    -------
    pandas.DataFrame
        Indexed by 'time', with columns 'divlp', 'h_alpha', 'mc' and 'mode'.
    """
    # Load data from CDB
    h_alpha_signal = cdb.get_signal(f"H_alpha/SPECTROMETRY_RAW:{shot}")
    mc_signal = cdb.get_signal(f"Mirnov_coil_A_theta_13_RAW/MAGNETICS_RAW:{shot}")
    divlp_signal = cdb.get_signal(f"DIVLPB01/STRATUS:{shot}")

    # Convert data to pandas dataframes
    h_alpha_df = pd.DataFrame({'time': h_alpha_signal.time_axis.data, 'h_alpha': h_alpha_signal.data})
    mc_df = pd.DataFrame({'time': mc_signal.time_axis.data, 'mc': mc_signal.data})
    div_df = pd.DataFrame({'time': divlp_signal.time_axis.data, 'divlp': divlp_signal.data})

    # Merge dataframes. !!! Most computationally expensive step !!!
    # (Original author's note: the Mirnov coil sampling frequency is 2 times
    # higher than that of the other signals.)
    signals_df = pd.merge(div_df, pd.merge(h_alpha_df, mc_df, on='time', how='inner'), on='time', how='inner')

    # Downsample: keep every 10th merged sample
    # (166 kHz according to the original note — TODO confirm).
    signals_df = signals_df.iloc[::10].set_index('time')

    # Flip the sign of the H-alpha signal.
    signals_df['h_alpha'] *= -1

    # Remove data with no plasma: keep samples strictly inside the discharge window.
    # NOTE(review): the meaning of the second argument (4e4) is defined by
    # imgs.discharge_duration and is not visible here — confirm.
    discharge_start, discharge_end = imgs.discharge_duration(shot, 4e4)
    in_discharge = (signals_df.index > discharge_start) & (signals_df.index < discharge_end)
    # .copy() detaches the result from the unfiltered frame so that adding the
    # 'mode' column below does not raise SettingWithCopyWarning.
    signals_df = signals_df[in_discharge].copy()

    # Every sample is L-mode by default; H-mode is applied first and ELMs second,
    # so ELM intervals overwrite the H-mode label wherever the two overlap.
    signals_df['mode'] = 'L-mode'
    t_ELM = _event_intervals(shot, 'ELM', variant)
    t_H_mode = _event_intervals(shot, 'H_mode', variant)

    for interval in t_H_mode.itertuples(index=False):
        signals_df.loc[interval.start:interval.end, 'mode'] = 'H-mode'

    for interval in t_ELM.itertuples(index=False):
        signals_df.loc[interval.start:interval.end, 'mode'] = 'ELM'

    return signals_df


data_dir_path = f'{path}/data/LH_alpha'
file_names = os.listdir(data_dir_path)
shot_numbers = _shot_numbers_from(file_names)

# Skip shots that were removed by hand and shots whose CSV already exists.
processed_shots = _shot_numbers_from(os.listdir(f'{path}/data/LP_MC_H_alpha'))
removed_shots = [17848]
skipped_shots = set(removed_shots) | set(processed_shots)
shot_numbers = [valid_shot for valid_shot in shot_numbers if valid_shot not in skipped_shots]

variant = 'seidl_2023'

for shot in tqdm(shot_numbers):
    print('working on shot:', shot)

    signals_df = process_shot(shot, variant)

    # Save data
    signals_df.to_csv(f'{path}/data/LP_MC_H_alpha/LP_MC_H_alpha_shot_{shot}.csv')

  0%|          | 0/41 [00:00<?, ?it/s]

working on shot: 18127
working on shot: 20144
working on shot: 20143
working on shot: 18267
working on shot: 18260
working on shot: 19240
working on shot: 18130
working on shot: 20145
working on shot: 18128
working on shot: 19263
working on shot: 18489
working on shot: 18509
working on shot: 19915
working on shot: 20146
working on shot: 17677
working on shot: 16773
working on shot: 16532
working on shot: 18200
working on shot: 16987
working on shot: 18263
working on shot: 19094
working on shot: 18057
working on shot: 16989
working on shot: 19244
working on shot: 19238
working on shot: 18133
working on shot: 18476
working on shot: 16534
working on shot: 19083
working on shot: 19393
working on shot: 17837
working on shot: 19379
working on shot: 17839
working on shot: 19925
working on shot: 20147
working on shot: 17854
working on shot: 18132
working on shot: 20009
working on shot: 19237
working on shot: 19239
working on shot: 19242
