# Interpolate 2-sec_KP_ascent_data to 137 levels -> gen csv

In [None]:
import glob
import os
from datetime import datetime, timedelta

import numpy as np
import pandas as pd
from scipy.interpolate import interp1d

In [None]:
def round_(x, digits=0):
    """Round ``x`` to ``digits`` decimal places, with ties going away from zero.

    The built-in ``round`` rounds ties to the nearest even digit; this helper
    instead rounds 0.5 -> 1, 2.5 -> 3 and -2.5 -> -3. The decision is made on
    the decimal representation of the float (``repr``), so 2.675 rounds to
    2.68 even though its binary value is slightly below 2.675.

    Args:
        x: Number to round. Integers (Python or NumPy) and non-finite values
            (NaN, +/-inf) are returned unchanged.
        digits: Number of decimal places to keep.

    Returns:
        ``x`` itself when nothing has to be rounded, otherwise a float.
    """
    from decimal import ROUND_HALF_UP, Decimal, InvalidOperation

    # Integers carry no fractional part; NaN/inf cannot be quantized.
    if isinstance(x, (int, np.integer)) or not np.isfinite(x):
        return x

    try:
        # Decimal(1).scaleb(-2) == Decimal('0.01'): the requested precision.
        quantum = Decimal(1).scaleb(-int(digits))
        # repr() also yields valid Decimal input for scientific notation
        # ('1e-07'), which the previous string-splitting approach choked on.
        rounded = Decimal(repr(float(x))).quantize(quantum, rounding=ROUND_HALF_UP)
    except InvalidOperation:
        # The result would need more digits than the decimal context holds;
        # such a value has no digits left at the requested position anyway.
        return x
    return float(rounded)

In [None]:
def interpolate_df(df, date, levels=None):
    """Linearly interpolate one sounding onto a set of pressure levels.

    Args:
        df: DataFrame with at least the columns 'PRES', 'TEMP', 'DWPT',
            'Wdir' and 'Wspd'. Rows repeating an already seen 'PRES' value
            are ignored (the first occurrence wins).
        date: Value written to the 'date' column of every output row.
        levels: Pressure values to interpolate to. When omitted, the
            module-level name ``xnew`` is used, which keeps existing
            two-argument calls working.

    Returns:
        DataFrame with columns 'PRES', 'TEMP', 'DWPT', 'Wdir', 'Wspd' and
        'date', one row per requested level. Levels outside the observed
        pressure range receive the value measured at the nearest end of the
        profile; 'PRES' itself is extrapolated and so equals the requested
        level.
    """
    columns = ['PRES', 'TEMP', 'DWPT', 'Wdir', 'Wspd']
    target = xnew if levels is None else levels

    # interp1d needs every x value once; sorting lets us read the values at
    # the lowest and highest pressure directly from the first and last row.
    profile = df.drop_duplicates(subset=['PRES'], keep='first').sort_values('PRES')
    at_min_pres = profile.iloc[0]
    at_max_pres = profile.iloc[-1]

    interpolated = {}
    for column in columns:
        if column == 'PRES':
            fill_value = 'extrapolate'
        else:
            # (below, above): used for levels under the smallest and over
            # the largest observed pressure respectively.
            fill_value = (at_min_pres[column], at_max_pres[column])
        f = interp1d(profile['PRES'], profile[column],
                     bounds_error=False, fill_value=fill_value)
        interpolated[column] = np.atleast_1d(f(target))

    new_df = pd.DataFrame(interpolated, columns=columns)
    new_df['date'] = date
    return new_df

In [None]:
def _mean_pressure_levels(EC_tephi_list):
    """Return the level-wise mean pressure over the given EC tephi files."""
    pres_list = []
    for path in EC_tephi_list:
        data = pd.read_csv(path, skiprows=4, sep=r'\s+',
                           names=["level", "PRES", "TEMP", "DWPT", "U", "V", "SPFH"])
        # Largest pressure first (same order as sorting ascending by 1/PRES).
        ordered = data.sort_values(by='PRES', ascending=False)
        pres_list.append(ordered['PRES'].values)
    if not pres_list:
        raise ValueError('EC_tephi_list must contain at least one file')
    # zip() stops at the shortest profile, so only levels present in every
    # file contribute to the mean.
    return np.mean(list(zip(*pres_list)), axis=1)


def _concat_saved_profiles(out_dir, csv_names):
    """Read the named CSV files from ``out_dir`` and stack them in order."""
    frames = [pd.read_csv(os.path.join(out_dir, name)) for name in csv_names]
    # pd.concat rejects an empty list; keep writing an empty table instead.
    return pd.concat(frames) if frames else pd.DataFrame()


def gen_csv_interpolated_to_137(EC_tephi_list, base_dir='/home/deeplearn/cslau'):
    """Interpolate every 00Z/12Z ascent file to common levels and write CSVs.

    Steps:
      1. Build the target pressure grid: the mean pressure per level over
         ``EC_tephi_list``, rounded with ``round_`` and restricted to
         values >= 50.
      2. For each file in ``<base_dir>/2-sec_KP_ascent_data`` whose name ends
         in '00' or '12' and is not listed in the 'Problem' column of
         'problem_date_upto100hPa.txt', interpolate the sounding with
         ``interpolate_df`` and save it to
         ``<base_dir>/137_interpolate_KP_ascent_data/<file name>.csv``.
         Existing outputs are left untouched; a failing file is reported
         with its error and skipped.
      3. Stack all per-file CSVs per hour into
         './tephi_interpolate_90_levels_00Z.csv' and
         './tephi_interpolate_90_levels_12Z.csv' unless those already exist.

    Args:
        EC_tephi_list: Paths of the whitespace-separated EC tephi files that
            define the target levels (four header lines are skipped).
        base_dir: Directory holding the input and per-file output folders.

    Raises:
        ValueError: If ``EC_tephi_list`` is empty.
    """
    # interpolate_df(df, date) resolves its target grid through the
    # module-level name ``xnew``; publish the grid there so the call below
    # does not fail with a NameError.
    global xnew

    out_dir = os.path.join(base_dir, '137_interpolate_KP_ascent_data')
    os.makedirs(out_dir, exist_ok=True)

    file_path = sorted(glob.glob(os.path.join(base_dir, '2-sec_KP_ascent_data', '*')))
    file_path = [path for path in file_path if path.endswith(('00', '12'))]
    skip_date = set(pd.read_csv('problem_date_upto100hPa.txt')['Problem'].values)

    xnew = np.vectorize(round_)(_mean_pressure_levels(EC_tephi_list))
    xnew = xnew[xnew >= 50]

    for file in file_path:
        file_name = os.path.basename(file)
        if file_name in skip_date:
            continue
        output = os.path.join(out_dir, file_name + '.csv')
        if os.path.exists(output):
            # Never overwritten, so there is no point in recomputing it.
            continue

        date = file.split('_')[-1]
        df = pd.read_csv(file, skiprows=3, sep=r'\s+',
                         names=["minute", "sec", "AscR", "HGHT", "PRES", "TEMP",
                                "RELH", "DWPT", "Wdir", "Wspd"])
        # Non-numeric entries become NaN so that dropna removes those rows.
        df = df.apply(pd.to_numeric, errors='coerce')
        df = df.drop(columns='AscR').dropna(axis='index', how='any')

        try:
            interpolate_df(df, date).to_csv(output, index=False)
        except Exception as exc:
            # Best effort: report the sounding and carry on with the rest.
            print('{}: {!r}'.format(file_name, exc))

    csv_files = sorted(os.listdir(out_dir))
    for hour in ('00', '12'):
        out_filename = './tephi_interpolate_90_levels_' + hour + 'Z.csv'
        if os.path.exists(out_filename):
            continue
        hour_files = [name for name in csv_files if name.split('.')[0][-2:] == hour]
        _concat_saved_profiles(out_dir, hour_files).to_csv(out_filename, index=False)