# Generate .smet files for all GrIS locations

#### Created by Megan Thompson-Munson (2023)


**Input:** MERRA-2 .smet files (`/pl/active/icesheetsclimate/IDS_Greenland2/smet/*.smet`)

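**Output:** synthetic .smet files, two per location (`*_TA_INC.smet` and `*_TA_DEC.smet`), in `/scratch/alpine/metm9666/project-2_smet-generated-4/*.smet`. Note that `write_smet` below currently writes to `/scratch/alpine/metm9666/project-2_smet/`; confirm which directory is intended before running.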

After running this script, run SNOWPACK by executing scripts in the `Spinup` folder.

In [1]:
import numpy as np
import pandas as pd
import datetime
import glob
from scipy.interpolate import griddata

In [2]:
# Sort the matches: glob returns paths in arbitrary order, and the driver loop
# further down uses each file's position in this list both as its station ID
# and as a hard-coded restart offset, so the order must be reproducible.
files = sorted(glob.glob('/pl/active/icesheetsclimate/IDS_Greenland2/smet/*.smet'))

In [3]:
def read_smet(file, skiprows=92776, nrows=8760):
    """Read a block of rows from a .smet file and re-stamp it as calendar year 1990.

    Parameters
    ----------
    file : str
        Path to the .smet file.
    skiprows : int, optional
        Number of leading lines (header and earlier data rows) to skip.
        Default 92776. The original note on this value reads
        "Aug 1980-Jul 1991" followed by "NEED TO CHANGE BACK TO 1991";
        since only 8760 rows are read, the period is presumably
        Aug 1990-Jul 1991 -- TODO confirm against the input files.
    nrows : int, optional
        Number of data rows to read. Default 8760 (365 days x 24 rows).

    Returns
    -------
    pandas.DataFrame
        Columns timestamp, TA, QI, U, V, ISWR, ILWR, PSUM, PSUM_PH, indexed
        and sorted by the re-stamped time; `timestamp` holds the re-stamped
        time as a 'YYYY-MM-DDTHH:MM:SS' string.

    Notes
    -----
    Every timestamp has its year replaced by 1990, so a Feb 29 row cannot be
    mapped and makes `Timestamp.replace` raise ValueError.
    """
    # Column names
    names = ['timestamp','TA','QI','U','V','ISWR','ILWR','PSUM','PSUM_PH']

    # Read data, skipping the header and earlier rows. sep=r'\s+' is the
    # whitespace-delimited mode without the deprecated `delim_whitespace` flag.
    df = pd.read_table(file,skiprows=skiprows,header=None,sep=r'\s+',names=names,nrows=nrows)

    # Convert time to timestamp and set as index
    df['time'] = pd.to_datetime(df.timestamp)
    df = df.set_index('time')

    # For periods that don't start on Jan 1, fold everything into one
    # calendar year (1990) and sort so the series runs Jan -> Dec
    new_idx = [t.replace(year=1990) for t in df.index]
    df = df.reset_index(drop=True)
    df = df.set_index(pd.Index(new_idx)).sort_index()

    # Rewrite the string column so it matches the re-stamped index
    df['timestamp'] = [t.isoformat() for t in df.index]

    return df

In [4]:
def generate_base(df, n_cycles=25):
    """Tile a single 1990-stamped year into a multi-year series starting in 1980.

    A synthetic leap day is made by copying the Feb 28 rows and inserting the
    copy directly after the last Feb 28 row. Years are then laid out in the
    repeating pattern (leap, non-leap, non-leap, non-leap) and that 4-year
    pattern is repeated `n_cycles` times.

    Parameters
    ----------
    df : pandas.DataFrame
        One year of rows with a DatetimeIndex in 1990, sorted in time (as
        returned by `read_smet`). The output index is hourly, so the rows
        are assumed to be hourly (8760 rows) -- the function does not check.
    n_cycles : int, optional
        Number of 4-year cycles to generate. Default 25, i.e. 100 years
        (1980-01-01 00:30 through 2079-12-31 23:30 for 8760 hourly rows).

    Returns
    -------
    pandas.DataFrame
        Same columns as `df`, hourly index starting 1980-01-01 00:30:00, with
        the `timestamp` column rewritten as 'YYYY-MM-DDTHH:MM:SS' strings.

    Raises
    ------
    ValueError
        If `df` contains no rows dated 1990-02-28.
    """
    # Locate Feb 28 by comparing normalized timestamps. Comparing
    # `index.date` (datetime.date objects) with a Timestamp is always False
    # in pandas >= 2.0, which would leave the leap day empty.
    on_feb28 = np.asarray(df.index.normalize() == pd.Timestamp('1990-02-28'))
    if not on_feb28.any():
        raise ValueError('generate_base: no rows dated 1990-02-28 to build a leap day from')

    # First position after Feb 28 -- the copied day goes here so that it
    # follows Feb 28 23:30 instead of being spliced in before that last hour
    split = np.flatnonzero(on_feb28)[-1] + 1

    values = df.values

    # Leap year = original year with the Feb 28 copy inserted as "Feb 29"
    leap_year = np.concatenate([values[:split],values[on_feb28],values[split:]])

    # Create a 4-year dataset with the pattern (leap, no leap, no leap, no leap)
    arr_4yr = np.concatenate([leap_year,values,values,values])

    # Repeat the 4-year set (25x by default to generate 100 years)
    arr_full = np.tile(arr_4yr,(n_cycles,1))

    # Hourly index sized to the data. A Timedelta step avoids the frequency
    # alias strings whose spelling differs between pandas versions.
    ts = pd.date_range(start='1980-01-01 00:30:00',periods=len(arr_full),freq=pd.Timedelta(hours=1))
    df100a = pd.DataFrame(index=ts,columns=df.columns,data=arr_full)

    # Vectorized ISO-8601 strings ('YYYY-MM-DDTHH:MM:SS'); the index only
    # contains whole seconds, so second precision loses nothing
    df100a['timestamp'] = np.datetime_as_string(ts.values,unit='s')

    return df100a

In [5]:
# Append a perturbed copy of the base series after it (doubling its length)
def perturb(df100a,variable,magnitude=1):
    """Append a perturbed repeat of `df100a` to itself, once raised and once lowered.

    Parameters
    ----------
    df100a : pandas.DataFrame
        Base time series (as returned by `generate_base`).
    variable : str
        Name of the column to perturb.
    magnitude : number, optional
        Amount added (INC) or subtracted (DEC) in the repeated half.
        Default 1.

    Returns
    -------
    (pandas.DataFrame, pandas.DataFrame)
        `(increased, decreased)`. Each is `df100a` followed by a copy of
        itself in which `variable` is shifted by +/- `magnitude`, re-indexed
        hourly from 2080-01-01 00:30:00 with the `timestamp` column rewritten
        to match. `df100a` itself is not modified.

    Notes
    -----
    The continuation index has exactly `len(df100a)` hourly steps. For the
    default 876,600-row base this ends at 2180-01-01 23:30:00, one day past
    2179, because 2100 is not a leap year. Row order is unchanged, so after
    Feb 2100 the synthetic leap days in the data no longer line up with
    calendar Feb 29.
    """
    # Hourly index for the second half, sized to the base series so the
    # lengths always agree (Timedelta step: no version-dependent freq alias)
    ts100b = pd.date_range(start='2080-01-01 00:30:00',periods=len(df100a),freq=pd.Timedelta(hours=1))

    # 'YYYY-MM-DDTHH:MM:SS' strings for the new index, computed once and
    # shared by both outputs
    stamps = np.datetime_as_string(ts100b.values,unit='s')

    def _continuation(perturbed_values):
        # Copy of the base series carrying the perturbed column and new index
        second_half = df100a.copy()
        second_half[variable] = perturbed_values
        second_half = second_half.set_index(ts100b,drop=True)
        second_half['timestamp'] = stamps
        return pd.concat([df100a,second_half])

    increased = _continuation(df100a[variable]+magnitude)
    decreased = _continuation(df100a[variable]-magnitude)

    return increased, decreased

In [6]:
# Function to write variables to new .smet file
def write_smet(file,station,df,experiment,outdir='/scratch/alpine/metm9666/project-2_smet'):
    """Write `df` to a new .smet file, reusing the header of an existing file.

    The first 16 lines of `file` are copied as the header, except that the
    third and fourth lines are replaced by new `station_id` and
    `station_name` entries. The rows of `df` are then appended.

    Parameters
    ----------
    file : str
        Path of the original .smet file. Characters [-19:-5] of this path
        (the 14 characters before the 5-character extension) become the
        start of the output file name.
    station : object
        Station identifier; written as the station name and combined with
        `experiment` to form the station ID.
    df : pandas.DataFrame
        Data to write. Must have 9 columns matching the output format:
        one string followed by eight numbers.
    experiment : str
        Experiment label appended to the file name and station ID.
    outdir : str, optional
        Directory the new file is written to. Defaults to the project
        scratch directory.
    """
    out_path = '{}/{}_{}.smet'.format(outdir,file[-19:-5],experiment)

    # Open the source first so a missing input does not leave an empty output
    # file behind; `with` closes both handles even if a write fails
    with open(file,'r') as f_original, open(out_path,'w') as f_new:

        # Copy the header, swapping in the new station ID and name
        for i in range(16):
            header = f_original.readline()
            if i == 2:
                f_new.write('station_id       = {}_{}\n'.format(station,experiment))
            elif i == 3:
                f_new.write('station_name     = {}\n'.format(station))
            else:
                f_new.write(header)

        # Append the data rows through the same handle
        np.savetxt(f_new,df.values,fmt='%s %.2f %.10f %.3f %.3f %.1f %.20f %.3f %.1f')

In [7]:
def generate_synth_file(fname,station,df,variable='',write='no'):
    """Build the perturbed synthetic series for one location and optionally write them.

    Parameters
    ----------
    fname : str
        Path of the original .smet file (passed through to `write_smet`).
    station : object
        Station identifier passed through to `write_smet`.
    df : pandas.DataFrame
        Single-year input passed to `generate_base`.
    variable : str, optional
        Column to perturb; also used in the experiment label.
    write : str, optional
        Files are written only when this is exactly 'yes'.

    Returns
    -------
    tuple
        The (increased, decreased) pair produced by `perturb`.
    """
    # Base series first, then the two perturbed extensions of it
    base_series = generate_base(df)
    series_inc, series_dec = perturb(base_series, variable)

    # Skip the disk writes unless explicitly requested
    if write != 'yes':
        return series_inc, series_dec

    for series, tag in ((series_inc, 'INC'), (series_dec, 'DEC')):
        write_smet(fname, station, series, '{}_{}'.format(variable, tag))

    return series_inc, series_dec

In [10]:
# Process every input file from list position 1427 onward (earlier positions
# are skipped); a file's position in `files` is used as its station number
for i, smet_path in enumerate(files[1427:], start=1427):
    generate_synth_file(
        fname=smet_path,
        station=i,
        df=read_smet(smet_path),
        variable='TA',
        write='yes',
    )

In [20]:
# Count the generated .smet files; halved because each location is written
# as a pair of files (one INC, one DEC)
nfiles = glob.glob(
    '/scratch/alpine/metm9666/project-2_smet/*.smet'
)
0.5 * len(nfiles)

1985.0

In [2]:
# Count the SNOWPACK .ini files in the Spinup folder; halved presumably
# because there are two per location (INC and DEC) -- confirm
nfiles = glob.glob(
    '/projects/metm9666/project-2/Spinup/cfgfiles/*.ini'
)
0.5 * len(nfiles)

1288.0