In [82]:
# %load qbo_fub2netcdf.py
#!/usr/bin/python3
'''
Read QBO data from the following file: http://www.geo.fu-berlin.de/met/ag/strat/produkte/qbo/qbo.dat
'''

import matplotlib as mpl
from matplotlib import dates
import datetime
import xarray as xr
import pandas as pd
import numpy as np
import cftime


def readsing2(nmonth,nyear):
    headerlines = []
    ye = []
    dats = []
    count = 0
    date = []
    #url = "http://www.geo.fu-berlin.de/met/ag/strat/produkte/qbo/singapore.dat"
    #download(url,"Data/singapore.dat")
    with open('../QBO/qbo_data/singapore.dat') as file:
        for i in range(3):
            headerlines.append(file.readline().strip())
        for year in range(1987,nyear):
            if year == nyear:
                data = np.zeros([nmonth,15])*np.nan
            else:
                data = np.zeros([12,15])*np.nan
            ye.append(file.readline().strip())
            file.readline()
            if (year < 1997):
                data = np.zeros([12,15])*np.nan
                for i in range(14):
                    cols = file.readline()
                    cols = cols.strip().split()
                    for j in range(1,13):
                        if i == 1:
                            date.append(datetime.datetime(year, j, 1))
                        data[j-1,i] = float(cols[j])
            else:
                for i in range(15):
                    cols = file.readline()
                    cols = cols.strip().split()
                    for j in range(1,13):
                        if j < nmonth+1 or year < nyear:
                            if i == 1:
                                # print(year)
                                date.append(datetime.datetime(year, j, 1))
                            data[j-1,i] = float(cols[j])
                        # data[j-1,i] = float(cols[j])
            dats.extend([list(i) for i in data])
            file.readline()
            count = count + 1
#    print(np.shape(dats))
#    print(np.shape(np.array(dats)))
    pressure = [100, 90, 80, 70, 60, 50, 45, 40, 35, 30, 25, 20, 15, 12, 10]
    altitude = -7*np.log(np.array(pressure)/1013.15)
    fds = list(mpl.dates.date2num(date))
#    for i in range(np.shape(dats)[0]-np.shape(fds)[0]):
#        fds.append(np.nan)
    fds = np.array(fds)
    # print(np.shape(fds))
    print(headerlines)
    return np.array(dats).T[::-1],fds,pressure,altitude, 

def readsing():
    #url = "http://www.geo.fu-berlin.de/met/ag/strat/produkte/qbo/qbo.dat"
    #download(url,"Data/singapore_winds_1953-2017_qbo.dat")

# Open the file to read the header
    with open('../QBO/qbo_data/qbo.dat', 'r') as file:
        # Read the first 9 lines and print them
        header_lines = [file.readline().strip() for _ in range(9)]
        print("\n".join(header_lines))
    
    data = np.genfromtxt('../QBO/qbo_data/qbo.dat', skip_header=9,
                         dtype=['S6', 'S4', 'i4', 'i1', 'i4', 'i1', 'i4', 'i1', 'i4', 'i1', 'i4', 'i1', 'i4', 'i1', 'i4', 'i1'],
                         names=['station', 'date', 'p70', 'n70', 'p50', 'n50', 'p40', 'n40', 'p30', 'n30', 'p20', 'n20', 'p15', 'n15', 'p10', 'n10'],
                         delimiter=[6, 4, 6, 2, 5, 2, 5, 2, 5, 2, 5, 2, 5, 2, 5, 2],
                         filling_values=-999999, missing_values=' ')

    # Print the header information
    
    print(np.unique(data['station']))

    station = np.char.decode(data['station'], 'utf-8').astype(str)
    station = np.char.strip(station)
    
    #data = np.genfromtxt('../QBO/qbo_data/qbo.dat',skip_header=9,dtype=['S6','S4','i4','i1','i4','i1','i4','i1','i4','i1','i4','i1','i4','i1','i4','i1'],names=['station','date','p70','n70','p50','n50','p40','n40','p30','n30','p20','n20','p15','n15','p10','n10'],delimiter=[6,4,6,2,5,2,5,2,5,2,5,2,5,2,5,2],filling_values=-999999,missing_values=' ')#
    date = []
    #station = []
    for i in range(len(data)):
        #station.append(data['station'][i])
        if int(data['date'][i]) > 5000:
            date.append(datetime.datetime.strptime('19'+(data['date'][i]).decode('UTF-8'),'%Y%m'))
        else:
            date.append(datetime.datetime.strptime('20'+(data['date'][i]).decode('UTF-8'),'%Y%m'))

    # print(date[-1])
    fds = mpl.dates.date2num(date)
#
    up = np.array(list([data['p70'],data['p50'],data['p40'],data['p30'],data['p20'],data['p15'],data['p10']]))

    pressure = [70,50,40,30,20,15,10]
    altitude = -7*np.log(np.array(pressure)/1013.15)
    return up,fds,pressure,altitude,station

def convert2netcdf(date,u,pressure,fname,station):

    # Replace NaN values in 'u' with a specific fill value (e.g., -9999)
    #u = np.where(np.isnan(u), -9999, u)

    ds = xr.Dataset({'u': (['time','pressure'], u),}, 
                    coords={
                            'pressure': pressure,
                            'time': date,
                           })
    
# Create a new variable for the quality flag
    ds['u_qc'] = xr.DataArray(
        np.where((ds['u'] >= -200) & (ds['u'] <= 200), 0, 1),
        coords=ds['u'].coords, dims=ds['u'].dims
    )
    last = lastmonth(date)
    print(last)
    dt = ds.time.values[:-1]-ds.time.values[1:]
        # Create time_bounds variable with appropriate shape
#    time_bounds = xr.DataArray(
#        np.array([date, date+dt]).T,
#        dims=['time', 'bounds'],
#        attrs={'units': 'days since 1950-01-01 00:00:00'}
#    )
#
    ds['time'].attrs     = {'standard_name': 'time', 
                            'units': 'days since 1950-01-01 00:00:00', 
                            'cell_methods': 'time: mean over months',}
#                            'bounds': time_bounds}  
    ds['pressure'].attrs = {'standard_name': 'air_pressure', 
                            'units': 'hPa'}
    ds['u'].attrs        = {'standard_name': 'eastward_wind', 
                            'units': 'm/s',
                            'cell_methods': 'monthly mean',
                            '_FillValue': np.nan,
                            'u_qc': 0}
    
    ds['u_qc'].attrs = {'long_name': 'Quality flag for eastward wind', 
                        'flag_values': [0, 1],
                        'flag_meanings': 'interpolated missing'}
    # Set bounds for the 'time' variable
#    ds['time'].attrs['bounds'] = time_bounds

    ds.attrs = {'Conventions': 'CF-1.8', 
                'title': 'Monthly mean zonal winds', 
                'history': 'Created by Tobias Kerzenmacher using FUB processing chain', 
                'source': 'FUB and Singapore radiosondes',
                'comment': 'Monthly mean zonal winds at the levels 100, 90, 80, 70, 60, 50, 45, 40, 35, 30, 25, 20, 15, 12, and 10-hPa from Radiosonde data of of the three radiosonde stations Canton Island (closed 1967), Gan/Maledive Islands (closed 1975), and Singapore near the equator from 1953 to the present.',
                'institution': 'Karlsruhe Institute of Technology (KIT), Institute of Meteorology and Climate Research (IMK), Germany',
                'institution_id': 'ROR:04t3en479',
                'license': 'CC-BY 4.0',
                'keywords': 'QBO, radiosonde, zonal wind',
                'frequency': '1M',
                'creator_url': 'https://orcid.org/0000-0001-8413-0539',
                'creator_email': 'kerzenmacher@kit.edu',
                'contact': 'kerzenmacher@kit.edu',
                'creation_date': datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S'),
                'further_info_url': 'https://www.atmohub.kit.edu/english/807.php',
                'references': 'Barbara Naujokat (1986) doi: https://doi.org/10.1175/1520-0469(1986)043<1873:AUOTOQ>2.0.CO;2  Christian Marquardt (1998) Die tropische QBO und dynamische Prozesse in der Stratosphäre. PhD Thesis, Met. Abh. FU-Berlin, Serie A, Band 9/Heft 4, Verlag Dietrich Reimer Berlin, 260 S.',
                'standard_name_vocabulary': 'CF Standard Name Table, Version 83',
               }

    #ds.coords['time'].attrs = {'units': time_units}
    # Write to NetCDF file
    ds.to_netcdf(fname)
    return(ds)

#def main():
    # -------------READ----------------------------------------------
up1,fds1,pressure1,altitude1,station = readsing()
tnmoth = np.shape(up1)[1] # total number of months
up = np.zeros([15,tnmoth])*np.nan
nmonth = np.shape(up1)[1]%12 # number of months above a whole year
nyear = mpl.dates.num2date(fds1[-1]).year
up2,fds2,pressure,altitude = readsing2(nmonth,nyear)
fds = fds1
fds[-fds2.size:] = fds2
fds = np.array(fds)

up1 = up1*1.
up1[up1 < -10000] = np.nan
up[3,:] = up1[0,:]
up[5,:] = up1[1,:]
up[7,:] = up1[2,:]
up[9,:] = up1[3,:]
up[11,:] = up1[4,]
up[12,:] = up1[5,:]
up[14,:] = up1[6,:]
up[-up2.shape[0]:,-up2.shape[1]:] = up2
#ind = ~np.isnan(up)
#time,press = np.meshgrid(fds,pressure)
#uu = np.array(up[ind])*.1
# time = time[ind]
# press = press[ind]


time = mpl.dates.num2date(fds)
date = cftime.date2num(time, 'days since 1950-01-01 00:00:00', 'standard')
print(time[0].year,time[0].month)
fname = 'eastward_wind_'+str(time[0].year)+str(time[0].month)+'.nc'
fname = 'eastward_wind_{}{:02d}.nc'.format(time[0].year, time[0].month)
ds=convert2netcdf(date,up.T*0.1,pressure,fname, station)
    

Monthly mean zonal wind components u (0.1 m/s)

IIIII=91700      CANTON ISLAND  02 46 S  171 43 W
=43599      GAN/MALEDIVES  00 41 S   73 09 E
=48694/698  SINGAPORE      01 22 N  103 55 E
YY=year  MM=month
N=1-9 if less than 10 daily values, =0 inter/extrapolated values

IIIII YYMM  70hPaN 50hPaN 40hPaN 30hPaN 20hPaN 15hPaN 10hPaN
[b'08694 ' b'43599 ' b'48694 ' b'48698 ' b'48964 ' b'48968 ' b'91700 ']
['Monthly mean zonal wind components ( 0.1 m/s)', 'at Singapore (48698), 1N/104E', '']
1953 1


TypeError: int() argument must be a string, a bytes-like object or a real number, not 'cftime._cftime.DatetimeGregorian'

In [73]:
ds.time

In [94]:
def lastmonth(date):
    # Create a new array to store the original dates
    time = np.zeros_like(date)

    # Convert the dates to datetime objects
    for i in range(len(date)):
        time[i] = cftime.num2date(date[i], 'days since 1950-01-01 00:00:00')

    # Convert the 'time' variable to a datetime64 array
    #time = pd.to_datetime(ds_time, format='%Y-%m-%d')

    # Calculate the delta time
    delta_time = time.diff()

    # Get the last month of the time series
    last_month = time.iloc[-1].month
    print(last_month)
    # Add the appropriate number of days to the last month in the delta time
    if last_month == 1:
        delta_time.iloc[-1] += 31
    elif last_month == 2:
        delta_time.iloc[-1] += 28
    elif last_month == 3:
        delta_time.iloc[-1] += 31
    elif last_month == 4:
        delta_time.iloc[-1] += 30
    elif last_month == 5:
        delta_time.iloc[-1] += 31
    elif last_month == 6:
        delta_time.iloc[-1] += 30
    elif last_month == 7:
        delta_time.iloc[-1] += 31
    elif last_month == 8:
        delta_time.iloc[-1] += 31
    elif last_month == 9:
        delta_time.iloc[-1] += 30
    elif last_month == 10:
        delta_time.iloc[-1] += 31
    elif last_month == 11:
        delta_time.iloc[-1] += 30
    else:
        delta_time.iloc[-1] += 31

    # Convert the delta time back to a datetime64 array
    delta_time = pd.to_datetime(delta_time, format='%j')

    # Print the delta time
    return(delta_time)

In [95]:
import numpy as np
import cftime

def lastmonth(date):
    # Convert the dates to datetime objects
    time = cftime.num2date(date, 'days since 1950-01-01 00:00:00')

    # Calculate the delta time
    delta_time = np.diff(time)

    # Get the last month of the time series
    last_month = time[-1].month

    # Add the appropriate number of days to the last month in the delta time
    if last_month == 1:
        delta_time[-1] += np.timedelta64(31, 'D')
    elif last_month == 2:
        delta_time[-1] += np.timedelta64(28, 'D')
    elif last_month == 3:
        delta_time[-1] += np.timedelta64(31, 'D')
    elif last_month == 4:
        delta_time[-1] += np.timedelta64(30, 'D')
    elif last_month == 5:
        delta_time[-1] += np.timedelta64(31, 'D')
    elif last_month == 6:
        delta_time[-1] += np.timedelta64(30, 'D')
    elif last_month == 7:
        delta_time[-1] += np.timedelta64(31, 'D')
    elif last_month == 8:
        delta_time[-1] += np.timedelta64(31, 'D')
    elif last_month == 9:
        delta_time[-1] += np.timedelta64(30, 'D')
    elif last_month == 10:
        delta_time[-1] += np.timedelta64(31, 'D')
    elif last_month == 11:
        delta_time[-1] += np.timedelta64(30, 'D')
    else:
        delta_time[-1] += np.timedelta64(31, 'D')

    # Print the delta time
    return delta_time


In [96]:
lastmonth(date)

UFuncTypeError: ufunc 'add' cannot use operands with types dtype('O') and dtype('<m8[D]')

In [115]:
import numpy as np
import pandas as pd
import cftime
#from dateutil.relativedelta import relativedelta

def add_month(date):
    # Convert the dates to datetime objects
    time = cftime.num2date(date, 'days since 1950-01-01 00:00:00')

    # Convert the 'time' variable to a pandas datetime series
    time_series = pd.Series(time)
    time_series += pd.DateOffset(months=1)
    #time_series = time_series.apply(lambda x: x + relativedelta(months=1))
    #time_series = time_series.apply(lambda x: x.to_pydatetime() + relativedelta(months=1))

    # Calculate the delta time
    # delta_time = time_series.diff()



    # Print the delta time
    return delta_time


In [141]:
import cftime
from dateutil.relativedelta import relativedelta
from datetime import datetime


def add_month(date):
    # Convert the date to a datetime object
    time = cftime.num2date(date, 'days since 1950-01-01 00:00:00')
    time = np.datetime64(time, 's')
    print(time)
    # Add one month to the datetime object

    new_date = time + np.timedelta64(1, 'M').astype('timedelta64[D]')+ np.timedelta64(1, 'D').astype('timedelta64[D]')
    print(new_date)
    # Convert the new date back to a CFTimeIndex object
    new_date = cftime.date2num(new_date, 'days since 1950-01-01 00:00:00')

    return new_date
    
add_month(date[-1])


2022-12-01T00:00:00
2023-01-01T00:00:00


AttributeError: 'numpy.datetime64' object has no attribute 'year'

In [118]:
import numpy as np
import pandas as pd
import cftime

def add_month(date):
    # Convert the dates to datetime objects
    time = cftime.num2date(date, 'days since 1950-01-01 00:00:00')

    # Convert the 'time' variable to a pandas datetime series
    time_series = pd.Series(time)
    
    # Add a month to the time series
    #time_series += pd.DateOffset(months=1)
    time_series = time_series.apply(lambda x: x + relativedelta(months=1))

    return time_series

# Example usage:
# Assuming 'your_date_variable' is your original datetime series
your_date_variable = date# your datetime series here

result = add_month(your_date_variable)
print(result)

TypeError: unsupported operand type(s) for +: 'cftime._cftime.DatetimeGregorian' and 'relativedelta'

In [52]:
date[1:]

array([ 1127,  1155,  1186,  1216,  1247,  1277,  1308,  1339,  1369,
        1400,  1430,  1461,  1492,  1520,  1551,  1581,  1612,  1642,
        1673,  1704,  1734,  1765,  1795,  1826,  1857,  1885,  1916,
        1946,  1977,  2007,  2038,  2069,  2099,  2130,  2160,  2191,
        2222,  2251,  2282,  2312,  2343,  2373,  2404,  2435,  2465,
        2496,  2526,  2557,  2588,  2616,  2647,  2677,  2708,  2738,
        2769,  2800,  2830,  2861,  2891,  2922,  2953,  2981,  3012,
        3042,  3073,  3103,  3134,  3165,  3195,  3226,  3256,  3287,
        3318,  3346,  3377,  3407,  3438,  3468,  3499,  3530,  3560,
        3591,  3621,  3652,  3683,  3712,  3743,  3773,  3804,  3834,
        3865,  3896,  3926,  3957,  3987,  4018,  4049,  4077,  4108,
        4138,  4169,  4199,  4230,  4261,  4291,  4322,  4352,  4383,
        4414,  4442,  4473,  4503,  4534,  4564,  4595,  4626,  4656,
        4687,  4717,  4748,  4779,  4807,  4838,  4868,  4899,  4929,
        4960,  4991,

In [58]:
date[0].year

AttributeError: 'numpy.int64' object has no attribute 'year'

In [78]:
ds.time