In [1]:
import pandas as pd
import datetime

# Functions for precursors calculation

In [2]:
def running_avg(sonde_file, parameters, windowsize=15):
    t = pd.read_csv(sonde_file, sep='\t')
    
    t = t[['sonde', 'year', 'date', 'h', 'm'] + parameters]
    t = t.sort_values(by=['date'])
    
    res = pd.DataFrame()
    
    for h, m in t[['h', 'm']].drop_duplicates().values: 
        t_subset = t.loc[(t.h == h) & (t.m == m)]
        ra = t_subset[parameters].rolling(15, min_periods=1).mean().shift(1)
        nd = t_subset[parameters].rolling(15, min_periods=1).count().shift(1)
        
        ra.rename(index=int,
                  columns=dict(zip(parameters, [p + '_running_avg' for p in parameters])),
                  inplace=True)
        nd.rename(index=int,
                  columns=dict(zip(parameters, [p + '_n_days' for p in parameters])),
                  inplace=True)
        
        res = pd.concat([res, t_subset.join(ra).join(nd)])
        
    return res

In [3]:
def sonde_subset(sonde, parameters, dates, range_):
    subset = pd.DataFrame()
    
    sonde['date'] = sonde['date']\
                   .apply(lambda x: datetime.datetime.strptime(x, '%Y-%m-%d'))
    min_date = sonde['date'].min()
    sonde['day_temp'] = sonde['date'].apply(lambda x: (x - min_date).days)
    
    for date in dates:
        day = (date - min_date).days
        subset = pd.concat([subset,
                           sonde.loc[(sonde.day_temp - day >= range_[0]) 
                                    & (sonde.day_temp - day <= range_[1])][
                               ['sonde', 'year', 'date', 'h', 'm'] + parameters]
                           ])
    return subset

def correlation(sonde_file1, sonde_file2, parameters, earthquakes_dates, range_=[-10, 4]):
    s1 = pd.read_csv(sonde_file1, sep='\t')
    s2 = pd.read_csv(sonde_file2, sep='\t')
    
    earthquakes_dates = [datetime.datetime.strptime(d, '%Y-%m-%d') 
                         for d in earthquakes_dates]
    s1 = sonde_subset(s1, parameters, earthquakes_dates, range_)
    s2 = sonde_subset(s2, parameters, earthquakes_dates, range_)
    
    s1.rename(index=int,
             columns=dict(zip(['sonde'] + parameters, 
                              ['sonde1'] + [p + '1' for p in parameters])),
             inplace=True)
    s2.rename(index=int,
             columns=dict(zip(['sonde'] + parameters, 
                              ['sonde2'] + [p + '2' for p in parameters])),
             inplace=True)
    
    merged = pd.merge(s1, s2, how='inner', on=['year', 'date', 'h', 'm'])
    res = pd.DataFrame()
    row = {
            'sonde1': merged.sonde1.values[0],
            'sonde2': merged.sonde2.values[0],
        }
    
    for d, y in merged[['date', 'year']].drop_duplicates().values:
        row['date'] = d
        row['year'] = int(y)
        subset = merged.loc[merged.date == d]
        for p in parameters:
            row[p + '_corr'] = subset[p+'1'].corr(subset[p+'2'], min_periods=2)
            row[p + '_n_hours'] = subset[['h', p+'1', p+'2']].dropna().h.nunique()
            row[p + '_total_time_points'] = subset[['h', p+'1', p+'2']].dropna().shape[0]
            
        res = res.append(row, ignore_index=True)
    
    return res

# Example of using

In [4]:
vt139 = running_avg('../ionosondes_data/VT139.csv', ['foF2'])

In [5]:
vt139.head()

Unnamed: 0,sonde,year,date,h,m,foF2,foF2_running_avg,foF2_n_days
34886,VT139,2002,2002-02-26,2,0,6.8,,
34937,VT139,2002,2002-02-27,2,0,7.136,6.8,1.0
34997,VT139,2002,2002-02-28,2,0,6.559,6.968,2.0
35048,VT139,2002,2002-03-01,2,0,5.941,6.831667,3.0
35075,VT139,2002,2002-03-02,2,0,5.2,6.609,4.0


In [6]:
vt_vs_ra = correlation('../ionosondes_data/VT139.csv', '../ionosondes_data/RA041.csv',
                      ['foF2'], ['2019-09-21'])

In [7]:
vt_vs_ra.head(10)

Unnamed: 0,date,foF2_corr,foF2_n_hours,foF2_total_time_points,sonde1,sonde2,year
0,2019-09-11,0.960367,24.0,80.0,VT139,RA041,2019.0
1,2019-09-12,0.968764,23.0,87.0,VT139,RA041,2019.0
2,2019-09-13,0.936932,22.0,68.0,VT139,RA041,2019.0
3,2019-09-14,0.960135,21.0,74.0,VT139,RA041,2019.0
4,2019-09-15,0.93688,23.0,76.0,VT139,RA041,2019.0
5,2019-09-16,0.920758,22.0,67.0,VT139,RA041,2019.0
6,2019-09-17,0.942272,24.0,91.0,VT139,RA041,2019.0
7,2019-09-18,0.974411,23.0,80.0,VT139,RA041,2019.0
8,2019-09-19,0.978056,20.0,66.0,VT139,RA041,2019.0
9,2019-09-20,0.949523,22.0,80.0,VT139,RA041,2019.0
