In [1]:
def make_dir(path):
    """Create directory ``path`` (including missing parents) if it is absent.

    Args:
        path: Directory path to create.

    The call is a no-op when ``path`` already exists. Creation is attempted
    directly instead of checking ``os.path.exists`` first, so a directory
    created concurrently by another process between a check and the creation
    can no longer make this function fail.
    """
    try:
        os.makedirs(path)
    except FileExistsError:
        # Something already lives at `path`; as before, leave it untouched.
        pass

def to_datetime(x):
    """Build a new NumPy array from ``x`` with dtype ``numpy.datetime64``.

    Args:
        x: Value or sequence accepted by ``np.array`` for datetime64
            conversion.

    Returns:
        The array produced by ``np.array(x, dtype=np.datetime64)``.
    """
    target_dtype = np.datetime64
    converted = np.array(x, dtype=target_dtype)
    return converted

def get_air_quality_sensors(df, lat_aq, lon_aq, limit=20, in_degrees=False):
    """Return the names of the stations nearest to a reference point.

    The haversine great-circle distance (sphere radius 6373 km) is computed
    between ``(lat_aq, lon_aq)`` and every row of ``df``. Station names are
    returned ordered by increasing distance.

    Args:
        df: DataFrame that exposes ``name``, ``lat`` and ``lon`` columns
            after ``reset_index()``.
        lat_aq: Latitude of the reference point.
        lon_aq: Longitude of the reference point.
        limit: Upper bound of the slice applied to the sorted station list.
        in_degrees: When true, every coordinate (reference point and
            stations) is converted from degrees to radians before the
            distance is computed. Defaults to ``False``, which keeps the
            historical behaviour of passing the values to the trigonometric
            functions unchanged.

    Returns:
        list: Station names, closest first.

    NOTE(review): the trigonometric functions work in radians. If the
    ``lat``/``lon`` columns and the reference point are stored in degrees
    (presumably the case -- confirm against the callers), the default path
    yields distorted distances and possibly a wrong ordering; such callers
    should pass ``in_degrees=True``.
    """
    # Local import: only required for the opt-in degree conversion.
    from math import radians

    earth_radius_km = 6373.0

    station_list = df.reset_index()[['name', 'lat', 'lon']]

    if in_degrees:
        lat_aq, lon_aq = radians(lat_aq), radians(lon_aq)
    cos_lat_aq = cos(lat_aq)  # loop-invariant factor of the haversine term

    names, distances = [], []

    for name, lat, lon in station_list.values:
        if in_degrees:
            lat, lon = radians(lat), radians(lon)

        dlon = lon - lon_aq
        dlat = lat - lat_aq

        a = sin(dlat / 2)**2 + cos_lat_aq * cos(lat) * sin(dlon / 2)**2
        # Mathematically 0 <= a <= 1; guard against rounding pushing it just
        # outside that range, which would make a square root below fail.
        # Plain comparisons are used so that NaN still propagates.
        if a > 1.0:
            a = 1.0
        elif a < 0.0:
            a = 0.0
        c = 2 * atan2(sqrt(a), sqrt(1 - a))

        # Kilometres -> whole metres (truncated).
        distance = int(earth_radius_km * c * 1000)

        names.append(name)
        distances.append(distance)

    df_stations = pd.DataFrame({'station': names, 'distance_m': distances})
    df_stations = df_stations.sort_values(by=['distance_m'])
    return df_stations[:limit]['station'].values.tolist()

def return_stats(x, y):
    """Compute correlation and error statistics between ``x`` and ``y``.

    Args:
        x: First series of values; its standard deviation is used for the
            ``rpd`` ratio.
        y: Second series of values, element-wise comparable with ``x``.

    Returns:
        tuple: ``(r, r2, mse, rmse, rpd, pvalue)`` where ``r`` and ``pvalue``
        come from ``stats.pearsonr``, ``r2`` is ``r`` squared, ``mse`` is the
        mean of the squared differences ``y - x``, ``rmse`` is its square
        root and ``rpd`` is ``np.std(x)`` divided by ``rmse``.
    """
    r, pvalue = stats.pearsonr(x, y)

    squared_errors = (y - x)**2
    mse = np.mean(squared_errors)
    rmse = np.sqrt(mse)

    spread = np.std(x)
    rpd = spread / rmse

    return r, r ** 2, mse, rmse, rpd, pvalue