In [1]:
import cdsapi
import netCDF4
from netCDF4 import num2date
import numpy as np
import os
import pandas as pd

In [2]:
def getData(f, metric, date):
    """Return the mean of one variable over the whole grid at a single time index.

    Parameters
    ----------
    f : netCDF dataset-like object exposing a ``variables`` mapping.
    metric : str
        Key of the variable in ``f.variables``; it must have exactly three
        dimensions, the last two of which name coordinate variables in ``f``.
    date : int
        Index along the variable's first dimension.

    Returns
    -------
    Scalar mean over every grid cell of the selected slice. The mean is
    unweighted (no latitude/area weighting) and pandas skips missing values.
    """
    variable = f.variables[metric]

    # The first dimension is only used positionally via ``date`` below.
    _, lat_axis, lon_axis = variable.get_dims()

    lat_values = f.variables[lat_axis.name][:]
    lon_values = f.variables[lon_axis.name][:]

    grid = pd.DataFrame(variable[date, :, :], index=lat_values, columns=lon_values)
    return grid.stack().mean()

In [3]:
def transformToCSV(f):
    """Reduce an open netCDF dataset to one row of grid means per time step.

    Despite the name, nothing is written to disk here: the function returns a
    DataFrame and the caller decides where to save it.

    Parameters
    ----------
    f : open netCDF dataset-like object with a ``"time"`` variable.

    Returns
    -------
    pandas.DataFrame
        One column per data variable (keyed by the variable's short name,
        each value being ``getData``'s mean for that time index) followed by
        a ``date`` column of ``YYYY-MM-DD`` strings.
    """
    time_dim = f.variables["time"].get_dims()
    time_var = f.variables[time_dim[0].name]
    # Decode with the calendar the file declares; fall back to the library
    # default ("standard") when the attribute is absent.
    calendar = getattr(time_var, "calendar", "standard")
    times = num2date(time_var[:], time_var.units, calendar=calendar)

    duration = len(times)
    print(f'duration: {duration} days')

    # Everything after the first three variables is treated as data.
    # NOTE(review): presumably the first three are the coordinate variables
    # (longitude/latitude/time) — confirm before using files with another layout.
    features = list(f.variables.keys())[3:]
    print(f'features: {features}')

    # Collect the per-time-step grid mean for every feature.
    dataDict = {
        feature: [float(getData(f, feature, date)) for date in range(duration)]
        for feature in features
    }

    # Transform the collected series into a DataFrame.
    df = pd.DataFrame(dataDict)

    # Add the dates, keeping only the YYYY-MM-DD part of each ISO timestamp.
    df['date'] = [stamp.isoformat()[:10] for stamp in times]

    return df

In [4]:
def getLongNames(file_location):
    """Return the descriptive names of the data variables in a netCDF file.

    Parameters
    ----------
    file_location : str
        Path of the netCDF file to open.

    Returns
    -------
    list of str
        The ``long_name`` attribute of every variable after the first three
        entries of the file's variable mapping, in file order. A variable
        without a ``long_name`` contributes its own variable name instead of
        raising ``AttributeError``.
    """
    # The context manager guarantees the file handle is released; the handle
    # was previously left open after every call.
    with netCDF4.Dataset(file_location) as f:
        return [
            getattr(variable, "long_name", name)
            for name, variable in list(f.variables.items())[3:]
        ]

# Preview the long names of the data variables in the 2019 file.
# NOTE: file_location is a notebook-level name that later cells reassign.
file_location = r"C:\Users\Kye Manning-Lees\bushfireModel\2019_data.nc"
print(getLongNames(file_location))

['Divergence', 'Fraction of cloud cover', 'Geopotential', 'Ozone mass mixing ratio', 'Potential vorticity', 'Relative humidity', 'Specific cloud ice water content', 'Specific cloud liquid water content', 'Specific humidity', 'Specific rain water content', 'Specific snow water content', 'Temperature', 'U component of wind', 'V component of wind', 'Vertical velocity', 'Vorticity (relative)']


In [5]:
# Open the 2017 netCDF file and reduce it to one row of grid means per time step.
# NOTE(review): f_2017 is not closed anywhere in the visible notebook — confirm
# whether any later cell still needs the open handle.
file_location = r"C:\Users\Kye Manning-Lees\bushfireModel\2017_data.nc"
f_2017 = netCDF4.Dataset(file_location)
df_2017 = transformToCSV(f_2017)
# Bare last expression so the notebook renders the frame.
df_2017

duration: 365 days
features: ['d', 'cc', 'z', 'o3', 'pv', 'r', 'ciwc', 'clwc', 'q', 'crwc', 'cswc', 't', 'u', 'v', 'w', 'vo']


Unnamed: 0,d,cc,z,o3,pv,r,ciwc,clwc,q,crwc,cswc,t,u,v,w,vo,date
0,0.000028,0.0,477420.642570,0.000004,-0.003444,0.000132,0.0,0.0,0.000004,0.0,0.0,269.965028,-42.726981,-7.730748,-0.002025,-5.312786e-07,2017-01-01
1,-0.000009,0.0,477329.534030,0.000004,-0.003443,0.000121,0.0,0.0,0.000004,0.0,0.0,270.996854,-45.692117,-5.332659,-0.000635,1.199110e-06,2017-01-02
2,0.000018,0.0,476226.449334,0.000005,-0.003097,0.000155,0.0,0.0,0.000004,0.0,0.0,267.775899,-47.402067,-2.174820,0.000312,1.595173e-05,2017-01-03
3,0.000021,0.0,476254.828078,0.000005,-0.002975,0.000149,0.0,0.0,0.000004,0.0,0.0,268.415721,-48.287632,0.453403,0.001502,2.528151e-05,2017-01-04
4,-0.000033,0.0,475870.602938,0.000005,-0.002866,0.000135,0.0,0.0,0.000004,0.0,0.0,269.616232,-50.383268,-5.729242,0.000266,2.406464e-05,2017-01-05
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
360,-0.000038,0.0,475884.009733,0.000004,-0.003165,0.000119,0.0,0.0,0.000004,0.0,0.0,271.256058,-42.756299,-8.290634,0.000476,1.421262e-05,2017-12-27
361,-0.000009,0.0,475289.868376,0.000005,-0.003451,0.000140,0.0,0.0,0.000004,0.0,0.0,269.190556,-49.597304,-1.897707,0.000580,1.623513e-05,2017-12-28
362,0.000023,0.0,475361.700786,0.000004,-0.003060,0.000131,0.0,0.0,0.000004,0.0,0.0,270.083064,-52.894711,1.185846,0.001129,2.964822e-05,2017-12-29
363,0.000012,0.0,475876.739996,0.000005,-0.003333,0.000173,0.0,0.0,0.000004,0.0,0.0,266.644906,-45.225438,-0.265026,-0.000472,9.218666e-06,2017-12-30


In [6]:
# Open the 2018 netCDF file and reduce it to one row of grid means per time step.
# NOTE(review): f_2018 is not closed anywhere in the visible notebook — confirm
# whether any later cell still needs the open handle.
file_location = r"C:\Users\Kye Manning-Lees\bushfireModel\2018_data.nc"
f_2018 = netCDF4.Dataset(file_location)
df_2018 = transformToCSV(f_2018)
# Bare last expression so the notebook renders the frame.
df_2018

duration: 365 days
features: ['d', 'cc', 'z', 'o3', 'pv', 'r', 'ciwc', 'clwc', 'q', 'crwc', 'cswc', 't', 'u', 'v', 'w', 'vo']


Unnamed: 0,d,cc,z,o3,pv,r,ciwc,clwc,q,crwc,cswc,t,u,v,w,vo,date
0,-7.371722e-06,0.0,475693.202011,0.000005,-0.003145,0.000156,0.0,0.0,0.000004,0.0,0.0,267.833717,-50.005477,0.139891,0.001153,0.000025,2018-01-01
1,1.037791e-05,0.0,476291.676446,0.000004,-0.003126,0.000121,0.0,0.0,0.000004,0.0,0.0,271.148520,-45.368318,-3.335277,-0.000401,0.000018,2018-01-02
2,-2.929270e-05,0.0,476178.365183,0.000004,-0.003436,0.000109,0.0,0.0,0.000004,0.0,0.0,272.443664,-48.728946,-1.137475,0.000738,0.000018,2018-01-03
3,-8.648919e-07,0.0,475785.009930,0.000004,-0.003576,0.000131,0.0,0.0,0.000004,0.0,0.0,270.040097,-49.538595,-0.984466,0.000659,0.000011,2018-01-04
4,1.608014e-05,0.0,476119.553443,0.000004,-0.002989,0.000135,0.0,0.0,0.000004,0.0,0.0,269.675213,-49.883591,-6.865373,-0.000099,0.000018,2018-01-05
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
360,1.296852e-05,0.0,473193.770408,0.000005,-0.002910,0.000155,0.0,0.0,0.000004,0.0,0.0,267.888440,-54.607556,-3.800770,-0.000816,0.000016,2018-12-27
361,1.334242e-05,0.0,473087.216239,0.000005,-0.002505,0.000173,0.0,0.0,0.000004,0.0,0.0,266.575116,-57.598365,-7.360747,0.000482,0.000026,2018-12-28
362,9.067304e-06,0.0,472793.696563,0.000005,-0.002841,0.000197,0.0,0.0,0.000004,0.0,0.0,265.095529,-61.027220,-3.274359,0.000157,0.000024,2018-12-29
363,-1.192559e-05,0.0,473778.865412,0.000004,-0.002629,0.000139,0.0,0.0,0.000004,0.0,0.0,269.278303,-62.059979,-3.230934,0.001000,0.000027,2018-12-30


In [7]:
# Open the 2019 netCDF file and reduce it to one row of grid means per time step.
# NOTE(review): f_2019 is not closed anywhere in the visible notebook — confirm
# whether any later cell still needs the open handle.
file_location = r"C:\Users\Kye Manning-Lees\bushfireModel\2019_data.nc"
f_2019 = netCDF4.Dataset(file_location)
df_2019 = transformToCSV(f_2019)
# Bare last expression so the notebook renders the frame.
df_2019

duration: 365 days
features: ['d', 'cc', 'z', 'o3', 'pv', 'r', 'ciwc', 'clwc', 'q', 'crwc', 'cswc', 't', 'u', 'v', 'w', 'vo']


Unnamed: 0,d,cc,z,o3,pv,r,ciwc,clwc,q,crwc,cswc,t,u,v,w,vo,date
0,0.000002,0.0,474127.960758,0.000005,-0.002643,0.000161,0.0,0.0,0.000004,0.0,0.0,267.483381,-61.190177,-9.878280,-0.000417,0.000023,2019-01-01
1,0.000007,0.0,474835.121530,0.000005,-0.002243,0.000164,0.0,0.0,0.000004,0.0,0.0,267.136460,-60.137412,-6.925914,-0.000149,0.000028,2019-01-02
2,0.000010,0.0,475661.076132,0.000004,-0.002524,0.000133,0.0,0.0,0.000004,0.0,0.0,269.775785,-57.211039,-7.376011,-0.001292,0.000025,2019-01-03
3,-0.000013,0.0,474383.052510,0.000005,-0.002845,0.000161,0.0,0.0,0.000004,0.0,0.0,267.410392,-58.762400,-6.709442,0.000596,0.000023,2019-01-04
4,0.000023,0.0,474634.341958,0.000005,-0.002633,0.000176,0.0,0.0,0.000004,0.0,0.0,266.360261,-55.192837,-9.665992,0.001845,0.000018,2019-01-05
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
360,-0.000017,0.0,475755.188744,0.000004,-0.003139,0.000129,0.0,0.0,0.000004,0.0,0.0,270.372048,-41.374916,-5.264596,-0.000772,0.000005,2019-12-27
361,0.000022,0.0,476494.320356,0.000004,-0.003038,0.000126,0.0,0.0,0.000004,0.0,0.0,270.717408,-41.229020,-7.361870,-0.001952,0.000012,2019-12-28
362,-0.000002,0.0,476700.274659,0.000004,-0.002938,0.000126,0.0,0.0,0.000004,0.0,0.0,270.757007,-45.729401,-2.765841,-0.000106,0.000018,2019-12-29
363,0.000017,0.0,476145.790958,0.000004,-0.003003,0.000132,0.0,0.0,0.000004,0.0,0.0,270.065933,-45.290673,-3.805441,-0.000767,0.000020,2019-12-30


In [8]:
# Open the 2020 netCDF file and reduce it to one row of grid means per time step.
# NOTE(review): f_2020 is not closed anywhere in the visible notebook — confirm
# whether any later cell still needs the open handle.
# file_location keeps this 2020 path afterwards; the column-renaming cell reads it.
file_location = r"C:\Users\Kye Manning-Lees\bushfireModel\2020_data.nc"
f_2020 = netCDF4.Dataset(file_location)
df_2020 = transformToCSV(f_2020)
# Bare last expression so the notebook renders the frame.
df_2020

duration: 366 days
features: ['d', 'cc', 'z', 'o3', 'pv', 'r', 'ciwc', 'clwc', 'q', 'crwc', 'cswc', 't', 'u', 'v', 'w', 'vo']


Unnamed: 0,d,cc,z,o3,pv,r,ciwc,clwc,q,crwc,cswc,t,u,v,w,vo,date
0,0.000013,0.0,476183.534774,0.000004,-0.003014,0.000155,0.0,0.0,0.000004,0.0,0.0,268.007423,-48.321339,-8.712423,-0.000740,0.000013,2020-01-01
1,0.000002,0.0,475863.816880,0.000005,-0.002998,0.000145,0.0,0.0,0.000004,0.0,0.0,268.824309,-52.584034,2.373350,0.000954,0.000021,2020-01-02
2,0.000019,0.0,475516.347544,0.000005,-0.002918,0.000217,0.0,0.0,0.000004,0.0,0.0,264.133741,-49.424776,-1.194926,-0.002144,0.000012,2020-01-03
3,0.000008,0.0,474588.735638,0.000005,-0.002820,0.000160,0.0,0.0,0.000004,0.0,0.0,267.680918,-48.635720,-4.428827,0.000061,0.000025,2020-01-04
4,-0.000013,0.0,475633.870026,0.000004,-0.003115,0.000132,0.0,0.0,0.000004,0.0,0.0,270.109437,-48.651998,-10.772258,-0.000291,0.000013,2020-01-05
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
361,-0.000018,0.0,474700.096058,0.000004,-0.003277,0.000121,0.0,0.0,0.000004,0.0,0.0,270.811849,-47.138445,-3.356542,-0.000088,0.000014,2020-12-27
362,0.000014,0.0,474770.937599,0.000004,-0.003325,0.000138,0.0,0.0,0.000004,0.0,0.0,269.145490,-48.325478,-4.213930,-0.000054,0.000009,2020-12-28
363,0.000004,0.0,474724.677807,0.000004,-0.003136,0.000141,0.0,0.0,0.000004,0.0,0.0,268.935514,-52.200542,-5.845193,0.000872,0.000011,2020-12-29
364,0.000006,0.0,474173.128975,0.000005,-0.003229,0.000165,0.0,0.0,0.000004,0.0,0.0,267.048160,-51.691104,-0.231027,-0.000739,0.000014,2020-12-30


In [9]:
# Replace the short variable names with their long names on every yearly frame.
# getLongNames opens the file on each call, so look the names up once and reuse
# them instead of re-reading the same file four times.
# NOTE: file_location is whatever the most recently run cell assigned (the 2020
# file when the notebook is run top to bottom), so all four frames are labelled
# from that single file. This relies on every year listing the same variables
# in the same order, which the printed feature lists above agree on.
column_labels = list(getLongNames(file_location)) + ['date']
for yearly_frame in (df_2017, df_2018, df_2019, df_2020):
    yearly_frame.columns = column_labels

# Write DataFrame to CSV

In [10]:
# Directory that the per-year CSV files are written into by the cells below.
output_dir = r"C:\Users\Kye Manning-Lees\bushfireModel"

In [11]:
# Write the 2017 DataFrame to CSV. to_csv runs with its defaults, so the row
# index is written as well.

# Plain string literal: the file name has no placeholders, so no f-prefix is needed.
filename = os.path.join(output_dir, 'data_2017.csv')
print(f'Writing to {filename}')
df_2017.to_csv(filename)
print('done')

Writing to C:\Users\Kye Manning-Lees\bushfireModel\data_2017.csv
done


In [12]:
# Write the 2018 DataFrame to CSV. to_csv runs with its defaults, so the row
# index is written as well.

# Plain string literal: the file name has no placeholders, so no f-prefix is needed.
filename = os.path.join(output_dir, 'data_2018.csv')
print(f'Writing to {filename}')
df_2018.to_csv(filename)
print('done')

Writing to C:\Users\Kye Manning-Lees\bushfireModel\data_2018.csv
done


In [13]:
# Write the 2019 DataFrame to CSV. to_csv runs with its defaults, so the row
# index is written as well.

# Plain string literal: the file name has no placeholders, so no f-prefix is needed.
filename = os.path.join(output_dir, 'data_2019.csv')
print(f'Writing to {filename}')
df_2019.to_csv(filename)
print('done')

Writing to C:\Users\Kye Manning-Lees\bushfireModel\data_2019.csv
done


In [14]:
# Write the 2020 DataFrame to CSV. to_csv runs with its defaults, so the row
# index is written as well.

# Plain string literal: the file name has no placeholders, so no f-prefix is needed.
filename = os.path.join(output_dir, 'data_2020.csv')
print(f'Writing to {filename}')
df_2020.to_csv(filename)
print('done')

Writing to C:\Users\Kye Manning-Lees\bushfireModel\data_2020.csv
done


In [15]:
# write 2021 df to CSV (left disabled: no df_2021 is built in the cells above)

# filename = os.path.join(output_dir, f'data_2021.csv')
# print(f'Writing to {filename}')
# df_2021.to_csv(filename)
# print('done')