In [5]:
# Load dataset
import netCDF4 as nc

# Open the cloud-cover NetCDF file read-only; the cells below all read from `ds`
DATA_FILE = './cru_ts4.07.2011.2020.cld.dat.nc'
ds = nc.Dataset(DATA_FILE, mode='r')

In [6]:
# Print the variables in the file: a mapping of variable name -> netCDF4
# Variable, whose repr lists its dtype, dimensions, attributes and shape.
# (Dimensions are printed separately in the next cell.)
print(ds.variables)

{'lon': <class 'netCDF4._netCDF4.Variable'>
float32 lon(lon)
    long_name: longitude
    units: degrees_east
unlimited dimensions: 
current shape = (720,)
filling on, default _FillValue of 9.969209968386869e+36 used, 'lat': <class 'netCDF4._netCDF4.Variable'>
float32 lat(lat)
    long_name: latitude
    units: degrees_north
unlimited dimensions: 
current shape = (360,)
filling on, default _FillValue of 9.969209968386869e+36 used, 'time': <class 'netCDF4._netCDF4.Variable'>
float32 time(time)
    long_name: time
    units: days since 1900-1-1
    calendar: gregorian
unlimited dimensions: time
current shape = (120,)
filling on, default _FillValue of 9.969209968386869e+36 used, 'cld': <class 'netCDF4._netCDF4.Variable'>
float32 cld(time, lat, lon)
    long_name: cloud cover
    units: percentage
    correlation_decay_distance: 600.0
    _FillValue: 9.96921e+36
    missing_value: 9.96921e+36
unlimited dimensions: time
current shape = (120, 360, 720)
filling on, 'stn': <class 'netCDF4._net

In [7]:
# Print the dimensions in the file: a mapping of dimension name -> netCDF4
# Dimension, whose repr shows its size and whether it is unlimited.
print(ds.dimensions)

{'lon': <class 'netCDF4._netCDF4.Dimension'>: name = 'lon', size = 720, 'lat': <class 'netCDF4._netCDF4.Dimension'>: name = 'lat', size = 360, 'time': <class 'netCDF4._netCDF4.Dimension'> (unlimited): name = 'time', size = 120}


In [8]:
# Print the lowest and highest longitude.
# (Latitude is not printed here; only `lon` and `time` are inspected.)
# Each variable is read from the file once and reused for both min and max.
lonValues = ds.variables['lon'][:]
print(lonValues.min(), lonValues.max())
# Print the lowest and highest time value (the variable's units are
# "days since 1900-1-1").
timeValues = ds.variables['time'][:]
print(timeValues.min(), timeValues.max())

-179.75 179.75
40557.0 44179.0


In [9]:
# Average the `cld` variable over every time step that falls in TARGET_YEARS,
# separately for each (lat, lon) grid cell, and write the result to
# "cloud_cover.csv" as rows of `lat,lon,cld` sorted by lat, then lon.
# Cells that are masked (missing) at every selected time step are omitted.
from numpy.ma import masked  # no longer used in this cell; kept in case other cells rely on the name
import datetime

import numpy as np

# `time` holds day offsets from this epoch (its units are "days since 1900-1-1").
start = datetime.datetime(1900, 1, 1)
# Calendar years whose time steps are averaged together.
TARGET_YEARS = (2019, 2020)

timeItems = ds.variables['time'][:]
cldItems = ds.variables['cld'][:]
latItems = ds.variables['lat'][:]
lonItems = ds.variables['lon'][:]

# Indices along the time axis whose date falls in one of the target years.
selectedSteps = [
    step for step, rawDays in enumerate(timeItems)
    if (start + datetime.timedelta(days=int(rawDays))).year in TARGET_YEARS
]

# Reduce along the time axis with masked-array operations instead of visiting
# every (time, lat, lon) element in Python: `count` gives the number of
# non-masked samples per cell and `sum` adds only the non-masked samples, which
# is what the per-element `val is masked` check used to achieve.
cldSelected = np.ma.asarray(cldItems)[np.asarray(selectedSteps, dtype=int)]
validCounts = cldSelected.count(axis=0)
cellSums = np.ma.filled(cldSelected.sum(axis=0), 0)

# Keep only cells with at least one valid sample (this also avoids dividing by
# zero). The division stays in the data's own dtype so the values are
# formatted the same way as when they were accumulated one scalar at a time.
latIdx, lonIdx = np.nonzero(validCounts)
cellMeans = cellSums[latIdx, lonIdx] / validCounts[latIdx, lonIdx].astype(cellSums.dtype)

# Coordinates are emitted as plain Python floats, as before.
cellLats = np.ma.getdata(latItems).astype(float)[latIdx].tolist()
cellLons = np.ma.getdata(lonItems).astype(float)[lonIdx].tolist()

results = [
    [cellLat, cellLon, cellMean]
    for cellLat, cellLon, cellMean in zip(cellLats, cellLons, cellMeans)
]
# Explicit sort so the output order does not depend on the axis order in the file.
results.sort()

with open("cloud_cover.csv", "w") as file:
    file.write(",".join(["lat", "lon", "cld"]))
    file.write("\n")
    for result in results:
        file.write(",".join([str(field) for field in result]))
        file.write("\n")
