### Import libraries

In [9]:
import csv
import os

import matplotlib.pyplot as plt
import numpy as np
import numpy.ma as ma
import pandas as pd
from netCDF4 import Dataset

### Import datafile

In [10]:
# Path to the NetCDF input file (an absolute path on the author's machine).
my_example_nc_file = '/Users/radharamachandran/Downloads/Guatemala100year.nc'
# Open the dataset read-only; `fh` is the handle used to read the variables below.
fh = Dataset(my_example_nc_file, 'r')

### Pick out the necessary variables

In [11]:
# Read each variable in full (the `[:]` slice) from the open dataset, in the
# same order as before: time, yield, longitude, latitude.
time, y, lon, lat = (
    fh.variables[key][:] for key in ('time', 'yield', 'lon', 'lat')
)

### Function that writes a variable to a csv file

In [12]:
def write_to_csv(var_name, output_filename):
    """Write a variable's values to a comma-separated file.

    A variable with two or more dimensions (such as the 5-D 'yield' array) is
    written one row per innermost 1-D slice, in the order nested loops over the
    leading dimensions would visit them; masked entries are written as 0.
    A 1-D variable is written unchanged as a single row, and a 0-D value as a
    single one-cell row.

    Parameters
    ----------
    var_name : array-like
        The data to write (despite the parameter name, this is the array
        itself, not a string).
    output_filename : str
        Path of the CSV file to create; an existing file is overwritten.
    """
    # Dispatch on dimensionality instead of element count: a size threshold
    # sends long 1-D variables into the nested loops (TypeError) and small
    # multi-dimensional ones down the single-row path (rows of array reprs).
    data = np.ma.asarray(var_name)

    with open(output_filename, "w", newline="") as csvFile:
        outputwriter = csv.writer(csvFile, delimiter=',')

        if data.ndim > 1:
            # Collapse every leading dimension so each row is one innermost
            # slice; the row count is computed explicitly so empty arrays
            # reshape without ambiguity.
            n_rows = int(np.prod(data.shape[:-1]))
            for row in data.reshape(n_rows, data.shape[-1]):
                new_var = [0 if (x is np.ma.masked) else x for x in row]
                outputwriter.writerow(new_var)
        elif data.ndim == 1:
            outputwriter.writerow(var_name)
        else:
            # 0-D input: emit the lone value as a one-cell row.
            outputwriter.writerow(data.reshape(1))

### Run function write_to_csv
#### First argument is the variable's data array, second argument is the output file name

In [13]:
# Export each extracted variable to its own CSV file in the working directory.
write_to_csv(var_name=y, output_filename='Guatemala100-yield.csv')
write_to_csv(var_name=lat, output_filename='Guatemala100-lat.csv')
write_to_csv(var_name=lon, output_filename='Guatemala100-lon.csv')
write_to_csv(var_name=time, output_filename='Guatemala100-time.csv')

### Function to read the yield and time CSV files and compute the average non-zero yield per time step

In [14]:
def _read_csv_rows(path):
    """Return every row of a comma-separated file as a NumPy array of strings."""
    with open(path, 'rt', newline='') as csvfile:
        reader = csv.reader(csvfile, delimiter=',', quotechar='|')
        return [np.asarray(row) for row in reader]


def calc_avg(yieldfile, timefile, rows_per_step=720):
    """Compute the total and mean of the non-zero yield values per time step.

    The yield file holds the rows of every time step written one after
    another; time step ``i`` owns rows ``i*rows_per_step`` up to
    ``(i+1)*rows_per_step``. For each value in the first row of the time
    file, one line ``<time> <total> <average>`` (space-separated) is written
    to a file named ``'avg-' + <yield file name>`` placed next to the yield
    file.

    Parameters
    ----------
    yieldfile : str
        Path of the comma-separated yield file.
    timefile : str
        Path of the comma-separated time file; only its first row is used.
    rows_per_step : int, optional
        Number of yield-file rows belonging to one time step. Defaults to
        720, the value the original code hard-coded.

    Returns
    -------
    tuple
        ``(yld, t)``: the raw rows of the yield file and of the time file,
        each a list of NumPy arrays of strings.

    Raises
    ------
    ValueError
        If the time file contains no rows, or a cell is not numeric.
    """
    t = _read_csv_rows(timefile)
    ## yld has the rows for each time step, written one after another.
    yld = _read_csv_rows(yieldfile)

    if not t:
        raise ValueError("time file %r contains no rows" % (timefile,))
    time_values = t[0]

    # Prefix only the file name, so a yieldfile with a directory component
    # still produces a valid output path (same result for bare file names).
    out_dir, out_name = os.path.split(yieldfile)
    out_path = os.path.join(out_dir, 'avg-' + out_name)

    with open(out_path, 'w', newline='') as csvfile:
        avgwriter = csv.writer(csvfile, delimiter=' ', quoting=csv.QUOTE_MINIMAL)
        for i, time_value in enumerate(time_values):
            ## Select the rows belonging to this time step
            step_rows = yld[i * rows_per_step:(i + 1) * rows_per_step]

            ## Keep the non-zero values, in row-major order
            nonzero = [
                value
                for row in step_rows
                for value in map(float, row)
                if value != 0
            ]

            total = sum(nonzero)
            # A step with no non-zero values has no defined mean; report NaN
            # instead of dividing by zero.
            avg = total / len(nonzero) if nonzero else float('nan')

            # Go through float() first so times stored as '1.0' are accepted.
            avgwriter.writerow([int(float(time_value)), total, avg])

    return (yld, t)

In [15]:
# Compute the per-time-step averages from the CSV files written earlier and
# keep the raw rows that calc_avg returns.
yld, t = calc_avg(
    yieldfile='Guatemala100-yield.csv',
    timefile='Guatemala100-time.csv',
)