In [1]:
%run basics
%matplotlib

Using matplotlib backend: Qt4Agg


In [42]:
# Names of the sites to process, in alphabetical order. Each name is used
# further down to build the path of that site's L3 netCDF file.
site_name_list = [
    "AdelaideRiver",
    "Calperum",
    "CapeTribulation",
    "CowBay",
    "CumberlandPlains",
    "DalyPasture",
    "DalyUncleared",
    "DryRiver",
    "Emerald",
    "FoggDam",
    "Gingin",
    "GreatWesternWoodlands",
    "HowardSprings",
    "Otway",
    "RedDirtMelonFarm",
    "RiggsCreek",
    "RobsonCreek",
    "Samford",
    "SturtPlains",
    "Tumbarumba",
    "Whroo",
    "WombatStateForest",
    "Yanco",
]

In [2]:
# Optional debugging shortcut: restrict processing to a single site.
# This used to be an unconditional assignment, which replaced the full site
# list whenever the notebook was run from top to bottom, so a clean re-run
# processed Calperum only. It is now opt-in: set the flag to True to get the
# single-site behaviour back.
DEBUG_SINGLE_SITE = False
if DEBUG_SINGLE_SITE:
    site_name_list = ["Calperum"]

In [46]:
# Surface energy balance (SEB) closure per site and calendar year.
# For every site/year with enough data, the daily means of Fe+Fh are regressed
# on the daily means of Fa with the line forced through the origin; the slope
# is printed together with the Pearson correlation and collected in "results".
# NOTE(review): numpy, scipy, os, datetime, qcio and qcutils are not imported in
# this notebook; presumably "%run basics" puts them in scope - confirm.
sites_root = "/mnt/OzFlux/Sites"
min_days_in_year = 250   # years with fewer whole days of records are skipped
min_day_coverage = 0.8   # fraction of a day's time steps that must be unmasked
# slopes are gathered in a list and converted once at the end; numpy.append
# inside the loop re-allocated the whole array on every pass
slopes = []
for site_name in site_name_list:
    print("Processing site:  %s"%site_name)
    in_name = os.path.join(sites_root,site_name,"Data","Portal",site_name+"_L3.nc")
    if not os.path.exists(in_name):
        print(" File not found: "+in_name)
        continue
    ds = qcio.nc_read_series(in_name)
    # time step in minutes (it is used below as timedelta(minutes=ts))
    ts = int(ds.globalattributes["time_step"])
    ntsInDay = int(float(24.0*60.0/float(ts)))
    # smallest number of unmasked time steps for a day to be used (rounded)
    nts_threshold = int(ntsInDay*min_day_coverage+0.5)
    ldt = ds.series["DateTime"]["Data"]
    for year in range(ldt[0].year,ldt[-1].year+1):
        # the first record of the year is one time step after midnight on 1 January,
        # the last record is midnight on 1 January of the following year
        start_date = datetime.datetime(year,1,1)+datetime.timedelta(minutes=ts)
        si = qcutils.GetDateIndex(ldt,str(start_date),ts=ts,match="startnextday",default=0)
        end_date = datetime.datetime(year+1,1,1)
        ei = qcutils.GetDateIndex(ldt,str(end_date),ts=ts,match="endpreviousday",default=len(ldt)-1)
        dt = numpy.array(ldt[si:ei+1])
        nDays = int(float(len(dt))/ntsInDay)
        if nDays<min_days_in_year:
            print("Not enough days in year  %d %d"%(year,nDays))
            continue
        # get the data; only the first of the three returned values is used here
        Fa,f,a = qcutils.GetSeriesasMA(ds,"Fa",si=si,ei=ei)
        Fe,f,a = qcutils.GetSeriesasMA(ds,"Fe",si=si,ei=ei)
        Fh,f,a = qcutils.GetSeriesasMA(ds,"Fh",si=si,ei=ei)
        # apply one common mask so that a time step is used only if all three are present
        mask = numpy.ma.mask_or(numpy.ma.getmaskarray(Fa),numpy.ma.getmaskarray(Fe))
        mask = numpy.ma.mask_or(mask,numpy.ma.getmaskarray(Fh))
        Fa = numpy.ma.array(Fa,mask=mask)
        Fe = numpy.ma.array(Fe,mask=mask)
        Fh = numpy.ma.array(Fh,mask=mask)
        # reshape into 2D array of (days,timesteps)
        # NOTE(review): reshape raises ValueError unless si:ei spans a whole number of
        # days; presumably the "startnextday"/"endpreviousday" matching ensures this - confirm
        dt_daily = dt.reshape(nDays,ntsInDay)
        Fa_daily = Fa.reshape(nDays,ntsInDay)
        Fe_daily = Fe.reshape(nDays,ntsInDay)
        Fh_daily = Fh.reshape(nDays,ntsInDay)
        # get the daily averages and the number of non-masked points in each day
        Fa_daily_avg = numpy.ma.average(Fa_daily,axis=1)
        Fa_daily_count = numpy.ma.count(Fa_daily,axis=1)
        Fe_daily_avg = numpy.ma.average(Fe_daily,axis=1)
        Fe_daily_count = numpy.ma.count(Fe_daily,axis=1)
        Fh_daily_avg = numpy.ma.average(Fh_daily,axis=1)
        Fh_daily_count = numpy.ma.count(Fh_daily,axis=1)
        # only use days where at least 80% of the data is present
        condition = (Fa_daily_count<nts_threshold)|(Fe_daily_count<nts_threshold)|(Fh_daily_count<nts_threshold)
        Fa_daily_avg = numpy.ma.masked_where(condition,Fa_daily_avg)
        Fe_daily_avg = numpy.ma.masked_where(condition,Fe_daily_avg)
        Fh_daily_avg = numpy.ma.masked_where(condition,Fh_daily_avg)
        # drop the rejected days; the three arrays keep the same length
        Fa_daily_avg = numpy.ma.compressed(Fa_daily_avg)
        Fe_daily_avg = numpy.ma.compressed(Fe_daily_avg)
        Fh_daily_avg = numpy.ma.compressed(Fh_daily_avg)
        FepFh = Fe_daily_avg + Fh_daily_avg
        if len(Fa_daily_avg)<2:
            print("Not enough good data for %s %d"%(site_name,year))
            continue
        # least-squares fit of FepFh = slope*Fa; a single regressor column and no
        # constant column means the fitted line passes through the origin
        x = numpy.array(Fa_daily_avg,copy=True)
        y = numpy.array(FepFh,copy=True)
        x = x[:,numpy.newaxis]
        coeffs, _, _, _ = numpy.linalg.lstsq(x, y)
        # pearsonr is called through the public scipy.stats namespace; the
        # scipy.stats.stats module path is deprecated
        pr = scipy.stats.pearsonr(Fa_daily_avg,FepFh)
        print("%s %d %s %s"%(site_name,year,coeffs[0],pr[0]))
        slopes.append(coeffs[0])
# one slope per accepted site-year
results = numpy.array(slopes)

Processing site:  AdelaideRiver
Not enough days in year  2007 75
AdelaideRiver 2008 0.998253211457 0.954920650843
Not enough days in year  2009 143
Processing site:  Calperum
Not enough days in year  2010 153
Calperum 2011 0.886211740219 0.991108563456
Calperum 2012 0.918026731725 0.992434829507
Calperum 2013 0.915439974595 0.989566040668
Calperum 2014 0.884973398339 0.990591701153
Calperum 2015 0.875843886491 0.988307898064
Processing site:  CapeTribulation
Not enough good data for CapeTribulation 2013
Not enough good data for CapeTribulation 2014
Not enough days in year  2015 245
Processing site:  CowBay
Not enough good data for CowBay 2009
Not enough good data for CowBay 2010
CowBay 2011 0.929385885001 0.835468434272
CowBay 2012 0.919783669279 0.894046934041
CowBay 2013 0.995664645354 0.690791998986
CowBay 2014 0.891102634422 0.747830889456
Not enough days in year  2015 131
Processing site:  CumberlandPlains
Not enough days in year  2012 74
CumberlandPlains 2013 0.780353628324 0.948

In [47]:
# number of site-years that produced a slope
print(len(results))

83


In [55]:
# Relative frequency histogram of the SEB ratios collected in "results".
# Ratios outside the first and last bin edges are not counted, and the
# frequencies are relative to the number of ratios that fall inside the bins.
bins = [0.70,0.75,0.80,0.85,0.90,0.95,1.00,1.05]
hist,edges = numpy.histogram(results,bins=bins)
# one label per bar, built from the lower edge of each bin so that the labels
# always match the bins (N edges give N-1 bars, hence N-1 labels)
bin_labels = [">%.2f"%edge for edge in edges[:-1]]
n_in_range = numpy.sum(hist)
hist = hist.astype(float)
if n_in_range>0:
    # guard against dividing by zero when no ratio falls inside the bins
    hist = hist/float(n_in_range)
bar_width = 0.8
fig,axs = plt.subplots(1,figsize=(6,6))
index = numpy.arange(len(hist))
# width and alignment are given explicitly so that the ticks sit under the
# middle of each bar whatever the matplotlib default alignment is
axs.bar(index,hist,width=bar_width,align="center")
axs.set_xticks(index)
axs.set_xticklabels(bin_labels)
axs.set_ylabel("Frequency",fontsize=16)
axs.set_xlabel("SEB ratio",fontsize=16)
plt.show()