In [None]:
import numpy as np
from datetime import datetime
import yaml
import csv
import sys

sys.path.append('/home/Kevin.Dougherty/GSI_plots')

from runDiagnostics import conventional, satellite

def calculate_stats(data):
    
    n = len(data)
    
    mean = np.mean(data)
    var_list = [(x-mean)**2 for x in data]
    variance = np.sum(var_list)/(len(var_list)-1)
    std = np.sqrt(variance)
    mx = max(data)
    mn = min(data)
    
    rmse = np.sqrt(np.mean(np.square(data)))
    
    return mn, mx, mean, std, rmse

def write_csv(nc_file, csv_dict):
    
    fields = ['Date', 'min', 'max', 'mean', 'std', 'rmse']
    
    # filename of csv file
    filename = csv_dict[0]['Date'] + '_' + nc_file.split('/')[-1].split('.')[0] +'.csv'
    
    # write to csv file
    with open(filename, 'w') as csvfile:
        # creating a csv dict writer object
        writer = csv.DictWriter(csvfile, fieldnames=fields)

        # writing headers (filed names)
        writer.writeheader()

        # writing data rows
        writer.writerows(csv_dict)
        
        return
        

def main(parsed_yaml_file):
    
    for group in parsed_yaml_file['diagnostic']:
        for groupType in group.keys():
            
            if groupType == 'conventional input':
        
                nc_file   = group[groupType]['path'][0]
                obs_id    = group[groupType]['observation id']
                qc_flag   = group[groupType]['qc flag']
                DATA_TYPE = group[groupType]['data type'][0]

                diag = conventional(nc_file)

                data = diag.getData(DATA_TYPE, obs_id, qc_flag)
            
            elif groupType == 'radiance input':
                
                nc_file   = group[groupType]['path'][0]
                channel   = group[groupType]['channel']
                qc_flag   = group[groupType]['qc flag']
                DATA_TYPE = group[groupType]['data type'][0]

                diag = satellite(nc_file)

                data = diag.getData(DATA_TYPE, channel, qc_flag)

                lats, lons = diag.get_lat_lon(channel, qc_flag)
        
            else:
                print('File type not recognized. Please address in yaml file.')
                return
            
    
            metadata = diag.get_metadata()

            mn, mx, mean, std, rmse = calculate_stats(data)

            date = metadata['Date'].strftime("%Y%m%d%H")

            # dictionary of values needed in csv
            csv_dict = [{'Date' : date,
                        'min'  : mn,
                        'max'  : mx,
                        'mean' : mean,
                        'std'  : std,
                        'rmse' : rmse
                       }]

            write_csv(nc_file, csv_dict)
        
    return

        
#########################################################        


file = open('test_YAML.yaml')
parsed_yaml_file = yaml.load(file, Loader=yaml.FullLoader)
main(parsed_yaml_file)


## Trying out multiple files so I can test concat and plotting csv's

In [2]:
import numpy as np
from datetime import datetime
import glob
import csv
import sys

sys.path.append('/home/Kevin.Dougherty/GSI_plots')

from runDiagnostics import conventional, satellite

def calculate_stats(data):
    
    n = len(data)
    
    mean = np.mean(data)
    var_list = [(x-mean)**2 for x in data]
    variance = np.sum(var_list)/(len(var_list)-1)
    std = np.sqrt(variance)
    mx = max(data)
    mn = min(data)
    
    rmse = np.sqrt(np.mean(np.square(data)))
    
    return mn, mx, mean, std, rmse

def write_csv(nc_file, csv_dict):
    
    fields = ['Date', 'min', 'max', 'mean', 'std', 'rmse']
    
    # filename of csv file
    filename = csv_dict[0]['Date'] + '_' + nc_file.split('/')[-1].split('.')[0] +'.csv'
    
    # write to csv file
    with open(filename, 'w') as csvfile:
        # creating a csv dict writer object
        writer = csv.DictWriter(csvfile, fieldnames=fields)

        # writing headers (filed names)
        writer.writeheader()

        # writing data rows
        writer.writerows(csv_dict)
        
        return
        

def main():
    
    dates = ['20200920', '20200921', '20200922']
    hrs   = ['00','06','12','18']
    
    path  = '/scratch2/NCEPDEV/stmp1/Kevin.Dougherty/ncDiags/'
    DATA_TYPE = 'O-F'
    
    
    for d in dates:
        for hr in hrs:
            
            nc_file = f'{path}gdas.{d}/{hr}/atmos/diag_conv_t_ges.{d}{hr}.nc4'
            
    
            print(nc_file)
            obs_id  = [120]
            qc_flag = []

            diag = conventional(nc_file)

            data = diag.getData(DATA_TYPE, obs_id, qc_flag)
        
#     else:
        
#         nc_file   = parsed_yaml_file['conventional input']['path']
#         channel   = parsed_yaml_file['conventional input']['channel']
#         qc_flag   = parsed_yaml_file['conventional input']['qc flag']
#         DATA_TYPE = parsed_yaml_file['conventional input']['data type'][0]
        
#         diag = satellite(nc_file)
        
#         data = diag.getData(DATA_TYPE, channel, qc_flag)
    
            metadata = diag.get_metadata()

            mn, mx, mean, std, rmse = calculate_stats(data)

            date = metadata['Date'].strftime("%Y%m%d%H")

            # dictionary of values needed in csv
            csv_dict = [{'Date' : date,
                        'min'  : mn,
                        'max'  : mx,
                        'mean' : mean,
                        'std'  : std,
                        'rmse' : rmse
                       }]

            write_csv(nc_file, csv_dict)
        
    return

        
#########################################################        


main()


/scratch2/NCEPDEV/stmp1/Kevin.Dougherty/ncDiags/gdas.20200920/00/atmos/diag_conv_t_ges.2020092000.nc4
/scratch2/NCEPDEV/stmp1/Kevin.Dougherty/ncDiags/gdas.20200920/06/atmos/diag_conv_t_ges.2020092006.nc4
/scratch2/NCEPDEV/stmp1/Kevin.Dougherty/ncDiags/gdas.20200920/12/atmos/diag_conv_t_ges.2020092012.nc4
/scratch2/NCEPDEV/stmp1/Kevin.Dougherty/ncDiags/gdas.20200920/18/atmos/diag_conv_t_ges.2020092018.nc4
/scratch2/NCEPDEV/stmp1/Kevin.Dougherty/ncDiags/gdas.20200921/00/atmos/diag_conv_t_ges.2020092100.nc4
/scratch2/NCEPDEV/stmp1/Kevin.Dougherty/ncDiags/gdas.20200921/06/atmos/diag_conv_t_ges.2020092106.nc4
/scratch2/NCEPDEV/stmp1/Kevin.Dougherty/ncDiags/gdas.20200921/12/atmos/diag_conv_t_ges.2020092112.nc4
/scratch2/NCEPDEV/stmp1/Kevin.Dougherty/ncDiags/gdas.20200921/18/atmos/diag_conv_t_ges.2020092118.nc4
/scratch2/NCEPDEV/stmp1/Kevin.Dougherty/ncDiags/gdas.20200922/00/atmos/diag_conv_t_ges.2020092200.nc4
/scratch2/NCEPDEV/stmp1/Kevin.Dougherty/ncDiags/gdas.20200922/06/atmos/diag_conv_t

#### Read all csv files in directory, store as pandas df

In [3]:
import os
import glob
import pandas as pd

os.chdir('/home/Kevin.Dougherty/GSI_plots')

files = [i for i in glob.glob('*.csv')]

combined_csv = pd.concat([pd.read_csv(f) for f in files])

In [4]:
combined_csv

Unnamed: 0,Date,min,max,mean,std,rmse
0,2020092100,-49.737965,11.135585,0.125913,1.518926,1.52411
0,2020092100,-49.737965,11.135585,0.125913,1.518926,1.52411
1,2020092106,-8.365621,6.158086,0.149694,1.129574,1.139301
2,2020092112,-41.445885,84.90975,0.14596,1.430502,1.437905
3,2020092118,-5.645886,6.786186,0.326647,1.159425,1.204417
0,2020092106,-8.365621,6.158086,0.149694,1.129574,1.139301
0,2020092112,-41.445885,84.90975,0.14596,1.430502,1.437905
0,2020092118,-5.645886,6.786186,0.326647,1.159425,1.204417
0,2020092000,-56.482166,48.553932,0.137342,1.638797,1.644514
0,2020092006,-6.557517,5.571989,0.135937,1.167907,1.175624


#### Create new csv file from combined files

In [5]:
combined_csv.to_csv( "combined_diag_conv_t_ges.csv", index=False, encoding='utf-8-sig')