# 6_remove_nans.py
import glob
from os.path import basename
import pandas as pd
from clim_helpers import concat_vvd_files
# Remove rows with variable == nan? (lots of IOS 1990's data)

# Directory the NaN-filtered files are written to. File names are appended
# by plain string concatenation, so the trailing separator is required.
nan_dir = 'C:\\Users\\HourstonH\\Documents\\NEP_climatology\\data\\' \
          'value_vs_depth\\6_filtered_for_nans\\sep_by_origin\\'

# Directory the quality-flag-filtered input files are read from (same
# trailing-separator requirement as above).
qc_dir = 'C:\\Users\\HourstonH\\Documents\\NEP_climatology\\data\\' \
         'value_vs_depth\\5_filtered_for_quality_flag\\'

# Name of the combined oxygen file, and of the WOD PFL oxygen file that is
# deliberately left out of it.
all_name = 'Oxy_1991_2020_value_vs_depth_nan_rm.csv'
pfl_name = 'WOD_PFL_Oxy_1991_2020_value_vs_depth_nan_rm.csv'


def drop_nan_rows(qc_file, out_dir):
    """Write a copy of one csv file without its NaN depth/value rows.

    Args:
        qc_file: Path of the input csv file. It must contain the columns
            'Depth_m' and 'Value'.
        out_dir: Output directory, including its trailing path separator.

    Returns:
        The path of the csv file that was written: ``out_dir`` plus the
        input file name with 'qc' replaced by 'nan_rm'.
    """
    print(basename(qc_file))
    df = pd.read_csv(qc_file)
    print('Starting df length:', len(df))

    # Drop every row whose depth or value is NaN; NaNs in any other column
    # do not cause a row to be dropped.
    df = df.dropna(axis='index', subset=['Depth_m', 'Value'])
    print('Ending df length:', len(df))

    # Export df
    out_file = out_dir + basename(qc_file).replace('qc', 'nan_rm')
    df.to_csv(out_file, index=False)
    return out_file


def files_to_combine(candidates, excluded_names):
    """Return the candidate paths whose base name is not excluded.

    Matching on the base name (rather than calling ``list.remove`` with a
    full path) means a missing or differently spelled path is not an error.

    Args:
        candidates: Iterable of file paths.
        excluded_names: Collection of base file names to leave out.

    Returns:
        A new list with the remaining paths, in their original order.
    """
    return [path for path in candidates
            if basename(path) not in excluded_names]


def main():
    """Filter the Temp and Sal files, then combine the oxygen files."""
    for var in ['Temp', 'Sal']:
        qc_files = glob.glob(qc_dir + '*{}*.csv'.format(var))
        print(len(qc_files))

        for qc_file in qc_files:
            drop_nan_rows(qc_file, nan_dir)

    # Not doing this step for TS because they're too big
    # Put all nan vvds into one file? (except WOD PFL)
    # Only oxygen files are collected: a bare '*.csv' would also match the
    # Temp/Sal files written by the loop above. The combined file itself is
    # excluded in case an earlier run left it in this directory
    # (NOTE(review): assumes concat_vvd_files writes all_name into nan_dir
    # -- confirm against clim_helpers).
    nan_files = files_to_combine(glob.glob(nan_dir + '*Oxy*.csv'),
                                 {pfl_name, all_name})
    if not nan_files:
        print('No Oxy files to combine in', nan_dir)
        return

    # The original script bound the return value to an unused name.
    concat_vvd_files(nan_files, nan_dir, all_name)


if __name__ == '__main__':
    main()
# # WOD PFL Oxy data: did this at step 2!!
# indir = 'C:\\Users\\HourstonH\\Documents\\NEP_climatology\\data\\' \
# 'value_vs_depth\\2_added_dup_flags\\'
# dup_files = glob.glob(indir + 'WOD_PFL_Oxy*.csv')
# print(len(dup_files))
# fname = 'WOD_PFL_Oxy_1991_2020_value_vs_depth_dup.csv'
#
# concat_vvd_files(dup_files, indir, fname)