# Imports

In [1]:
import datetime
import json

import pandas as pd

import clean
import ingest
import validate
from general import many_to_many, many_cols

# Pull Data into Pandas

In [2]:
# Run the project's ingest entry point; the result is indexed by key
# ('excel_data', 'database') in the following cell.
data = ingest.main()

In [3]:
# Pull the two parts out of the ingested bundle.
excel_data = data['excel_data']
# `db` is indexed by table name (e.g. db['deployment']) throughout the rest of the notebook.
db = data['database']

# Cleaning

In [4]:
# Presumably drops NaN-only columns from the tables in `db` — confirm in clean.remove_NaN_columns.
db = clean.remove_NaN_columns(db)

In [5]:
# Presumably strips surrounding whitespace from the tables' values — confirm in
# clean.strip_all_columns. The cell output shows that a list of column names is printed as it runs.
db = clean.strip_all_columns(db)

['iopse_id', 'event_type', 'foriegn_campaign_short_name', 'foriegn_deployment_short_name', 'description', 'region_description', 'published_list', 'reports', 'reference_file']


In [6]:
# Cast every GCMD phenomena code to its string form.
phenomena_codes = db['gcmd_phenomena']['code']
db['gcmd_phenomena']['code'] = phenomena_codes.apply(str)

# Short Name Supplementation

In [7]:
# Build a composite short_name for each table by joining the campaign short name
# and the table's own id column with an underscore.
# Note: the value produced for 'iopse' is not a true short_name.
for table_key, id_column in (('deployment', 'deployment_id'), ('iopse', 'iopse_id')):
    table = db[table_key]
    table['short_name'] = table['foriegn_campaign_short_name'] + '_' + table[id_column]

# Derive the 'flight' table from the 'linking' table via many_to_many (keep_all=True),
# then give each flight row the composite short name of its deployment.
db['flight'] = many_to_many(db, 'linking', 'table_instrument', keep_all=True)
flight = db['flight']
flight['foriegn_deployment_short_name'] = flight['foriegn_campaign_short_name'] + '_' + flight['foriegn_deployment']

# Find Matching Deployments for IOPSE

In [8]:
# Match every iopse row to the deployment of the same campaign whose date range
# contains it, and record that deployment's short_name in
# db['iopse']['deployment_short_name'].
#
# Outcomes per row:
#   exactly one match -> that deployment's short_name
#   no match          -> 'None Found'
#   several matches   -> the row is printed as an error and 'Multiple Matches' is
#                        stored (instead of trying to write a list into one cell)
#
# The results are collected in a list and assigned as a whole column at the end.
# This replaces the chained `db['iopse'][col].iloc[row] = ...` write, which pandas
# may apply to a temporary copy rather than to the table itself.
deployments = db['deployment']
matched_short_names = []

for _, iop in db['iopse'].iterrows():
    # Only deployments belonging to the same campaign are candidates.
    same_campaign = deployments[
        deployments['foriegn_campaign_short_name'] == iop['foriegn_campaign_short_name']
    ]

    # validate.vali_date(a, b) is used here as an "a comes no later than b" test
    # (presumably returns a bool — confirm in validate.vali_date).
    starts_in_time = same_campaign['start_date'].apply(
        lambda dep_start: validate.vali_date(dep_start, iop['start_date'])
    )
    ends_in_time = same_campaign['end_date'].apply(
        lambda dep_end: validate.vali_date(iop['end_date'], dep_end)
    )

    # astype(bool) keeps the mask boolean even when no deployment shares the
    # campaign (an empty .apply result is not guaranteed to have bool dtype);
    # `&` is the logical AND the original spelled as `*`.
    contains_iop = starts_in_time.astype(bool) & ends_in_time.astype(bool)
    candidates = list(same_campaign.loc[contains_iop, 'short_name'])

    if len(candidates) == 1:
        matched_short_names.append(candidates[0])
    elif not candidates:
        matched_short_names.append('None Found')
    else:
        print(f"error on {iop}")
        matched_short_names.append('Multiple Matches')

db['iopse']['deployment_short_name'] = matched_short_names



In [9]:
# Display the iopse table, including the short_name and deployment_short_name
# columns added in the cells above.
db['iopse']

2,iopse_id,event_type,foriegn_campaign_short_name,foriegn_deployment_short_name,start_date,end_date,description,region_description,published_list,reports,reference_file,short_name,deployment_short_name
3,iop_1,Y,HS3,Information Not Available,2014-09-11 00:00:00,2014-09-19 00:00:00,Hurricane Edouard - a period of apparent rapi...,Atlantic,https://journals.ametsoc.org/doi/pdf/10.1175/B...,Flight Reports: http://dx.doi.org/10.5067/HS3/...,Information Not Available,HS3_iop_1,HS3_dep_2014
4,iop_2,Y,HS3,Information Not Available,2012-09-11 00:00:00,2012-09-15 00:00:00,Hurricane Nadine - SAL interaction,Atlantic,https://journals.ametsoc.org/doi/pdf/10.1175/B...,Flight Reports: http://dx.doi.org/10.5067/HS3/...,Information Not Available,HS3_iop_2,HS3_dep_2012
5,iop_3,Y,HS3,Information Not Available,2014-10-15 00:00:00,2014-10-17 00:00:00,Hurricane Gonzalo - study of eyewall replacem...,Atlantic,https://journals.ametsoc.org/doi/pdf/10.1175/B...,Flight Reports: http://dx.doi.org/10.5067/HS3/...,Information Not Available,HS3_iop_3,HS3_dep_2014
6,iop_1,N,OLYMPEX,Information Not Available,2015-11-10 00:00:00,2015-12-21 00:00:00,(Almost) all instruments operating,Olympic Peninsula,https://journals.ametsoc.org/doi/pdf/10.1175/B...,Information Not Available,Information Not Available,OLYMPEX_iop_1,None Found
7,iop_1,Y,OLYMPEX,Information Not Available,2015-11-13 00:00:00,2015-11-17 00:00:00,Atmospheric river - consecutive events,Olympic Peninsula,https://journals.ametsoc.org/doi/pdf/10.1175/B...,Information Not Available,Information Not Available,OLYMPEX_iop_1,None Found
...,...,...,...,...,...,...,...,...,...,...,...,...,...
208,iop_1,IOP,NAAMES,dep_2015,2015-11-05 00:00:00,2015-12-02 00:00:00,Studied the winter phase of the phytoplankton ...,Northwestern Atlantic,https://doi.org/10.3389/fmars.2019.00122,Information Not Available,Information Not Available,NAAMES_iop_1,NAAMES_dep_2015
209,iop_1,Y,NAAMES,dep_2015,2015-11-19 00:00:00,2015-11-25 00:00:00,experienced shallow boundary layers and freque...,Northwestern Atlantic,https://doi.org/10.3389/fmars.2019.00122,Information Not Available,Information Not Available,NAAMES_iop_1,NAAMES_dep_2015
210,iop_2,IOP,NAAMES,dep_2016,2016-05-11 00:00:00,2016-06-05 00:00:00,Studied the transition phase of the phytoplank...,Northwestern Atlantic,https://doi.org/10.3389/fmars.2019.00122,Information Not Available,Information Not Available,NAAMES_iop_2,NAAMES_dep_2016
211,iop_2,Y,NAAMES,dep_2016,2016-05-25 00:00:00,2016-05-25 00:00:00,DMS concentrations in an especially shallow an...,Northwestern Atlantic,https://doi.org/10.3389/fmars.2019.00122,Information Not Available,Information Not Available,NAAMES_iop_2,NAAMES_dep_2016


# Many to Many Creation

In [10]:
# Tables whose 'table_*' columns are expanded into link tables by the loop in the next cell.
main_table_names = ['campaign', 'platform', 'instrument', 'deployment', 'iopse']

In [11]:
# For each main table, pass every string-named column containing 'table' to
# many_to_many and store the result in db as "<table>_to_<column without 'table_'>".
for table in main_table_names:
    print(table)
    for column in db[table].keys():
        if not (isinstance(column, str) and 'table' in column):
            continue
        link_suffix = column.replace('table_', '')
        new_table_name = f"{table}_to_{link_suffix}"
        db[new_table_name] = many_to_many(db, table, column)
        print(f'   {new_table_name} created')

campaign
   campaign_to_focus_area created
   campaign_to_season created
   campaign_to_platform_type created
   campaign_to_gcmd_phenomenas_uuid created
   campaign_to_repository created
   campaign_to_partner_org created
   campaign_to_gcmd_project created
platform
   platform_to_gcmd_platform_uuid created
instrument
   instrument_to_gcmd_instrument created
   instrument_to_instrument_type created
   instrument_to_measurement_keywords_uuid created
   instrument_to_geophysical_concept created
   instrument_to_repository created
   instrument_to_measurement_region created
deployment
   deployment_to_geographical_region created
   deployment_to_platform created
iopse


# Validation

### Short Name Duplicates

In [12]:
# For every table that has a 'short_name' column, print the table name followed by
# whatever validate.find_duplicates reports for that column.
for table_name in db.keys():
    if 'short_name' not in db[table_name].keys():
        continue
    print(table_name)
    duplicates = validate.find_duplicates(db, table_name, 'short_name')
    print(f'    {duplicates}')

platform_type
    []
aircraft_type
    []
home_base
    []
repository
    []
focus_area
    []
season
    []
instrument_type
    ['insitulaboratorychemicalmetersanalyzers']
measurement_region
    []
geographical_region
    []
geophysical_concept
    ['informationnotavailable', 'ocean']
campaign
    ['informationnotavailable']
platform
    []
instrument
    ['atlas']
deployment
    []
iopse
    ['olympexiop1', 'informationnotavailableinformationnotavailable', 'carveiop1', 'carveiop2', 'carveiop3', 'gcpexiop1', 'acesiop1', 'dc3iop1', 'goesrpltiop1', 'goesrpltiop2', 'gripiop01', 'ariseiop1', 'nammaiop1', 'areseiop1', 'areseiop2', 'oraclesiop1', 'oraclesiop2', 'attrexiop1', 'attrexiop2', 'attrexiop3', 'c3vpiop2', 'c3vpiop3', 'c3vpiop4', 'calwateriop1', 'coraliop1', 'coraliop2', 'bromexiop1', 'aboveiop1', 'aboveiop4', 'aboveiop6', 'clamsiop1', 'clpxiop4', 'naamesiop1', 'naamesiop2']
gcmd_instrument
    ['atlas', 'informationnotavailable', 'wcr', nan, 'epic', 'opc', 'gnssreceiver', 'cris', '

### Foreign Key Links

### Campaign

In [13]:
# Foreign-key check: campaign_to_gcmd_project.gcmd_project is looked up in gcmd_project.uuid.
# `errors` is then displayed; judging by the output it holds the unmatched rows with
# suggested near-matches (confirm in validate.foriegn_keys).
errors = validate.foriegn_keys(db, 
                                data_table='campaign_to_gcmd_project', 
                                data_index='campaign', 
                                data_column='gcmd_project', 
                                foriegn_table='gcmd_project', 
                                foriegn_column='uuid')
errors

Unnamed: 0,campaign,gcmd_project,suggestions
2,ACEPOL,e0a48b3c-ab3d-4331-b992-367352d5c09c,[]
16,C3VP,NID,[]
17,CalWater,NID,[]
77,ACTIVATE,TBD,[]
78,CAMP2Ex,TBD,[]
79,DCOTSS,TBD,[]
80,Delta-X,TBD,[]
83,IMPACTS,TBD,[]
84,S-MODE,TBD,[]
90,ARISE,NOT LISTED IN GCMD,[]


In [14]:
# Foreign-key check: campaign_to_focus_area.focus_area is looked up in focus_area.short_name.
# `errors` is then displayed; judging by the output it holds the unmatched rows with
# suggested near-matches (confirm in validate.foriegn_keys).
errors = validate.foriegn_keys(db, 
                                data_table='campaign_to_focus_area', 
                                data_index='campaign', 
                                data_column='focus_area', 
                                foriegn_table='focus_area', 
                                foriegn_column='short_name')
errors

Unnamed: 0,campaign,focus_area,suggestions
10,ACT-America,Climate Variability and Change,[Climate Variability & Change]
39,CLAMS,Earth Surface and Interior,[Earth Surface & Interior]
40,CLASIC07,Water & Energy Cycle,[Global Water & Energy Cycle]
82,ORACLES,Global Water and Energy Cycles,[Global Water & Energy Cycle]
102,TOGA COARE,Atmospheric Dynamics,[]
131,CASIE,Airborne Science,[]
133,CLPX II,Water & Energy Cycle,[Global Water & Energy Cycle]
134,CLPX III,Water & Energy Cycle,[Global Water & Energy Cycle]
137,DEVOTE,Airborne Science,[]
140,High Winds,Water & Energy Cycle,[Global Water & Energy Cycle]


In [15]:
# Foreign-key check: campaign_to_season.season is looked up in season.short_name.
# `errors` is then displayed; judging by the output it holds the unmatched rows
# (confirm in validate.foriegn_keys).
errors = validate.foriegn_keys(db, 
                                data_table='campaign_to_season', 
                                data_index='campaign', 
                                data_column='season', 
                                foriegn_table='season', 
                                foriegn_column='short_name')
errors

Unnamed: 0,campaign,season,suggestions
106,Delta-X,Overshooting tops,[]
107,Delta-X,deep convection,[]
108,Delta-X,lower stratospheric air chemistry,[]
119,ARISE,boreal summer boreal fall,[]


In [16]:
# Foreign-key check: campaign_to_platform_type.platform_type is looked up in
# platform_type.short_name. `errors` is then displayed; judging by the output it holds
# the unmatched rows with suggested near-matches (confirm in validate.foriegn_keys).
errors = validate.foriegn_keys(db, 
                                data_table='campaign_to_platform_type', 
                                data_index='campaign', 
                                data_column='platform_type', 
                                foriegn_table='platform_type', 
                                foriegn_column='short_name')
errors

Unnamed: 0,campaign,platform_type,suggestions
1,ABoVE,Ground-based Platforms,"[Land-based Platforms, Water-based Platforms]"
10,ACT-America,Ground-based Platforms,"[Land-based Platforms, Water-based Platforms]"
26,ATom,Model Output,[]
31,BOREAS,Model Output,[]
32,BOREAS,Satellite,[Satellites]
...,...,...,...
300,GTE CITE-1A,ground-based platforms,"[Land-based Platforms, Water-based Platforms]"
302,GTE CITE-1B,ground-based platforms,"[Land-based Platforms, Water-based Platforms]"
304,GTE CITE-1C,ground-based platforms,"[Land-based Platforms, Water-based Platforms]"
306,GTE CITE-2,ground-based platforms,"[Land-based Platforms, Water-based Platforms]"


In [17]:
# Foreign-key check: campaign_to_gcmd_phenomenas_uuid.gcmd_phenomenas_uuid is looked up
# in gcmd_phenomena.code. The displayed result has no rows.
# NOTE(review): the data column is named *_uuid but is compared with the 'code'
# column — confirm that this is the intended reference.
errors = validate.foriegn_keys(db, 
                                data_table='campaign_to_gcmd_phenomenas_uuid', 
                                data_index='campaign', 
                                data_column='gcmd_phenomenas_uuid', 
                                foriegn_table='gcmd_phenomena', 
                                foriegn_column='code')
errors

Unnamed: 0,campaign,gcmd_phenomenas_uuid,suggestions


In [18]:
# Foreign-key check: campaign_to_repository.repository is looked up in repository.short_name.
# `errors` is then displayed; judging by the output it holds the unmatched rows
# (confirm in validate.foriegn_keys).
errors = validate.foriegn_keys(db, 
                                data_table='campaign_to_repository', 
                                data_index='campaign', 
                                data_column='repository', 
                                foriegn_table='repository', 
                                foriegn_column='short_name')
errors

Unnamed: 0,campaign,repository,suggestions
0,ABoVE,ORNL,[]
2,ACEPOL,ASDC,[]
3,ACES,GHRC,[]
4,ACT-America,ORNL,[]
5,AfriSAR,ORNL,[]
...,...,...,...
190,SMEX04/NAME,NSIDC DAAC,[]
191,SMEX05/POLEX,NSIDC DAAC,[]
192,THORPEX,ASDC,[]
193,TRACE-A,ORNL,[]


In [19]:
# Foreign-key check: campaign_to_partner_org.partner_org is looked up in partner_org.short_name.
# `errors` is then displayed; judging by the output it holds the unmatched rows
# (confirm in validate.foriegn_keys).
errors = validate.foriegn_keys(db, 
                                data_table='campaign_to_partner_org', 
                                data_index='campaign', 
                                data_column='partner_org', 
                                foriegn_table='partner_org', 
                                foriegn_column='short_name')
errors

Unnamed: 0,campaign,partner_org,suggestions
11,ARCTAS,California Air Resources Board,[]
12,ARCTAS,International Polar Year,[]
17,ATTREX,DFG,[]
39,CORAL,NID,[]
40,CPEX,NID,[]
48,FIFE,NAC (Canada),[]
49,FIFE,AAFC (Canada),[]
54,HS3,Environment Canada,[]
61,IPHEx,Duke University,[]
62,IPHEx,NOAA Hydrometeorological Testbed,[]


In [20]:
# Foreign-key check: platform_to_gcmd_platform_uuid.gcmd_platform_uuid is looked up in
# gcmd_platform.uuid. The displayed result has no rows.
errors = validate.foriegn_keys(db, 
                                data_table='platform_to_gcmd_platform_uuid', 
                                data_index='platform', 
                                data_column='gcmd_platform_uuid', 
                                foriegn_table='gcmd_platform', 
                                foriegn_column='uuid')
errors

Unnamed: 0,platform,gcmd_platform_uuid,suggestions


In [21]:
# Foreign-key check: instrument_to_measurement_keywords_uuid.measurement_keywords_uuid is
# looked up in gcmd_phenomena.code. `errors` is then displayed.
# NOTE(review): the data column is named *_uuid but is compared with the 'code'
# column — confirm that this is the intended reference.
errors = validate.foriegn_keys(db, 
                                data_table='instrument_to_measurement_keywords_uuid', 
                                data_index='instrument', 
                                data_column='measurement_keywords_uuid', 
                                foriegn_table='gcmd_phenomena', 
                                foriegn_column='code')
errors

Unnamed: 0,instrument,measurement_keywords_uuid,suggestions
86,CPL,0,"[1000, 2000, 3000, 4000]"


In [22]:
# Foreign-key check: instrument_to_gcmd_instrument.gcmd_instrument is looked up in
# gcmd_instrument.uuid. `errors` is then displayed; judging by the output it holds the
# unmatched rows with suggested near-matches (confirm in validate.foriegn_keys).
errors = validate.foriegn_keys(db, 
                                data_table='instrument_to_gcmd_instrument', 
                                data_index='instrument', 
                                data_column='gcmd_instrument', 
                                foriegn_table='gcmd_instrument', 
                                foriegn_column='uuid')
errors

Unnamed: 0,instrument,gcmd_instrument,suggestions
6,4-STAR,b0f93e6a-c766-4957-8762-5c7709487459,[]
44,AVIRIS,d67afd03-3b79-419c-9289-5dde713ab904\n57854209...,[]
68,CIP,92f99316-b581-4adb-9980-aeb6bed64eee,[]
82,CPL,6238fe2-9a87-4e32-b866-c4a637094b51,[6238f3e2-9a87-4e32-b866-c4a637094b51]
117,EXRAD,a212d36d-2a4e-473f-b16a-6e2104b9dd8f\nba3de3fc...,[]
152,HSRL-2,abdf08cd-03c5-4497-87a4-65493584e2c7,[]
274,W-Band Radar,a90e-4a70-9bcb-93d106c1583f,[dc5ee11d-a90e-4a70-9bcb-93d106c1583f]


In [23]:
# Foreign-key check: instrument_to_instrument_type.instrument_type is looked up in
# instrument_type.short_name. `errors` is then displayed; judging by the output it holds
# the unmatched rows with suggested near-matches (confirm in validate.foriegn_keys).
errors = validate.foriegn_keys(db, 
                                data_table='instrument_to_instrument_type', 
                                data_index='instrument', 
                                data_column='instrument_type', 
                                foriegn_table='instrument_type', 
                                foriegn_column='short_name')
errors

Unnamed: 0,instrument,instrument_type,suggestions
1,2D-C/P,Chemical Meters/Analyzers,[]
41,AVAPS,Profilers/Sounders,[]
79,CPL,LIDAR,[]
136,HAMSR,Spectrometers/Radiometers,[In Situ/Laboratory - Spectrometers/Radiometers]
141,HIRAD,Spectrometers/Radiometers,[In Situ/Laboratory - Spectrometers/Radiometers]
231,S-HIS,interferometer/sounder,[]


In [24]:
# Foreign-key check: instrument_to_geophysical_concept.geophysical_concept is looked up
# in geophysical_concept.short_name. The displayed result has no rows.
errors = validate.foriegn_keys(db, 
                                data_table='instrument_to_geophysical_concept', 
                                data_index='instrument', 
                                data_column='geophysical_concept', 
                                foriegn_table='geophysical_concept', 
                                foriegn_column='short_name')
errors

Unnamed: 0,instrument,geophysical_concept,suggestions


In [25]:
# Foreign-key check: instrument_to_repository.repository is looked up in repository.short_name.
# `errors` is then displayed; judging by the output it holds the unmatched rows
# (confirm in validate.foriegn_keys).
errors = validate.foriegn_keys(db, 
                                data_table='instrument_to_repository', 
                                data_index='instrument', 
                                data_column='repository', 
                                foriegn_table='repository', 
                                foriegn_column='short_name')
errors

Unnamed: 0,instrument,repository,suggestions
41,AVAPS,GHRC,[]
79,CPL,GHRC,[]
80,CPL,ORNL,[]
137,HAMSR,GHRC,[]
142,HIRAD,GHRC,[]
144,HIWRAP,GHRC,[]
232,S-HIS,GHRC,[]


In [26]:
# Foreign-key check: instrument_to_measurement_region.measurement_region is looked up in
# measurement_region.short_name. `errors` is then displayed; judging by the output it
# holds the unmatched rows (confirm in validate.foriegn_keys).
errors = validate.foriegn_keys(db, 
                                data_table='instrument_to_measurement_region', 
                                data_index='instrument', 
                                data_column='measurement_region', 
                                foriegn_table='measurement_region', 
                                foriegn_column='short_name')
errors

Unnamed: 0,instrument,measurement_region,suggestions
141,HIRAD,boundary layer? sea surface is what I would ca...,[]


In [27]:
# Foreign-key check: deployment_to_geographical_region.geographical_region is looked up
# in geographical_region.short_name. `errors` is then displayed; judging by the output
# it holds the unmatched rows (confirm in validate.foriegn_keys).
errors = validate.foriegn_keys(db, 
                                data_table='deployment_to_geographical_region', 
                                data_index='deployment', 
                                data_column='geographical_region', 
                                foriegn_table='geographical_region', 
                                foriegn_column='short_name')
errors

Unnamed: 0,deployment,geographical_region,suggestions
92,ABoVE_dep_2017,Alaska,[]
93,ABoVE_dep_2017,western Canada,[]
94,ABoVE_dep_2018,Alaska,[]
95,ABoVE_dep_2018,western Canada,[]
96,ABoVE_dep_2019,Alaska,[]
97,ABoVE_dep_2019,western Canada,[]
98,CLAMS_dep_2001,Chesapeake Bay,[]
99,CLAMS_dep_2001,Atlantic Ocean,[]


In [28]:
# Foreign-key check: deployment_to_platform.platform is looked up in platform.short_name.
# `errors` is then displayed; judging by the output it holds the unmatched rows with
# suggested near-matches (confirm in validate.foriegn_keys).
errors = validate.foriegn_keys(db, 
                                data_table='deployment_to_platform', 
                                data_index='deployment', 
                                data_column='platform', 
                                foriegn_table='platform', 
                                foriegn_column='short_name')
# Open question left by the author, printed ahead of the result table.
print('\n\ndo I really need to validate this?')
errors



do I really need to validate this?


Unnamed: 0,deployment,platform,suggestions
0,HS3_dep_2012,global hawk AV-1,[]
1,HS3_dep_2012,AV-6,[]
2,HS3_dep_2013,global hawk AV-1,[]
3,HS3_dep_2013,AV-6,[]
4,HS3_dep_2014,global hawk AV-6,[]
8,OLYMPEX_dep_2016,UND Citation II,[Citation]
23,ATom_dep_2016,NASA DC-8-AFRC,[]
24,ATom_dep_2017a,NASA DC-8-AFRC,[]
25,ATom_dep_2017b,NASA DC-8-AFRC,[]
26,ATom_dep_2018,NASA DC-8-AFRC,[]


In [29]:
# Foreign-key check: deployment.foriegn_campaign_short_name is looked up in
# campaign.short_name, with deployment.short_name identifying each row.
# The displayed result has no rows.
errors = validate.foriegn_keys(db, 
                                data_table='deployment', 
                                data_index='short_name', 
                                data_column='foriegn_campaign_short_name', 
                                foriegn_table='campaign', 
                                foriegn_column='short_name')
# Author's reminder, printed ahead of the result table.
print('\n\n TODO this better once you have the data')
errors



 TODO this better once you have the data


6,short_name,foriegn_campaign_short_name,suggestions


In [30]:
# Foreign-key check: platform.foriegn_aircraft_type is looked up in aircraft_type.short_name,
# with platform.short_name identifying each row. `errors` is then displayed; judging by
# the output it holds the unmatched rows (confirm in validate.foriegn_keys).
errors = validate.foriegn_keys(db, 
                                data_table='platform', 
                                data_index='short_name', 
                                data_column='foriegn_aircraft_type', 
                                foriegn_table='aircraft_type', 
                                foriegn_column='short_name')
errors

1,short_name,foriegn_aircraft_type,suggestions
7,ASO,Prop Plane,[]


In [31]:
# Foreign-key check: platform_to_gcmd_platform_uuid.gcmd_platform_uuid is looked up in
# gcmd_platform.uuid. The displayed result has no rows.
# NOTE(review): this call has exactly the same arguments as an earlier
# platform_to_gcmd_platform_uuid check in this notebook — likely redundant.
errors = validate.foriegn_keys(db, 
                                data_table='platform_to_gcmd_platform_uuid', 
                                data_index='platform', 
                                data_column='gcmd_platform_uuid', 
                                foriegn_table='gcmd_platform', 
                                foriegn_column='uuid')
errors

Unnamed: 0,platform,gcmd_platform_uuid,suggestions


In [32]:
# Foreign-key check: iopse.deployment_short_name is looked up in deployment.short_name,
# with iopse.iopse_id identifying each row. The displayed rows are the ones whose
# deployment_short_name is the 'None Found' placeholder.
errors = validate.foriegn_keys(db, 
                                data_table='iopse', 
                                data_index='iopse_id', 
                                data_column='deployment_short_name', 
                                foriegn_table='deployment', 
                                foriegn_column='short_name')
errors

2,iopse_id,deployment_short_name,suggestions
6,iop_1,None Found,[]
7,iop_1,None Found,[]
8,iop_1,None Found,[]
9,iop_1,None Found,[]
10,iop_1,None Found,[]
12,Information Not Available,None Found,[]
13,Information Not Available,None Found,[]
14,Information Not Available,None Found,[]
30,Information Not Available,None Found,[]
40,iop_5,None Found,[]


In [33]:
# Foreign-key check: iopse.event_type is looked up in deployment.short_name.
# NOTE(review): event_type values (e.g. 'Y', 'N', 'IOP' in the output) are compared with
# deployment short names, and the displayed result lists them as unmatched. The
# foriegn_table/foriegn_column arguments look copy-pasted from the
# deployment_short_name check — confirm the intended reference table.
errors = validate.foriegn_keys(db, 
                                data_table='iopse', 
                                data_index='iopse_id', 
                                data_column='event_type', 
                                foriegn_table='deployment', 
                                foriegn_column='short_name')
errors

2,iopse_id,event_type,suggestions
3,iop_1,Y,[]
4,iop_2,Y,[]
5,iop_3,Y,[]
6,iop_1,N,[]
7,iop_1,Y,[]
...,...,...,...
208,iop_1,IOP,[]
209,iop_1,Y,[]
210,iop_2,IOP,[]
211,iop_2,Y,[]


### Flight

In [34]:
# Flight table, instruments.
# Foreign-key check: flight.instrument is looked up in instrument.short_name, with the
# campaign short name identifying each row. `errors` is then displayed; judging by the
# output it holds the unmatched rows with suggested near-matches
# (confirm in validate.foriegn_keys).

errors = validate.foriegn_keys(db, data_table='flight', 
                      data_index='foriegn_campaign_short_name', 
                      data_column='instrument', 
                      foriegn_table='instrument', 
                      foriegn_column='short_name')
errors

Unnamed: 0,foriegn_campaign_short_name,instrument,suggestions
26,IPHEx,2D-C,[2D-C/P]
33,IPHEx,Nevzorov,[Nevzorov probe]
34,IPHEx,King hot wire,[King hot wire probe]
129,SEAC4RS,Dew Point,[]
130,SEAC4RS,LWC/TWC,[]
131,SEAC4RS,Rosemount temperature,[]
132,SEAC4RS,Rosemount icing rod,[]
133,SEAC4RS,AIMMS-20,[]
134,SEAC4RS,NMASS,[MAS]
249,BOREAS,LI6262,[LICOR 6262]


In [35]:
# Flight table, platforms.
# Foreign-key check: flight.platform is looked up in platform.short_name, with the
# campaign short name identifying each row. `errors` is then displayed; judging by the
# output it holds the unmatched rows with suggested near-matches
# (confirm in validate.foriegn_keys).

errors = validate.foriegn_keys(db, 
                      data_table='flight', 
                      data_index='foriegn_campaign_short_name', 
                      data_column='platform', 
                      foriegn_table='platform', 
                      foriegn_column='short_name')
errors

Unnamed: 0,foriegn_campaign_short_name,platform,suggestions
15,HS3,WB-57f,[WB-57]
16,HS3,WB-57f,[WB-57]
19,OLYMPEX,UND Citation II,[Citation]


In [36]:
# Foreign-key check: flight.foriegn_deployment_short_name is looked up in
# deployment.short_name, with the campaign short name identifying each row.
# The displayed result has no rows.
errors = validate.foriegn_keys(db, 
                      data_table='flight', 
                      data_index='foriegn_campaign_short_name', 
                      data_column='foriegn_deployment_short_name', 
                      foriegn_table='deployment', 
                      foriegn_column='short_name')
errors

Unnamed: 0,foriegn_campaign_short_name,foriegn_deployment_short_name,suggestions


# Dates

In [37]:
# Print each table that has at least one column whose name contains 'date',
# followed by an indented, comma-separated list of those columns.
for table_name in db.keys():
    col_names = [col for col in db[table_name].columns if 'date' in col]
    if len(col_names) == 0:
        continue
    print(f"{table_name}\n    {', '.join(col_names)}")

campaign
    start_date, end_date
instrument
    deployment_date, decommision_date
deployment
    start_date, end_date
iopse
    start_date, end_date


In [38]:
# Add a 'valid_date' column to each table that has start_date/end_date, holding the
# result of validate.vali_date(start_date, end_date) for every row.
# The earlier `['valid_date'] = False` pre-fill was removed: it was overwritten on the
# very next statement and never read.
table_names = ['campaign', 'deployment', 'iopse']
for table_name in table_names:
    print(table_name)
    db[table_name]['valid_date'] = db[table_name].apply(
        lambda row: validate.vali_date(row['start_date'], row['end_date']),
        axis=1,
    )
        

campaign
    non date-time detected: 2017-04-26 00:00:00, ongoing
    non date-time detected: 3 week period in june, Information Not Available
    non date-time detected: 2015,  on-going
    non date-time detected: 2016-09-26 00:00:00, ongoing
    non date-time detected: 1905-06-18 00:00:00, Information Not Available
    non date-time detected: 1998, 1998
    non date-time detected: January ?, 2019, December ?, 2023
    non date-time detected: TBD, TBD
    non date-time detected: TBD, TBD
    non date-time detected: TBD, TBD
    non date-time detected: 1905-07-03 00:00:00, ongoing
    non date-time detected: 2007, 2008
    non date-time detected: 2007, 2008
    non date-time detected: Information Not Available, 1984-06-01 00:00:00
    non date-time detected: Information Not Available, 1984-06-01 00:00:00
    non date-time detected: Information Not Available, 1984-06-01 00:00:00
    non date-time detected: Information Not Available, 1983-07-01 00:00:00
    non date-time detected: Inform

In [39]:
# Add a 'valid_date' column to the instrument table, holding the result of
# validate.vali_date(deployment_date, decommision_date) for every row.
# The earlier `['valid_date'] = False` pre-fill was removed: it was overwritten on the
# very next statement and never read.
table_names = ['instrument']
for table_name in table_names:
    db[table_name]['valid_date'] = db[table_name].apply(
        lambda row: validate.vali_date(row['deployment_date'], row['decommision_date']),
        axis=1,
    )
       

    non date-time detected: 1997, 1997-present
    non date-time detected: 2000, Information Not Available
    non date-time detected: 2001, Information Not Available
    non date-time detected: 2010, Information Not Available
    non date-time detected: 1998, Information Not Available


In [40]:
# Show the campaign date columns next to the valid_date flag computed above.
db['campaign'][['start_date','end_date','valid_date']]

1,start_date,end_date,valid_date
3,2017-04-26 00:00:00,ongoing,False
4,Information Not Available,Information Not Available,False
5,2017-10-19 00:00:00,2017-11-09 00:00:00,True
6,2002-08-02 00:00:00,2002-08-30 00:00:00,True
7,2016-07-18 00:00:00,2019-07-29 00:00:00,True
...,...,...,...
190,Information Not Available,Information Not Available,False
191,Information Not Available,Information Not Available,False
192,Information Not Available,Information Not Available,False
193,Information Not Available,Information Not Available,False


In [41]:
# Unconditional stop: presumably placed here so that "Run All" halts before the
# unexecuted scratch cells below. Raised explicitly (instead of `assert 5==6`) so the
# failure carries an explanation and still fires when assertions are disabled (-O).
raise AssertionError("Intentional stop: the cells below are scratch work and are not part of the pipeline.")

AssertionError: 

In [None]:
# Scratch: a bare list of table names; evaluating it only echoes the list.
['campaign', 'deployment', 'iopse', 'instrument'] 

In [None]:
# Scratch: re-display the campaign date columns with the valid_date flag.
db['campaign'][['start_date','end_date','valid_date']]

In [None]:
# Scratch: inspect the start_date value of the first campaign row (by position).
db['campaign']['start_date'].iloc[0]

In [None]:
# Scratch: True when the first campaign row's start_date or end_date is not a datetime.
# The original expression tested start_date twice and never looked at end_date, and it
# used `datetime` without the module being imported.
first_start = db['campaign']['start_date'].iloc[0]
first_end = db['campaign']['end_date'].iloc[0]
not (isinstance(first_start, datetime.datetime) and isinstance(first_end, datetime.datetime))

In [None]:
# Scratch: display the iopse table.
db['iopse']

In [None]:
# Scratch: choose one table and one row position, then read that row's
# start_date and end_date for a single-row validation test.
table_name = 'iopse'
row = 1

start = db[table_name].iloc[row]['start_date']
end = db[table_name].iloc[row]['end_date']  



In [None]:
# Scratch: store the validation result for the chosen row with one positional
# .iloc[row, column] write. The chained form (df['valid_date'].iloc[row] = ...) may
# assign to a temporary copy and leave the table unchanged.
valid_date_position = db[table_name].columns.get_loc('valid_date')
db[table_name].iloc[row, valid_date_position] = validate.vali_date(start, end)