Sort and clean conference data.
This notebook writes `sorted_data.yml` and `cleaned_data.yml`; after reviewing each file, copy it over `../_data/conferences.yml`.

In [1]:
import yaml
import datetime
import sys
from shutil import copyfile
from builtins import input
import pytz

In [2]:
try:
    # for python newer than 2.7
    from collections import OrderedDict
except ImportError:
    # use backport from pypi
    from ordereddict import OrderedDict

try:
    from yaml import CLoader as Loader, CDumper as Dumper
except ImportError:
    from yaml import Loader, Dumper
from yaml.representer import SafeRepresenter
# YAML tag that identifies plain mappings; passed to Loader.add_constructor
# below so that every mapping is built by dict_constructor.
_mapping_tag = yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG

def dict_representer(dumper, data):
    """Represent an ordered mapping as a YAML mapping, keeping key order.

    Parameters
    ----------
    dumper : object exposing ``represent_dict`` (a PyYAML dumper)
    data : mapping whose ``items()`` yields (key, value) pairs in order

    Returns
    -------
    Whatever ``dumper.represent_dict`` returns for the item pairs.
    """
    # ``iteritems()`` only exists on Python 2 dicts; ``items()`` is available
    # on both Python 2 and Python 3 and yields the same pairs in order.
    return dumper.represent_dict(data.items())


def dict_constructor(loader, node):
    """Build an OrderedDict from a YAML mapping node.

    The (key, value) pairs come from ``loader.construct_pairs(node)`` and are
    stored in the order the loader returns them.
    """
    pairs = loader.construct_pairs(node)
    return OrderedDict(pairs)

# Dump OrderedDict instances through dict_representer and load every YAML
# mapping through dict_constructor.
Dumper.add_representer(OrderedDict, dict_representer)
Loader.add_constructor(_mapping_tag, dict_constructor)

# Emit str values with the safe string representer.
Dumper.add_representer(str, SafeRepresenter.represent_str)


def ordered_dump(data, stream=None, Dumper=yaml.Dumper, **kwds):
    """Serialize ``data`` with ``yaml.dump``, writing OrderedDicts as mappings.

    A throwaway subclass of ``Dumper`` receives the OrderedDict representer,
    so the dumper class passed in is not modified.

    Parameters
    ----------
    data : object to serialize
    stream : forwarded to ``yaml.dump`` as its stream argument
    Dumper : dumper class to derive the private subclass from
    **kwds : extra keyword arguments forwarded to ``yaml.dump``

    Returns
    -------
    The return value of ``yaml.dump``.
    """
    mapping_tag = yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG

    def _represent_ordered(dumper, mapping):
        # Hand the items over as pairs so insertion order is what gets written.
        return dumper.represent_mapping(mapping_tag, mapping.items())

    class OrderedDumper(Dumper):
        """Private dumper subclass carrying the OrderedDict representer."""

    OrderedDumper.add_representer(OrderedDict, _represent_ordered)
    return yaml.dump(data, stream, OrderedDumper, **kwds)

# Timestamp layout used for deadlines and for string comparisons against now.
dateformat = "%Y-%m-%d %H:%M:%S"
# Lower-cased deadline values that mean "not announced yet".
tba_words = ["tba", "tbd"]

# Current UTC time as a string in ``dateformat``. The format has no sub-second
# field, so microseconds never appear in the result.
right_now = datetime.datetime.utcnow().strftime(dateformat)

In [3]:
def query_yes_no(question, default="no"):
    """Ask a yes/no question via input() and return the answer as a bool.

    Parameters
    ----------
    question : str
        Text presented to the user; a "[y/n]" style hint is appended.
    default : {"no", "yes", None}
        The presumed answer if the user just hits <Enter>. Defaults to "no".
        ``None`` means an explicit answer is required.

    Returns
    -------
    bool
        True for "yes" (also "y", "ye"), False for "no" (also "n").

    Raises
    ------
    ValueError
        If ``default`` is not "yes", "no" or None.
    """
    valid = {"yes": True, "y": True, "ye": True,
             "no": False, "n": False}
    if default is None:
        prompt = " [y/n] "
    elif default == "yes":
        prompt = " [Y/n] "
    elif default == "no":
        prompt = " [y/N] "
    else:
        raise ValueError("invalid default answer: '%s'" % default)

    while True:
        sys.stdout.write(question + prompt)
        # Strip surrounding whitespace so answers such as " y " are accepted
        # and a whitespace-only line counts as "just hit <Enter>".
        choice = input().strip().lower()
        if default is not None and choice == '':
            return valid[default]
        elif choice in valid:
            return valid[choice]
        else:
            sys.stdout.write("Please respond with 'yes' or 'no' "
                             "(or 'y' or 'n').\n")

In [4]:
def _deadline_utc(entry):
    """Return the entry's deadline as a timezone-aware datetime in UTC."""
    naive = datetime.datetime.strptime(entry['deadline'], dateformat)
    # pytz zones must be attached with localize(); passing them through
    # datetime.replace(tzinfo=...) applies the zone's first recorded offset
    # (usually local mean time), which shifts the deadline.
    local = pytz.timezone(entry['timezone']).localize(naive)
    return local.astimezone(pytz.utc)


def _deadline_passed(entry):
    """Return True when the entry's UTC deadline is earlier than right_now."""
    return _deadline_utc(entry).strftime(dateformat) < right_now


def _print_deadlines(title, entries):
    """Print a title, one 'deadline - name' line per entry, then blank lines."""
    print(title)
    for q in entries:
        print(q["deadline"], " - ", q["name"])
    print("\n\n")


# NOTE(review): yaml.load with the full Loader can construct arbitrary Python
# objects; that is acceptable only because the input is a file from this
# repository. Do not point this at untrusted YAML.
with open("../_data/conferences.yml", 'r') as stream:
    try:
        data = yaml.load(stream, Loader=Loader)
        _print_deadlines("Initial Sorting:", data)

        # Entries without an announced deadline are kept aside and appended
        # after the dated ones.
        conf = [x for x in data if x['deadline'].lower() not in tba_words]
        tba = [x for x in data if x['deadline'].lower() in tba_words]

        # Chronological order by UTC deadline.
        conf.sort(key=_deadline_utc)
        _print_deadlines("Date Sorting:", conf + tba)

        # list.sort is stable and False sorts before True, so upcoming
        # deadlines stay first (still chronological) and passed ones follow.
        conf.sort(key=_deadline_passed)
        _print_deadlines("Date and Passed Deadline Sorting with tba:", conf + tba)

        with open('sorted_data.yml', 'w') as outfile:
            dumped = ordered_dump(conf + tba, Dumper=yaml.SafeDumper,
                                  default_flow_style=False, explicit_start=True)
            # NOTE(review): this rewrites every apostrophe in the dump,
            # including any inside values, not only the quoting characters.
            for line in dumped.replace('\'', '"').splitlines():
                outfile.write('\n')
                # Extra blank line before each conference entry.
                outfile.write(line.replace('- name:', '\n- name:'))
    except yaml.YAMLError as exc:
        print(exc)

Initial Sorting:
2018-03-02 23:59:59  -  Seismic Characterisation of Carbonate Platforms and Reservoirs
2018-03-06 23:59:59  -  SPE Argentina Exploration and Production of Unconventional Resources Symposium
2018-03-16 23:59:59  -  SPE Russian Petroleum Technology Conference
2018-03-18 23:59:59  -  SPE International Hydraulic Fracturing Technology Conference and Exhibition
2018-03-31 23:59:59  -  AAPG ICE 2018
2018-04-01 16:59:59  -  SEG Annual Meeting 2018
2018-04-15 23:59:59  -  Near Surface Geoscience Conference & Exhibition 2018
2018-04-23 23:59:59  -  SPE Annual Caspian Technical Conference & Exhibition
2018-05-01 23:59:59  -  The Abu Dhabi International Petroleum Exhibition & Conference (ADIPEC)
2018-05-10 23:59:59  -  EAGE Workshop on 4D Seismic and Reservoir Monitoring
2018-05-31 23:59:59  -  International Petroleum Technology Conference (IPTC)
2018-08-01 23:59:59  -  AGU Fall Meeting 2018
2018-08-18 00:00:00  -  International Conference on Geosciences and Environmental Geology


In [5]:
# Overwrite the site data with the sorted file only after explicit confirmation.
if query_yes_no(
    "Did you check the sorted data and would like to replace the original data?"
):
    copyfile(src="sorted_data.yml", dst="../_data/conferences.yml")

Did you check the sorted data and would like to replace the original data? [y/N] y


In [6]:
def _conference_end_date(date_text):
    """Parse the last day of a conference from its human-readable date field.

    Recognised layouts (full English month names):
      * "June 11-14, 2018"       -> June 14 2018
      * "May 30 - June 2, 2018"  -> June 2 2018
      * "June 11, 2018"          -> June 11 2018 (single-day event)

    Raises
    ------
    ValueError
        If the text has no ", <year>" suffix or matches none of the layouts.
    """
    dates, year = date_text.rsplit(",", 1)
    year = year.strip()
    pieces = dates.split("-")
    if len(pieces) == 1:
        # No range separator: the event starts and ends on the same day.
        candidates = [dates.strip() + " " + year]
    else:
        first_month = dates.strip().split(" ")[0].strip()
        last_part = pieces[-1].strip()
        candidates = [
            # Same-month range: reuse the leading month with the final day.
            first_month + " " + last_part + " " + year,
            # Cross-month range: the final part already names its month.
            last_part + " " + year,
        ]
    for candidate in candidates:
        try:
            return datetime.datetime.strptime(candidate, "%B %d %Y")
        except ValueError:
            continue
    raise ValueError("unrecognised conference date: %r" % (date_text,))


# NOTE(review): yaml.load with the full Loader can construct arbitrary Python
# objects; acceptable here because the file was written by this notebook.
with open('sorted_data.yml', 'r') as stream:
    try:
        conf = yaml.load(stream, Loader=Loader)
        print("Initial Data:")
        for q in conf:
            print(q["deadline"], " - ", q["name"])
        print("\n\n")
        clean_conf = []
        for q in conf:
            try:
                end_date = _conference_end_date(q["date"])
            except ValueError as exc:
                # Never drop an entry just because its date could not be
                # read; keep it and surface the problem for manual review.
                print("Unparseable date, keeping: " + q["name"], " - ", exc)
                clean_conf.append(q)
                continue
            # Both sides use dateformat, so string order is time order.
            if end_date.strftime(dateformat) >= right_now:
                clean_conf.append(q)
            else:
                print("Passed: " + q["deadline"], " - ", q["name"])
        print("\n\n")
        print("Cleaned Data:")
        for q in clean_conf:
            print(q["deadline"], " - ", q["name"])
        with open('cleaned_data.yml', 'w') as outfile:
            dumped = ordered_dump(clean_conf, Dumper=yaml.SafeDumper,
                                  default_flow_style=False, explicit_start=True)
            # NOTE(review): this rewrites every apostrophe in the dump,
            # including any inside values, not only the quoting characters.
            for line in dumped.replace('\'', '"').splitlines():
                outfile.write('\n')
                # Extra blank line before each conference entry.
                outfile.write(line.replace('- name:', '\n- name:'))
    except yaml.YAMLError as exc:
        print(exc)

Initial Data:
2018-03-02 23:59:59  -  Seismic Characterisation of Carbonate Platforms and Reservoirs
2018-03-06 23:59:59  -  SPE Argentina Exploration and Production of Unconventional Resources Symposium
2018-03-16 23:59:59  -  SPE Russian Petroleum Technology Conference
2018-03-18 23:59:59  -  SPE International Hydraulic Fracturing Technology Conference and Exhibition
2018-03-31 23:59:59  -  AAPG ICE 2018
2018-04-01 16:59:59  -  SEG Annual Meeting 2018
2018-04-15 23:59:59  -  Near Surface Geoscience Conference & Exhibition 2018
2018-04-23 23:59:59  -  SPE Annual Caspian Technical Conference & Exhibition
2018-05-01 23:59:59  -  The Abu Dhabi International Petroleum Exhibition & Conference (ADIPEC)
2018-05-10 23:59:59  -  EAGE Workshop on 4D Seismic and Reservoir Monitoring
2018-05-31 23:59:59  -  International Petroleum Technology Conference (IPTC)
2018-08-01 23:59:59  -  AGU Fall Meeting 2018
2018-08-18 00:00:00  -  International Conference on Geosciences and Environmental Geology
201

In [7]:
# Overwrite the site data with the cleaned file only after explicit confirmation.
if query_yes_no(
    "Did you check the cleaned data and would like to replace the original data?"
):
    copyfile(src="cleaned_data.yml", dst="../_data/conferences.yml")

Did you check the cleaned data and would like to replace the original data? [y/N] y


Thanks to https://gist.github.com/oglops/c70fb69eef42d40bed06