In [1]:
import datetime
import pandas

  from .tslib import iNaT, NaT, Timestamp, Timedelta, OutOfBoundsDatetime
  from pandas._libs import (hashtable as _hashtable,
  from pandas._libs import algos, lib
  from pandas._libs import hashing, tslib
  from pandas._libs import (lib, index as libindex, tslib as libts,
  import pandas._libs.tslibs.offsets as liboffsets
  from pandas._libs import algos as libalgos, ops as libops
  from pandas._libs.interval import (
  from pandas._libs import internals as libinternals
  import pandas._libs.sparse as splib
  import pandas._libs.window as _window
  from pandas._libs import (lib, reduction,
  from pandas._libs import algos as _algos, reshape as _reshape
  import pandas._libs.parsers as parsers
  from pandas._libs import algos, lib, writers as libwriters


In [2]:
def _polar_timestamp(dateStr, timeStr):
    """Combine a YYYYMMDD date string and an HHMMSS time string into a Timestamp.

    An hour field of "24" marks a time that has run past midnight into the
    following day. It is parsed as hour "00" and one calendar day is then
    added, so the rollover stays valid at month and year boundaries.
    """
    rollsOver = timeStr[0:2] == "24"
    if rollsOver:
        timeStr = "00" + timeStr[2:]
    stamp = pandas.to_datetime( dateStr + ":" + timeStr,\
                    format="%Y%m%d:%H%M%S" )
    if rollsOver:
        # calendar arithmetic instead of incrementing the day digits of
        # the string, which produced dates such as 19960332
        stamp = stamp + pandas.Timedelta(days=1)
    return stamp


def polar_to_datetime(row):
    """Add the onset datetime and the frame spacing to one Polar UVI row.

    Parameters
    ----------
    row : mapping (e.g. one row passed by ``DataFrame.apply(..., axis=1)``)
        Must provide ``row["time"]`` as "HHMMSS-HHMMSS" (previous frame
        time, then onset time) and ``row["dateStr"]``, whose ``str()`` is
        a YYYYMMDD date.

    Returns
    -------
    The same ``row`` object, with two entries set:
    ``row["date"]``    - onset time as a ``datetime.datetime``
    ``row["delT1T2"]`` - minutes between the previous frame and the onset;
                         callers use it to spot typos in the data file and
                         discard bad values.

    Raises
    ------
    IndexError if ``row["time"]`` contains no "-"; ValueError if a date or
    time string does not match the expected format.
    """
    timeParts = row["time"].split("-")
    prevFrameTimeStr = timeParts[0]
    # the onset time is the 2nd part of the string
    onsetTimeStr = timeParts[1]
    currDateStr = str(row["dateStr"])
    # sometimes the time extends to the next day and is represented
    # as a time starting with 24 instead of 00; the helper handles
    # that for both the previous frame and the onset
    prevFrameDate = _polar_timestamp(currDateStr, prevFrameTimeStr)
    currFrameDate = _polar_timestamp(currDateStr, onsetTimeStr)
    row["date"] = currFrameDate.to_pydatetime()
    row["delT1T2"] = (currFrameDate - prevFrameDate).total_seconds()/60.
    return row

In [3]:
# Polar UVI onset list: the raw text file is awkward to parse,
# so it is cleaned up by hand after loading.
uviFile = "../data/uvi_onset.dat.txt"
# labels given to the columns kept from the file
colNames = [
    "dateStr",
    "time",
    "mlat",
    "mlt",
    "glat",
    "glon",
]

In [4]:
# Read the Polar UVI onset list. The first 21 lines of the file are
# skipped and the rest is split on runs of whitespace. sep=r"\s+" is the
# documented equivalent of delim_whitespace=True, which newer pandas
# releases deprecate.
uviData = pandas.read_csv( uviFile,\
                skiprows=21, sep=r"\s+" )
# Keep six of the parsed columns and relabel them positionally.
# NOTE(review): the parsed names and colNames do not line up one-to-one
# (the column read as "time2" is relabelled "mlat", "glat" becomes "glon").
# Presumably the header has one more name than the data rows have fields,
# since "time1-time2" is a single token - confirm against the file.
uviData = uviData[ ["date", "time1", "time2",\
                    "mlat", "mlt", "glat"] ]
uviData.columns = colNames
# row by row, add the onset datetime ("date") and the spacing in
# minutes between the two frame times ("delT1T2")
uviData = uviData.apply( polar_to_datetime, axis=1 )
uviData["date"] = pandas.to_datetime(uviData['date'])
uviData.head()

Unnamed: 0,dateStr,time,mlat,mlt,glat,glon,date,delT1T2
0,19960330,041130-041157,68.7,359.3,58.6,287.9,1996-03-30 04:11:57,0.45
1,19960330,051821-051848,68.5,353.9,57.6,274.4,1996-03-30 05:18:48,0.45
2,19960330,065135-065202,69.5,336.0,61.5,249.1,1996-03-30 06:52:02,0.45
3,19960330,224138-224205,65.9,353.4,67.0,355.1,1996-03-30 22:42:05,0.45
4,19960331,002354-002554,66.5,346.9,62.4,321.4,1996-03-31 00:25:54,2.0


In [5]:
# Clean the Polar UVI frame in one chain, so nothing is modified in place
# on a sliced frame:
#   1. discard rows with bad delT1T2 values (negative, or 10 minutes and
#      more) and keep only the columns needed downstream
#   2. MLAT is showing up as a string, change to float
#   3. sort by date just to make sure things are in order
#   4. reset the index AFTER sorting, so the frame ends with a default
#      0..n-1 index even when the sort reorders rows (to_feather refuses
#      any other index)
uviData = (
    uviData.loc[
        (uviData["delT1T2"] >= 0.) & (uviData["delT1T2"] < 10.),
        ["date", "mlat", "mlt", "glat", "glon"],
    ]
    .assign(mlat=lambda frame: pandas.to_numeric(frame["mlat"]))
    .sort_values(by='date')
    .reset_index(drop=True)
)
uviData.head()

Unnamed: 0,date,mlat,mlt,glat,glon
0,1996-03-30 04:11:57,68.7,359.3,58.6,287.9
1,1996-03-30 05:18:48,68.5,353.9,57.6,274.4
2,1996-03-30 06:52:02,69.5,336.0,61.5,249.1
3,1996-03-30 22:42:05,65.9,353.4,67.0,355.1
4,1996-03-31 00:25:54,66.5,346.9,62.4,321.4


In [6]:
# IMAGE FUV substorm list: file location and the labels
# assigned to its columns after loading
imgFile = "../data/substorms_2000_2005.txt"
imgColNames = [
    "inst_date",
    "X",
    "Y",
    "Dist",
    "Counts",
    "glat",
    "glon",
    "mlat",
    "mlon",
    "mlt",
]

In [7]:
def image_to_datetime(row):
    """Set ``row["date"]`` from the underscore-separated ``row["inst_date"]``.

    Fields 1 and 2 of ``inst_date`` are concatenated and read as %Y%m%d,
    and field 3 is read as %H:%M:%S; field 0 is not used. The result is
    stored as a ``datetime.datetime`` and the same ``row`` is returned.
    """
    fields = row["inst_date"].split("_")
    dayStr = fields[1] + fields[2]
    clockStr = fields[3]
    stamp = pandas.to_datetime(
        dayStr + "-" + clockStr,
        format="%Y%m%d-%H:%M:%S",
    )
    row["date"] = stamp.to_pydatetime()
    return row

In [8]:
# Read the IMAGE FUV substorm list, splitting on runs of whitespace.
# sep=r"\s+" is the documented equivalent of delim_whitespace=True,
# which newer pandas releases deprecate.
imgData = pandas.read_csv(imgFile, sep=r"\s+")
imgData.columns = imgColNames
# row by row, build the "date" entry from the "inst_date" string
imgData = imgData.apply( image_to_datetime, axis=1 )
imgData["date"] = pandas.to_datetime(imgData['date'])
# Keep the columns needed downstream and sort by date just to make sure
# things are in order. The index is reset AFTER sorting so the frame
# ends with a default 0..n-1 index even when the sort reorders rows
# (to_feather refuses any other index).
imgData = (
    imgData[ ["date", "glat", "glon", "mlat", "mlon", "mlt"] ]
    .sort_values(by='date')
    .reset_index(drop=True)
)
imgData.head()

Unnamed: 0,date,glat,glon,mlat,mlon,mlt
0,2000-05-16 17:47:17,69.04,84.96,63.97,158.94,23.76
1,2000-05-18 09:55:40,67.46,191.68,64.23,245.74,21.51
2,2000-05-19 13:40:22,71.79,166.96,66.53,225.19,23.66
3,2000-05-20 02:49:06,59.07,294.52,68.02,17.79,23.31
4,2000-05-20 04:43:06,56.13,269.26,66.66,337.7,22.51


In [9]:
# Save both DFs into feather files.
# to_feather only accepts a default 0..n-1 index, and both frames were
# sorted by date upstream, so the index is reset on the way out. This
# changes nothing when the index is already in default order.
uviData.reset_index(drop=True).to_feather("../data/polar_data.feather")
imgData.reset_index(drop=True).to_feather("../data/image_data.feather")

  from ._conv import register_converters as _register_converters
  from ._conv import register_converters as _register_converters
  from . import h5a, h5d, h5ds, h5f, h5fd, h5g, h5r, h5s, h5t, h5p, h5z
  from .. import h5g, h5i, h5o, h5r, h5t, h5l, h5p
  from . import _csparsetools
  from ._shortest_path import shortest_path, floyd_warshall, dijkstra,\
  from ._tools import csgraph_to_dense, csgraph_from_dense,\
  from ._traversal import breadth_first_order, depth_first_order, \
  from ._min_spanning_tree import minimum_spanning_tree
  from ._reordering import reverse_cuthill_mckee, maximum_bipartite_matching, \
  from ._solve_toeplitz import levinson
  from ._decomp_update import *
  from ._ufuncs import *
  from ._ellip_harm_2 import _ellipsoid, _ellipsoid_norm
  from ._group_columns import group_dense, group_sparse
  from . import _bspl
  from .ckdtree import *
  from .qhull import *
  from . import _voronoi
  from . import _hausdorff
  from . import _ni_label
