In [2]:
import pandas as pd
import numpy as np
import re
import sys
import os

from PIL import Image
from decimal import Decimal, setcontext, ExtendedContext
import pytesseract as ocr
import urllib.request
import glob

#### Left to add - The sanity check from gps range & state

Load GPS Parser

In [3]:
dms_parser_re = re.compile(r"""\b
    # Latitude direction, first position: one of N, S, NORTH, SOUTH
    ((?P<latdir>NORTH|SOUTH|LAT|LAT\.|LATITUDE|[NS])\s?)?
    # Latitude degrees: two digits 0-90
    (?P<latdeg>([0-8][0-9])|90)
    # Optional space, degree mark, period,
    # or word separating degrees and minutes
    (\s|(?P<degmark>\s?(º|°)?\s?|(?P<degpd>\.)|-|\sDEGREES,\s)\s?)?
    (?P<latminsec>
    # Latitude minutes: two digits 0-59
    (?P<latmin>([0-5]?[0-9]))
    # If there was a degree mark before, look for punctuation after the minutes

    (\s|(?(degmark)('|\"|\sMINUTES(,\s)?)\s?))?
    (
    # Latitude seconds: two digits
    (
    ((?(degpd)\.?)(?P<latsec>(\d{1,2})(?=\d*)(\.\d*)))|
    # Decimal fraction of minutes
    (?P<latdecsec>\.(\d)+))?)
    (\s|(?(degmark)(\"|'|\sSECONDS\s)?))?
    )?
    # Latitude direction, second position, optionally preceded by a space
    (\s?(?P<latdir2>(?(latdir)|(NORTH|SOUTH|LAT|LAT\.|LATITUDE|[NS]))))?
    # Latitude/longitude delimiter: space, forward slash, comma, or none
    (?<=\s|\d|\"|'|H|N|S)(?:[)(\ /;:,]){0,2}
    # Longitude direction, first position: one of E, W, EAST, WEST
    (?(latdir)((?P<longdir>EAST|WEST|LON|LON\.|LONG|LONGITUDE|[EW])\s?))?
    # Longitude degrees: two or three digits
    (?P<longdeg>((1(([0-7][0-9]|80))|(0?[0-9][0-9]))))
    # If there was a degree mark before, look for another one here
    (\s|(?(degmark)(\s?(º|°)\s?|\.|-|\sDEGREES,\s)))?
    (?(latminsec)   #Only look for minutes and seconds in the longitude
    (?P<longminsec> #if they were there in the latitude
    # Longitude minutes: two digits
    (?P<longmin>([0-5]?[0-9]))
    # If there was a degree mark before, look for punctuation after the minutes
    (\s|(?(degmark)('|\"|\sMINUTES(,\s)?)\s?))?
    # Longitude seconds: two digits
    (
    ((?(degpd)\.?)(?P<longsec>(\d{1,2})(?=\d*)(\.\d*)))|
    # Decimal fraction of minutes
    (?P<longdecsec>\.(\d)+))?)
    (\s|(?(degmark)(\"|'|\sSECONDS\s)?))?
    )
    #Longitude direction, second position: optionally preceded by a space
    (?(latdir)|\s?(?P<longdir2>(EAST|WEST|LON|LON\.|LONG|LONGITUDE|[EW])))?
    \b
    """, re.VERBOSE | re.UNICODE | re.IGNORECASE)

#dd_parser_re = re.compile(
#        r"""\b
#        (?P<latdir1>NORTH|SOUTH|[NS])?
#        (?P<decLat>-?\d+\.(?=\d)\d*)
#        (?P<latdegmark>(º|°|\*)?\s?|-|\sDEGREES,\s)?
#        (?P<latdir2>\s?NORTH|SOUTH|[NS])?
#        (?:\s?[,:;°]?\s?)?
#        (?(latdir1)((?P<longdir1>EAST|WEST|[EW])\ ?))?
#        (?P<decLon>-?\d*\.(?=\d)\d*\s?(?!\s?.{0,2}(mi|km)))
#        (?P<londegmark>(º|°|\*)?\s?|-|\sDEGREES,\s)?
#        (?(latdir2)((?P<longdir2>EAST|WEST|[EW])))?
#        \b""", re.VERBOSE | re.UNICODE | re.IGNORECASE)

dd_parser_re = re.compile(
    r"""\b
    (?P<latdir1>(NORTH|SOUTH|LAT|LAT\.|LATITUDE|[NS]))?
    (?:\s|\:)*
    (?P<decLat>(-|—)?\d+\.\d{3,})
    (?P<latdegmark>(º|°|\*|\s|-|\sDEGREES,\s))?
    (?P<latdir2>\s?(NORTH|SOUTH|LAT|LAT\.|LATITUDE|[NS]))?
    (?:\s)?
    (?:\s?[,:;°/]?\s?)?
    (?(latdir1)((?P<longdir1>(EAST|WEST|LON|LON\.|LONG|LONGITUDE|[EW]))\s?))?
    (?:\s|\:)*
    (?P<decLon>(-|—)?\d*\.(?=\d)\d*\s?(?!\s?.{0,2}(mi|km)))
    (?P<londegmark>(º|°|\*)?\s?|-|\sDEGREES,\s)?
    (?(latdir2)((?P<longdir2>EAST|WEST|LON|LON\.|LONG|LONGITUDE|[EW])))?
    (?:\s)?
    \b""", re.VERBOSE | re.UNICODE | re.IGNORECASE)



def cleanup(parts):
    """
    NOTICE! WILL have catastrophic failure if passed decimal degree format.
    Normalize up the parts matched by :obj:`parser.parser_re` to
    degrees, minutes, and seconds.
    >>> _cleanup({'latdir': 'south', 'longdir': 'west',
    ...          'latdeg':'60','latmin':'30',
    ...          'longdeg':'50','longmin':'40'})
    ['S', '60', '30', '00', 'W', '50', '40', '00']
    >>> _cleanup({'latdir': 'south', 'longdir': 'west',
    ...          'latdeg':'60','latmin':'30', 'latdecsec':'.50',
    ...          'longdeg':'50','longmin':'40','longdecsec':'.90'})
    ['S', '60', '30.50', '00', 'W', '50', '40.90', '00']
    """

    try:
        latdir = (parts['latdir'] or parts['latdir2']).upper()[0]
        longdir = (parts['longdir'] or parts['longdir2']).upper()[0]
    except AttributeError:
        latdir, longdir = '', ''

    latdeg = parts.get('latdeg')
    longdeg = parts.get('longdeg')

    latmin = parts.get('latmin', '00') or '00'
    longmin = parts.get('longmin', '00') or '00'

    latdecsec = parts.get('latdecsec', '')
    longdecsec = parts.get('longdecsec', '')

    if (latdecsec and longdecsec):
        latmin += latdecsec
        longmin += longdecsec
        latsec = '00'
        longsec = '00'
    else:
        latsec = parts.get('latsec', '') or '00'
        longsec = parts.get('longsec', '') or '00'

    return [latdir, latdeg, latmin, latsec, longdir, longdeg, longmin, longsec]


def convert(latdir, latdeg, latmin, latsec, longdir, longdeg, longmin, longsec):
    """
    Convert normalized degrees, minutes, and seconds to decimal degrees.
    Quantize the converted value based on the input precision and
    return a 2-tuple of strings.
    >>> _convert('S','50','30','30','W','50','30','30')
    ('-50.508333', '-50.508333')
    >>> _convert('N','50','27','55','W','127','27','65')
    ('50.459167', '-127.460833')
    """
    
    if (latsec != '00' or longsec != '00'):
        precision = Decimal('0.000001')
    elif (latmin != '00' or longmin != '00'):
        precision = Decimal('0.001')
    else:
        precision = Decimal('1')

    latitude = Decimal(latdeg)
    latmin = Decimal(latmin)
    latsec = Decimal(latsec)

    longitude = Decimal(longdeg)
    longmin = Decimal(longmin)
    longsec = Decimal(longsec)

    if latsec > 59 or longsec > 59:
        #Assume that 'seconds' greater than 59 are actually a decimal
        #fraction of minutes
        latitude += (latmin +
                     (latsec / Decimal('100'))) / Decimal('60')
        longitude += (longmin +
                  (longsec / Decimal('100'))) / Decimal('60')
    else:
        latitude += (latmin +
                     (latsec / Decimal('60'))) / Decimal('60')
        longitude += (longmin +
                      (longsec / Decimal('60'))) / Decimal('60')

    if latdir == 'S':
        latitude *= Decimal('-1')

    if longdir == 'W':
        longitude *= Decimal('-1')

    lat_str = str(latitude.quantize(precision))
    long_str = str(longitude.quantize(precision))

    return (lat_str, long_str)

def parseDMS(s):
    matches = dms_parser_re.finditer(s)
    results = []
    for match in matches:
        dms = cleanup(match.groupdict())
        results.append(convert(*dms))
    matches = [e for l in results for e in l]
    
    if matches:
        if len(matches) == 2:
            lat, lon = matches
            return lat, lon
    
    return None

def extractDD(s):
    ''' extract decimal degree coords attempt to wrangle them into [lat,lon] as decimal degrees'''
    #s = re.sub(r'\s+', ' ', s ) # convert duplicate spaces into single 
    matches = dd_parser_re.finditer(s)
    results = []
    for match in matches:
        parts = match.groupdict()
        ddLat = float(parts.get('decLat', '') or '00')
        ddLon = float(parts.get('decLon', '') or '00')
        # if we harvested the directionality, then use them to try and clean the coords up.
        try:
            latdir = (parts['latdir1'] or parts['latdir2']).upper()[0]
            longdir = (parts['longdir1'] or parts['longdir2']).upper()[0]
        except AttributeError:
            latdir, longdir = '', ''

        if (latdir == 'N') & (ddLat < 0):
            ddLat = abs(ddLat)
        elif (latdir == 'S') & (ddLat > 0):
            ddLat = -ddLat

        if (longdir == 'E') & (ddLon < 0):
            ddLon = abs(ddLon)
        elif (longdir == 'W') & (ddLon > 0):
            ddLon = -ddLon
        
        results.append((str(ddLat), str(ddLon)))
    
    matches = [e for l in results for e in l]
    
    if matches:
        if len(matches) == 2:
            lat, lon = matches
            return lat, lon
    return None
       

        #results.append((ddLat,ddLon))
    #matches = [e for l in results for e in l]
        
    # make sure we ended up with 2 values.
    
   #return lat, lon
 
def parseGPS(s):
#    lines = [x.strip() for x in s.splitlines()]
#    results = []
#    for line in lines:
    result = extractDD(s)
    #result = parseDMS(s)
    if result != None:
#            results.append(result)
        return result
    else:
        result = parseDMS(s)
        #result = extractDD(s)
        if result != None:
#                results.append(result)
            return result
#        if result:
        #result = max(results, key=len)
#            return result

    return None

image download

In [None]:


#NOTE: Download the query results with UTM-8 encoding.#
###########Note############
#This script was made to let a researcher access all the images from a query locally.
#This script downloads ALL images in the csv to the working directory (in place)
#the files images.csv and occurrences.csv must be present in the folder in which
#this script is run. Those files are bundled with a Symbiota format query csv download
###########################

imgCSV = pd.read_csv("images.csv",encoding='utf-8')
occCSV = pd.read_csv("occurrences.csv",encoding='utf-8')
occCSV = occCSV.merge(imgCSV,left_on = 'id',right_on='coreid', how = 'inner')

try:
    for index, row in occCSV.sample(30).iterrows():
        if row['catalogNumber'].isnumeric():
            recordID = str(row['institutionCode']+row['catalogNumber'])
        else:
            recordID = row['catalogNumber']
        fileName = './images/{}.jpg'.format(recordID)
        url = row['accessURI']
        urllib.request.urlretrieve(url, fileName)
except:
    print('Problem at {}, skipping it'.format(fileName))
    pass


OCR Image analysis

Using accuracy training data from:
https://github.com/tesseract-ocr/tessdata_best/blob/master/eng.traineddata


In [3]:
df = pd.DataFrame(columns = ['catalogNumber', 'rawOCR','decimalLatitude', 'decimalLongitude','verbatimCoordinates'])

images = glob.glob('./images/*jpg')
# a list of known, necessary replacements after ocr, ahead of regex
replacements = {'—':'-'}

for fileName in images:
    img = Image.open(fileName)
    #rawOcr = ocr.image_to_string(img, lang='eng')
    rawOcr = os.linesep.join([s for s in ocr.image_to_string(img, lang='eng').splitlines() if s])
    for key,val in replacements.items():
        rawOcr = rawOcr.replace(key,val)
    catalogNumber = os.path.basename(fileName).replace('.jpg','')
    rowData = pd.Series({'catalogNumber':catalogNumber, 'rawOCR':rawOcr})
    df = df.append(rowData,ignore_index=True)

df['verbatimCoordinates'] = df['rawOCR'].transform(parseGPS)
df = df.fillna('')
df[['decimalLatitude', 'decimalLongitude']] = df['verbatimCoordinates'].apply(pd.Series)

In [4]:
display(df)


Unnamed: 0,catalogNumber,rawOCR,decimalLatitude,decimalLongitude,verbatimCoordinates
0,TENN-V-0015524,UNNERﬂTYOFTENNESSEEKNUXVHLE\nrmumugmmwwwwwWWI»...,19.0,75.0,"(19, 75)"
1,MTSU000623,County:\nDet:\nColl.:\n5 6 7 8 9\nPLANTS OF TE...,19.0,79.0,"(19, 79)"
2,TENN-V-0015470,UNIVERSITY OF TENNESSEE KNOX IL\n1|MLLlLlJlﬂl_...,,,
3,HTTU000727,Herbarium\nTennessee Technological University\...,,,
4,ETSU001292,J Hezbullni EAST TENNESSEE STATE UNIVERSITY\n”...,44.0,31.0,"(44, 31)"
5,MTSU000631,4 5 6 7 8 9 10\n3 ' sonmm\n2NEC Digitization P...,,,
6,UCHT003473,"SERNEC Digitization Projé"" -Tehne§se§ Heriggri...",,,
7,ETSU001290,EAST TENNESSEE STATE UNIVERSIT\nl1111111111111...,,,
8,UCHT003483,UNIVERSITYT OTF TENNESSEE\nl|)|‘$\||||NIHlﬂlll...,,,
9,TENN-V-0015442,UNIVERSITY OF TENNESSEE KNOXVILLE\n1111(ijWlml...,,,


In [5]:
df.to_csv('output.csv', encoding = 'utf-8', index = False)

In [6]:
df = df[df['verbatimCoordinates'].notna()]
for index,row in df.iterrows():
    display(row['rawOCR'].replace('\n', ''))
    display(row['verbatimCoordinates'])

"UNNERﬂTYOFTENNESSEEKNUXVHLErmumugmmwwwwwWWI»  PLANTS OF OHIO 9.12m“? ‘30 IIERBARIUDI OF OBERLIN COLLEGETsuga cahadensis (Lo) Carrin mixed woods near streamWooster Memorial Park, Plain Twp.March 11, 1975 «CML George T. Jones3 4 5 6 7 ' 8 1oSERNEC Digitization Project - Tennessee Herbarium Consortium"

('19', '75')

'County:Det:Coll.:5 6 7 8 9PLANTS OF TENNESSEERuther§ord Date: 3 Dec 1979PinaceaeTsuga canadensis L.Eastern Hemlock@ $5 yds S.E. Student UnionBulld. MTSU Campus@ 5" DBH @ 27\' HeightWMPWMPODath 1*83 1N10F¢ﬁ cogg 10 i lui“. n n ro\'ect - Tennessee Herbarium Consortium J MIDDLETENNESSEESTATE UNIVERSITYHII‘IIWIIWHIWIHIIW||ﬂL|ﬂ|ljlljlllWHlllMTSUOO'

('19', '79')

'UNIVERSITY OF TENNESSEE KNOX IL1|MLLlLlJlﬂl_|lﬂU1lUL|lt1UMmm\\,/‘_. ._ r r7“, ’7".‘”‘T.‘"1 ‘ .J,-u.1;4-‘1       ~ ~ ‘ ~ V. ., 1-,, - ,. M, + x:Lgncu Amer ufccdcf L\\L;&CvOI‘ b;h:- / “7\'31!" A\'ﬁ mﬁﬁp,‘ﬁ\'-1ﬁ m--~mm---------0 1 2 3 4 5 5 7 \' 8 910cmSERNEC Digitization Project - Tennessee Herbarium Consortium'

''

'HerbariumTennessee Technological UniversityPlants ofTsuga canadensis (L) Carr.#65” HERBARIUMp4‘ , , _ Spring Creek, ca. 1.0 mi. N. of Cookeville.LTENNESSEE TtCﬁNU-LGa‘lzi’AL ?SO\'-9SO\' elevation. hO‘ tall. Woody, rockv,UNIVERSE” tank just above creek.IENNESSEE TECHNOLOGICAL UNIVERSITYI"I‘ll!IWIIMINIWINIIWIWIWHIHUHHIIIIll! Wm 1‘HTTU000727 K.L. French, 3. I\'»\'£cIn.turff, S. P/Iankikar~11 , - 2.4,:mrzh:2 3 4 5 6 7 8 9 10SERNEC Digitization Project - Tennessee Herbarium Consortium'

''

'J Hezbullni EAST TENNESSEE STATE UNIVERSITY”79" Ull/lllllllllHilllliltlilﬂlwljllﬂlﬂlﬁﬂlWWIIllllETSU   I’Fx/m-Lg- At-Plants of lﬁ »w5_=,;wI. ! J v i 1 \\:V Q \\4h .1 I“ G ‘.v ?,T /h”7J .4 ﬂ i/y‘ / 4 Ap”JV, : w.L Q LEL‘ v _ ‘ (L )(44:31 A v .. :I‘Det: ,L11L;\'~ L J Vf‘4 6 7 8SERNEC Digitizgtion Projecnt - Tennessee Herbarium Consortium9 10'

('44', '31')

'4 5 6 7 8 9 103 \' sonmm2NEC Digitization Project - Tennessee Herbarlum ConSER MIDDLE TENNESSEEATE UNIVE lIr11mmx/ImI/x/m/mI MIMI»MTSU000631  , ”714/ ‘ﬂ/QH/f; if” f/C’ll/IUC’SIC’C‘ /‘727\' 741/ @e wry, xvs/ﬁzz le’r w, 7 \\ﬂ\'zucnic‘ic‘f 5/ ’ 7 L ,"2/3“qu Ccv/c’aa/(E/L/J/J\' (L) C,¢7rr/.\' ‘ /<52752161711 ﬂﬂn\'x/er , .L a 5/” \' €67: K/O ‘2/c7// . .06/71 m / ’ / / 7-JJ/’» 6(I/ra‘/Jauf/Izcldifﬂc7rwév/ ryf 55"“ r’!’ {7 b /Umaxm , éaaf/ziaw u z’, w pm, W .Dgn) F. q MLUA/,Dc‘f" ,’ 13/:3'

''

'SERNEC Digitization Projé" -Tehne§se§ Heriggrium ConsortiumUCHTUNIVERSITY OF TENNESSEEII/I/xﬁl/M/mliﬂiﬂiW/iﬁﬁwll73 iacM/HHerbarium of the University of Tennessee at ChattanoogaTsuga cangdensis (L.) Carr.North on Hwy127 towards Signal Mtn. Leftat the‘first stop light. North to Timber~links Drive. North on Timberlinks toan unmarked foot trail. West on Trail toa tributary of Short Creek. Followtributary to Short Creek. Specmmen foundin the riparian habitat of Short Creek inPrentice Cooper State Forest.Tree to 15m tall.Hamilton Co., Tennessee Elev.ca.1560ft,John Beck# 3053 15 March 1999Voucher specimen.3 .4\' _‘5¥.,.'

''

'EAST TENNESSEE STATE UNIVERSITl11111111111111111111111111111111111TSUOO129O 1 , "I/". ,,. _‘ ‘j {‘,,PLAN’T’S OF EAST TENNE§SEETsuga Canadensis (L.) Carr.PinaceaeJohnson City, Washington County, TN4137 Bristol Highway Wing Deer ParkLocated in a highly shaded and forestedarea, right off the path ofHemlock Loop1600 ftTree, upright, 30’ tall, 2’ in diameter,bark very dark and rough,leaves are needles that are ﬂatJohn C. Warden Herbarium which are white underneathETSU #13\'0280 EISU John William Poston Collection #185/25/2010 Det. J ohn William Poston...,o _ 1 2 3 4 5 5 7 3 9 1o *9cm SERNEC Digitization Project - Tennessee Herbarium Consortium l...'

''

"UNIVERSITYT OTF TENNESSEEl|)|‘$\\||||NIHlﬂlllﬂlllﬂl|N||W1|ﬁ||H|l|HM*lllUCHTO    L'm\\ c1511) (If lcnnhxcc. Clhl[[;mki<l:_‘;l' SIIIJcIIL CUIICCUL‘HStale TN: ILImIlImI (RunnyFamily: Pinuceue‘IAIrg’cz a‘uIIuI/wzm ll,. I CLlITlt‘I‘CLWLIIIUII: (‘IIcen (imge Pﬂlx ulnam l/«V mile um rmil. Signal MmunmmDar: 15-ApIIl-21JIHColic ClC'l 1):\\{1c Km(V‘olleclion number: 66 ’ SERNEC Digitization PI'ojé ,7 - Tennessee {rbarium Consortium"

''

"UNIVERSITY OF TENNESSEE KNOXVILLE1111(ijWlmlwlwwWinnwNTennessee mﬁﬁﬁ- +Plnaceae égllllllTsuga canadensis (L.) Carr.ggéﬁlm C9 Less than 1m tall; growing on shady bench in pine litter‘W, of 19-20-year Old abandoned contour surface minelocated between State Rt 63 west and Interstate 75;about 1.9 m1 north of exit 141 on I-75, off the west sideof the interstate. Along with P. Virginiana and O.arboreum. -‘ -Co. Campbell Elev.1750'Coll. Barbara L. Rafaill No. 674 Date19 July, 1983With L. R. StritchILIILlll-l-E-E-o 1 2 3 4 5 5- 7 I8 9 10SERNEC Digitization Project - Tennessee Herbarium Consortiumcm"

''

'UNIVERSITY OFTENNESE KNOX llE_ mumuwmxmmimwmI IIWilli”!!!TENN-V-OOw; mii/mu549317PLANTS OFR.,$%chron_ A ."fﬁ:alutﬁhrz COUNTY FL; JUlV 19 4:93k;5§§e...‘.1 , O: {T \\ ﬁ‘ )"1Tsupa canauenu;s \\u.) uari.w 7“; ww,w~Above nampLuu, QLULLaoklng.Laurel Organ,mixed with ¢.Over-growihgcalmnll:1iau1a. 11HI---MILlﬂ-l-EI! _o 1 2 3 4 5 5 78 9 m IIII:cm SERNEC Digitization Project - Tennessee Herbarium Consortium'

('54', '93')

'.) 1:51.“UNIVERSITY OF IENNESSEE KNOXVILLE(Iruum/rgnmwwouWWW!“mum i " ‘1" IV.\' I. \' ’\\     FLORA TENNESSIENSISNails JOHNSON COUNTYCOLLECTOR. 53: £2 ggaggwood AQI‘ , 1593.4   Tsuga canadensis (L) Carr. Shady Valley Bog.2 3 4 5 6 78 9 1o Inﬁll“cm SERNEC Digitization Project - Tennessee Herbarium Consortium'

('15', '93')

'umvmsnv or 75Ar cmxmmo’tgggsmENIM/l/IIl/l/Illllll f/II/l/Illlllll/lHI/IINI/NII/UCHT006168    TENNESSEEOverton Co.: 1 Oct 1986 R.Kral 73680Tsuga canadensis (L.) Carr.Abundant trees on sandrocky bluffs alongLittle Indian Creek by old bridge, nearHanging Limb, by Tn 164 N of Monterey. R.kralDet.:34567891°III§SERNEC Digitization Project - Tennessee Herbarium Consortium'

('19', '86')

"UNIVERSIIY OF TENNESSEE KNOXVILLE\\\\WnNITWIENNN|1\\!LIWIWIW[LW1\\H[I|\\WI“INI 3 /  FLORA TENNESSIENSISNO. CAMPBELL COUNTYCOLLECTOR‘, , (TMwsA iimrs.Junea LellyTsuga canadensis (L.) Carr.Norris Dam region.Glrr o:- COLLECTOR   V ' @I ’MlLlll--------- I mgn 1 2 3 4 5 6 7 8 9 m IIIII0'“ SERNEC Digitization Project - Tennessee Herbarium Consortium"

''

"2 3SERNEC Digitizati:n Pro]: t T 6 7 8 9 10 ‘c - ennessee Herbar' :Ium Consortium M|DDLE TENNESSEESTATE unwensnv«wwwxwWWW“4  DENDROLOGY 406Rgtherford C0P1naceae .Tsu a canadensis Carr9 . 11/26/92Eastern hem1ockDBH 9”; 20' tall"

('11', '26')

"R IOII ‘ I II” M I 0 - I O I llSE NEC Dlgltlzat Flo eCt e essee 9 II ICmJ MIDDLE TENNESSEESTATE UNIV 3 Ty‘ mm»wmmmmnmmaﬁﬁi IIIF/rmmuMTSU000617 X.     § P/md's 0% 'rénneSSeeRMH’IzVRq-J CO‘ ’ V2?/0lPu'MceAe. d ‘ L1-5584 CWM “1515 .Eqsfem Henlock9a; 1:. {13:11 '20 S'foneb f2h/er Counjffy Club.5 V1mFJAA [I(cO'lv’le“ 5‘: 4‘"

''

's8 9 1o lllllll- -- 6 7- 2 3 4 51o - , _MIDDLE TENNESSEESTATE umvms,IJII/II/IMIWI/l/Nﬁ/mm M mum”TSU000663'

''

"VERSITY OF TENNESSEE KNOXVILLEIMWMW Wugmwswzwuwnu    MkV     _“ _ _V,-__.__ _. _.___. l“FLORA TENNESSIENSISCOLLECTOR. Jim WOlfe HawkinsACOUNTY_L “NO l 1630 ADI‘lJ. 16 ,9 55   Tsuga canadensis (19.) Carr.Moist woods and roadsidealong: Laurel Branch3 4 5 6' ' 10 III!SERNEC Digitization Project - Tennessee Herbarium Consonium"

('16', '30')

'thn C. Warden HerbariumEast Tennessee State University#ﬂ‘S’QFJQ\'7EAST TE NESSW‘S TE UNIVERSITYm mm m“\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\ “WM East Tennessee State University HerbariumPinacea {Su/A CA "9%Frasier FirWashington County, TNESTUExact location: ETSUHabitat: woodsDescription: tree, acorns, evergreen{(JCollected by: Diana BowersCollection# 2004023 05/24/04ﬁizfmzjﬁ‘jw -. “"ermr \'\\3 4 5 6 7 s9 10 WWSERNEC Digitization Project Tenness \',. . 7 _ _ - , ee Herbarlum Consortium l.-'

''

'8 9    mum- ---- -0 1 2 3 4 5 6 7 _ 31113un Wx1111\\1\\111\\\\\\\\suo 641006‘\\1MT\\11\\ \\\\ \\    Trees Found in TennesseeDavison CO 10/12/05PinaceaeTsuga Canadensis Linnaeu;Hemlock15 ftta11,4 in DBHF ound 30 ft south of bridge over small streamon northem side ofhouse by magnolian:nl, Vémnnnl ii I'

''

"PLANTS OF TENNESSEEPinacaceaeTsuga canadensis (L.) CarriereUnited States: Tennessee: Putman County: Found in front of Matthews-DanielsBuilding on Tennessee Tech. University, Cookeville Tn. Grassy Area with side walkbeside the tree.Latitude: 36.101900000000001 Longitude: 85.301299999999998. Elevation: 1019Feet.Tree is aprox. 50 feet tall. Grassy area beside the Campus Police Building.Collected by: JT Thurman #20 with Josh Daugherty on 12 June 2014.Identiﬁed by: 11‘ ’l‘hurman.TENNESSEE TECH UNIVERSI'I‘Y H ERBARIUM (HTTU)1PM IINIUFRQITV‘mmmmmmmw1w‘uMHIIU0186942 3 4 5 6 7 8 9 10SERNEC Digitization Project - Tennessee Herbarium Consortium"

('36.1019', '85.3013')

'Ill W! W! WI!UNIVEHill’TY OF TENNESSEE KN?) VILLEmmuw/y (mm; IIH/ll/IIIMIHI/ 1/? 1mNN-V-OO15469 ‘ {y ~;;:\\1\'!\\) A .9* \\ ‘VVlr 4’32» f; \\”5/ I t J I U4! - ; _ ”wan. m_- UU ¥ﬁ=“*“ n{3. /.\\ C:;\\_ "1 ‘. ‘4.. \\- x r»~~ ﬂ) \\"w- \' .q.‘.1..- _ -m..-_, v.“. ___._.-- - -FLDRA TENNESSIENSIST R E o Shanks -___RoanecoumTYCOLLEC OR, o fg ( . . v 50l NO, 206 3 1 Ma. 19  Tsuga canadensis (L.) Carr.volunteer in oak-pine woods,valley of Melton Cr-eek onOak Ridge Reservatlon         LmLmI-I§-_ , _o 1 2 3 4 5 5 7 3 9 1cm0 IﬂlldlSERNEC Digitization Project - Tennessee Herbarium Consortium'

''

'HerbariumTennessee Technological UniversityPlants of Putnam County, TennesseeHERBARIUM Tsuga Eanadensis (14.) Carr76 58 ., Verble Hollow, two rules east of Monterey.TENNESSESlefgﬁeLOGICAL Woods . Pinac eae . Hemlock .EE TECHNOLOGICAL UNIVERSITYTIIEIHINIWISINHHIIUIIUINNIIIOIHJIHJIJIIIHMill!HTTUOO2 3 4 5 8 9SERNEC Digitization Project - Tennessee Herbarium Consortium'

('76', '58')

"UNIVERSITY OF IENNESSEE KN XVIL Euw“Wlmmummﬂrﬂuw FLORA TENNESSIENSISNo CAMPBELL COUNTYCOLLECTOR. J MLlQISSJMm: - .Tunea KellvGIFT OF COLLECTORTsuga canadensis (L.) Carr.Norris Dam region.Gift of Mrs. Junea Kelly.' MM_m-l-_-!-_-_-o 1 2 3 4 5 5 7 V 8 9 1cm0SERNEC Digitization Project - Tennessee Herbarium Consortium"

''

"\\UNN RSlTY 0F TENNE SEE KNOXVlLLE\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\N\\\\E\\\\\\:\\\\\\WWA\\EWM\\\\L .mIhe Her f f h ' ' f P laha lum I' yt e U I e slt o ennsylo n V VanPLA \\x-NTs OF PENNSYLVANIABerks COuntyMTsugs. canadensis (L.) CarrWOOds, 3 mi. north of ECKVR.L. ILLESchaeffer, Jr. 2732 J 1;111.? . 1939mum -- k _,-- m-!-----2 3 4 5 7 ‘ 8 910"

''

'RHODES COLLEGEllllﬂllllllﬂllﬂliﬂlﬂilollﬂj|jHﬂiﬂHllllHlHlll      . L. C .Namelzkéwg {igﬂﬂénsr\'f 1)“,quRegion memhAB /. 7: ..Habitat I Ola 6-7247 ‘Date 5! ‘ j 7 ‘ :Collectorjmes /‘ Grave;Locality {    . .“ﬁ 3;,W. 7 " m ,n"“’_"\'o 1 2 3 4 5 6 7 8 9 1 o EIII-cm SERNEC Digitization Project -.Tennessee Herbarium Consortium'

('72', '47')

'9 10- l ortium$1431ng 01‘ MIDDLE TENNESSEESTATE UNIVERSITYHlllllH/HllHIJIIIWIIIIIIHIIWIIHIIUIIIWHIWIIJMTSU000669v } a Q:Rutherford Co. hovemver 1,, 1, 3linaceae . l‘ \\ ﬂTsuga canadGUSLS \\u.) 9Eastern hemlock; Canada"1“ qk“ \' ‘ .‘ J. q ‘1 f‘Va. 4.3 ft. in diamepel, V?.W u .tall. Lediun~sized tree Wltn pen-dulous branches and pubescent blets, growing besides 3 1p4.I§.... ;‘Fge v51*ehouse. 1000 gas; Lain street.-1 i,}John L. :nee [L4.1m'

('10', '0')

'EASTTENNESSEESTAIEUNWERSHYlHIINWIIIIWllNlUﬂlMlmﬂlﬂHﬂl"ll!Ill!\\lllETSU  //"Plants of Tennesseensstern HemlockDINAK ‘E\\ OTsuga cana‘densmBarry Bentley, 224 July 1983Hawkins Co. Church Hill, Tn.DetzBarry N. Bentley4 6 7 8 9 10SERNEQ Digitizgtion Projgct - Tennessee Herbarium Consortium'

('19', '83')

'EAST TENNESSEE STATE UNIVEHImmumuammwmmummy:unﬁt"ETSUOO.IJfDooL-QJM 3.30.54 2015PLANTS OF NE TENNEESSEEEast Tennessee State University HerbariumTsuga anadensis (L.) Carr.PinaceaeEastern HemlockSullivan County R04"; ? “15¢ fw132 Windmere PI. Kingsport, TN 37664Front yard, down the driveway 20 paces to the left inthe woods 5 metersN am +0.th Shady area418 metersN 365063390, W -82.5382789Dark green needles closer to the main stem, lightgreen needles at the tips, about 6m in height, smallcones are light green, smaller cones light brownTennessee Flora Committee. Guide to the VascularPlants of TN. lst ed. Knoxville: U of Tennessee, 2015.Hannah Haworth#15 date: 5/24/20155 6 7 9 10SERNEtE Digitization Project - Tennessee Herbarium Consortium'

''

'EAST TENNESSEE STATE UNNERSHY 30 8 (a\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\ 4:;\\ETSU001294 "“3.h‘ PLANTS OF TennesseeI‘}I\'undy 00.: 2 May 1"}?72 f}. Cronscnllin 1591Tsugxa canadensis (I...) Carro”mmdy State Forest; deep gorr’tes: nearStinkinp Creek and Fir‘ev ”zizzar‘d River"D .. 3near ‘l‘racy City.4 5 6 7 8 9 10SERNEC Digitiziation Project - Tennessee Herbarium Consortium'

('15', '91')

In [7]:
testLocations = ['35-27-37N/ 85-29-12W',
 'Latitude: 26.055250000000001 Longitude: -85.524979999999999',
 '36.1048097000, -85.8121487000145.0',
 '36.145595°N, -82.847194°W',
 '36.448462° N -83.566273° W',
 '35.14346°, -89.82969°',
 '''6°26'01 N 86°31'22" W''',
 '''36 32'20" N; 84 43' 45" W''',
 '''36° 12' 35" N by 83° 02' 00" W''',
 'N, 85-06-00w',
 '35.222310 -85.2227',
 '35.14346°, -89.82969°',
 "N 35° 03. 801' W 085° 23. 755'",
 '''36.21'51.55"N 84.16'03.43"W''',
 "N 35°6.27' W 085°22.1'",
 '35.267824* -89.263256*',
 '36.105 N, -85.812 W',
 '36.1048097000, -85.8121487000145.0',
 '36.000 N, -85.666 W',
 '''35* 55' 15"N, 82*57'40"W''',
 '35.58247°, -90.65597°',
 '''35°1'36.7386", W 85°19'0.804''',
 '36.145595° N -82.847194° W',
 '''36 23'30" N; 84 37'45" W.''',
 '''035° 45' 10" North Latitude, 083° 12' 35"''',
 '35.63451° Latitude -83.47026° Longitutde',
 '36.0597° North Latitude, -085.79128° West Longitude',
 '''35-01-55 N  86-11-34 W''',
 '''36°13'08.0"N 85°30'06.8"W''',
 'Latitude: 36.17 Longitude: -85.53',
 'Latitude: 35 11 22.06 Longitude: 85 52 56.13',
 'Latitude: 26.055250000000001 Longitude: -85.524979999999999',
 '36-39-30N, 85-14-20W',
 '''longitude 85 44'30" latitude 35 00'30".''',
 '35.14346°, -89.82969°',
'''33° 47' 53.000"N, 84° 18' 56.000"W. 880 feet.''',
"Latitude: 36.17294 Longitude: —85.50482."]


In [8]:
for location in testLocations[::-3]:
    for key,val in replacements.items():
        location = location.replace(key,val)
    result = parseGPS(location)    
    display('string : {}'.format(location))
    display('results : {}'.format(result))
    print('\n')

'string : Latitude: 36.17294 Longitude: -85.50482.'

"results : ('36.17294', '-85.50482')"





'string : longitude 85 44\'30" latitude 35 00\'30".'

'results : None'





'string : Latitude: 35 11 22.06 Longitude: 85 52 56.13'

'results : None'





'string : 35-01-55 N  86-11-34 W'

"results : ('1.917', '86.183')"





'string : 035° 45\' 10" North Latitude, 083° 12\' 35"'

'results : None'





'string : 35°1\'36.7386", W 85°19\'0.804'

"results : ('35.026861', '38.100000')"





'string : 36.000 N, -85.666 W'

"results : ('36.0', '-85.666')"





'string : 35.267824* -89.263256*'

"results : ('35.267824', '-89.263256')"





"string : N 35° 03. 801' W 085° 23. 755'"

"results : ('35.000833', '80.016667')"





'string : N, 85-06-00w'

'results : None'





'string : 6°26\'01 N 86°31\'22" W'

"results : ('1', '86')"





'string : 36.145595°N, -82.847194°W'

"results : ('36.145595', '-82.847194')"





'string : 35-27-37N/ 85-29-12W'

"results : ('27.617', '85.483')"



