In [64]:
# module that imports CSVs and processes to DB
import os, errno, array, csv, math
from datetime import datetime
#from dateutil.parser import parse
import dateutil.parser
import db_settings
import psycopg2
#from state import State


# Skeleton of the per-state `layout` dict. Dataset.ready() reports any key from
# this template that is absent from a state's layout, and any layout value that
# is still the empty string.
#   *_field            -> name of the source CSV column to read for that value
#   api_includes_state -> when False, Dataset.process_rows prepends the state's
#                         code from state_apis to the cleaned API value
#   description_layout -> %-format string filled with the values of the columns
#                         listed (in order) in description_fields
#   *_map              -> dict translating raw column values to the normalised
#                         values; None until a map has been written
layout_template = {
    'date_field': '',
    'api_field': '',
    'api_includes_state': True,
    'type_field': '',
    'status_field': '',
    'category_field': '',
    'lat_field': '',
    'lon_field': '',
    'location_src_field': '',
    'description_layout': '\n'.join(['Descriptor1: %s',
                             'Descriptor2: %s',
                             'Descriptor3: %s',
                             'Descriptor4: %s',
                             'Descriptor5: %s'
                            ]),
    'description_fields': ['DescCol1', 'DescCol2', 'DescCol3', 'DescCol4', 'DescCol5'],
    'type_map': None,
    'status_map': None,
    'category_map': None,
    'location_src_map': None
}

def map_template(map_type, values):
    """Print a paste-ready skeleton for one of a layout's ``*_map`` dicts.

    Every raw value becomes a key; the suggested target is the upper-cased
    value when that is already one of the valid normalised options, otherwise
    an empty string for the user to fill in.

    Args:
        map_type: one of 'type', 'status', 'category' or 'location_src'.
        values: iterable of the distinct raw strings found in the source column.

    Raises:
        ValueError: if ``map_type`` is not one of the four supported names
            (previously this surfaced as an UnboundLocalError).
    """
    if map_type == 'type':
        map_name, options = 'type_map', well_types
    elif map_type == 'status':
        map_name, options = 'status_map', well_statuses
    elif map_type == 'category':
        map_name, options = 'category_map', well_categories
    elif map_type == 'location_src':
        map_name, options = 'location_src_map', location_sources.keys()
    else:
        raise ValueError(
            "unknown map_type %r; expected 'type', 'status', 'category' "
            "or 'location_src'" % (map_type,))

    # Single-argument print(...) behaves identically as a Python 2 print
    # statement and as a Python 3 function call.
    print('valid options: %s' % (options,))
    print("    '%s': {" % map_name)
    # Sorted so the generated skeleton is stable from run to run.
    for item in sorted(values):
        suggestion = item.upper() if item.upper() in options else ''
        print("        '" + item + "': '" + suggestion + "',")
    print("    },")
    
# Numeric code (as a string) for each state / offshore region. Dataset.process_rows
# prepends this code to the cleaned API value when a layout sets
# api_includes_state to False.
# NOTE(review): codes below 10 are stored as a single character ('1', not '01');
# confirm whether a leading zero is expected before using them as a prefix.
state_apis = {'WA': '46', 'DE': '7', 'DC': '8', 'WI': '48', 'WV': '47', 'HI': '51', 'FL': '9', 'WY': '49', 
              'NH': '28', 'NJ': '29', 'NM': '30', 'TX': '42', 'LA': '17', 'AK': '50', 'NC': '32', 'ND': '33', 
              'NE': '26', 'TN': '41', 'NY': '31', 'NorthernGulfofMexico': '60', 'PA': '37', 'RI': '38', 
              'NV': '27', 'VA': '45', 'CO': '5', 'CA': '4', 'PacificCoastOffshore': '56', 'AL': '1', 
              'AlaskaOffshore': '55', 'AR': '3', 'VT': '44', 'IL': '12', 'GA': '10', 'IN': '13', 'IA': '14', 
              'OK': '35', 'AZ': '2', 'ID': '11', 'CT': '6', 'ME': '18', 'MD': '19', 'MA': '20', 'OH': '34', 
              'UT': '43', 'MO': '24', 'MN': '22', 'MI': '21', 'KS': '15', 'MT': '25', 'AtlanticCoastOffshore': '61', 
              'MS': '23', 'SC': '39', 'KY': '16', 'OR': '36', 'SD': '40'}

# Valid normalised values; map_template() prints these as the options a
# layout's type_map / status_map / category_map / location_src_map may target.
well_types = {'OIL', 'GAS', 'OILANDGAS', 'INJECTION', 'OBSERVATION', 'STORAGE', 'DISPOSAL', 'DRY HOLE', 'TEST', 'WATER', 'OTHER'}
well_statuses = {'ACTIVE', 'PA', 'TA', 'ORPHAN', 'PERMITTED', 'CANCELLED', 'UNKNOWN'} # PA: plugged and abandoned, TA: temporarily abandoned
well_categories = {'CONVENTIONAL','UNCONVENTIONAL','FRAC','CBM','UIC','TEST','STORAGE','WATER','OTHER'}
# Location-source code -> human-readable label; only the keys are offered as
# options by map_template().
location_sources = { 'S': 'Surface', 'BH': 'Bottom Hole' }



class State(object):
    """Holds one state's source metadata, layout and raw CSV rows.

    Attributes:
        name: identifier passed to the constructor (e.g. 'AL').
        data: list of row dicts filled in by load_data(); None until then.
        layout: layout dict assigned by the caller; None until then.
        source_url, description: metadata persisted by write_to_db().
        download_directory: lower-cased absolute path of ``downloads/<name>``.
        projection_srid: SRID of the source coordinates (4326 unless changed
            with set_projection()).
    """

    def __init__(self, state):
        self.name = state
        self.data = None
        self.layout = None
        self.source_url = None
        self.description = None
        self.download_directory = os.path.abspath('downloads/' + self.name).lower()
        self.projection_srid = 4326 # WGS84 by default, but can be overridden

    def load_data(self):
        """Read ``csvs/<name lower-cased>-data.csv`` into ``self.data``.

        Each row becomes a dict keyed by the CSV header (csv.DictReader).

        Raises:
            IOError: if the file cannot be opened or read; the message names
                the expected file and includes the underlying error text.
        """
        # Built before the try block so the error message can always use it.
        filename = 'csvs/' + self.name.lower() + '-' + 'data' + '.csv'
        try:
            # Binary mode is what the Python 2 csv module expects.
            with open(filename, 'rb') as f:
                self.data = list(csv.DictReader(f))
        except IOError as e:
            # str(e): concatenating the exception object itself raised a
            # TypeError and hid the real problem.
            raise IOError("Error in state.load_data loading data. Verify that "
                          + filename + ' exists and try again. ' + str(e))

    def write_to_db(self):
        """Store source_url and description on this state's row in `states`.

        Missing metadata only produces a warning. A database error is printed
        (query and pgerror) rather than raised; the confirmation line is
        printed only when the UPDATE was executed without error.
        """
        if not self.source_url:
            print('Warning: Missing source_url')
        if not self.description:
            print('Warning: Missing state description')

        query = 'UPDATE states SET (source_url, description, last_updated) = (%(source_url)s, %(description)s, now()) WHERE state=%(state)s;'
        values = {'source_url': self.source_url, 'description': self.description, 'state': self.name}

        written = False
        conn = psycopg2.connect(database=db_settings.DB, user=db_settings.USER, password=db_settings.PASSWD, host=db_settings.HOST)
        try:
            # `with conn` ends the transaction but does not close the
            # connection, hence the explicit close in `finally`.
            with conn:
                with conn.cursor() as cur:
                    try:
                        cur.execute(query, values)
                        written = True
                    except psycopg2.Error as e:
                        print(query)
                        print(e.pgerror)
        finally:
            conn.close()

        if written:
            print('Wrote description for %s to database' % self.name)

    def set_projection(self, proj):
        """Override the SRID used for this state's coordinates."""
        self.projection_srid = proj


class Dataset(object):
    """Normalised well records for one state, ready to be upserted into `wells`.

    The constructor copies every key of ``state.layout`` onto the instance
    (so ``self.date_field``, ``self.type_map`` and so on come straight from
    the layout dict), validates the layout with ready() and, when it is
    valid, builds ``self.processed_data`` with process_rows().
    """

    def __init__(self, state):
        self.__dict__.update(state.layout)
        self.state_name = state.name
        self.source_data = state.data
        self.projection = state.projection_srid
        self.layout_ready = False
        self.processed_data = None
        if self.ready():
            self.process_rows()

    def ready(self):
        """Validate the layout against the loaded source rows.

        Prints a diagnostic for the first problem found; for a missing map it
        also prints a paste-ready skeleton via map_template().

        Returns:
            True when the layout is usable (the result is cached in
            ``self.layout_ready``), False otherwise.
        """
        if self.layout_ready:
            return True

        if not self.source_data:
            # No rows means no column names to validate the layout against.
            print('No source rows loaded; call load_data() on the state first.')
            return False

        columns = list(self.source_data[0].keys())

        # Layout entries still left as '' plus template keys that are absent.
        missing = [key for key, value in self.__dict__.items() if value == '']
        missing += [key for key in layout_template if key not in self.__dict__]

        if missing:
            print('Invalid layout. Available source file columns: \n\'' + "', '".join(columns) + "'")
            print('\nExample rows: %s' % (self.source_data[:2],))
            print('Missing values for: ' + ', '.join(missing))
            return False

        # A configured *_field needs a non-empty *_map to translate its values.
        map_checks = (
            ('type', 'Missing or invalid type_map.'),
            ('status', 'Missing or invalid status map.'),
            ('category', 'Warning: missing or invalid category map'),
            ('location_src', 'Warning: missing or invalid location_src map'),
        )
        for kind, message in map_checks:
            field = getattr(self, kind + '_field')
            if field and not getattr(self, kind + '_map'):
                print(message)
                map_template(kind, {row[field] for row in self.source_data})
                return False

        self.layout_ready = True
        return self.layout_ready

    def data_ready(self):
        """Return True once process_rows() has produced at least one record."""
        return bool(self.processed_data)

    def process_rows(self):
        """Convert the raw CSV rows into normalised well dicts.

        Rows whose latitude/longitude cannot be parsed as floats are skipped.
        An unparseable date becomes None. Raw values that are absent from a
        map fall back to 'OTHER' (type), 'UNKNOWN' (status) or None
        (category, location_src) instead of raising KeyError. When an API
        value repeats, a later row is kept only if the stored record has no
        date or the later row's date is newer.

        Returns:
            False if the layout has not been validated, True otherwise; the
            records are stored in ``self.processed_data``.
        """
        if not self.layout_ready:
            print('layout not ready')
            return False

        # Uses this dataset's own state rather than a notebook-level global.
        api_prefix = '' if self.api_includes_state else state_apis[self.state_name]

        latest_by_api = {}
        wells = []
        for row in self.source_data:
            # Keep only letters and digits of the raw API value, upper-cased.
            api = api_prefix + ''.join(ch for ch in row[self.api_field] if ch.isalnum()).upper()

            try:
                lon = float(row[self.lon_field])
                lat = float(row[self.lat_field])
            except (TypeError, ValueError):
                continue  # no usable coordinates -> row cannot be mapped

            try:
                date = dateutil.parser.parse(row[self.date_field])
            except (ValueError, OverflowError, TypeError):
                date = None

            if self.type_field:
                well_type = self.type_map.get(row[self.type_field], 'OTHER')
            else:
                well_type = 'OTHER'
            if self.status_field:
                well_status = self.status_map.get(row[self.status_field], 'UNKNOWN')
            else:
                well_status = 'UNKNOWN'
            if self.category_field:
                well_category = self.category_map.get(row[self.category_field])
            else:
                well_category = None
            if self.location_src_field:
                location_src = self.location_src_map.get(row[self.location_src_field])
            else:
                location_src = None

            description = self.description_layout % tuple(
                row[field_name] for field_name in self.description_fields)

            well = {'state': self.state_name, 'api': api, 'lon': lon, 'lat': lat,
                    'location_src': location_src, 'date': date, 'type': well_type,
                    'status': well_status, 'category': well_category,
                    'description': description}

            known = latest_by_api.get(api)
            if known is not None:
                is_newer = (not known['date']) or (well['date'] and well['date'] > known['date'])
                if not is_newer:
                    continue  # discard this record
            # First sighting of the API, or a newer record for a known one.
            latest_by_api[api] = well
            wells.append(well)

        self.processed_data = wells
        print('%d of %d rows loaded' % (len(self.processed_data), len(self.source_data)))
        print('first item: %s' % (wells[:1],))
        return True

    def commit_to_db(self):
        """Upsert every processed record into the `wells` table.

        Returns:
            The number of rows executed on success; False when the layout is
            invalid, there is no processed data, or a database error occurs
            (the failing query and pgerror are printed).
        """
        # Previously a layout that only became valid here made this method
        # return True without writing anything.
        if not self.layout_ready and not self.ready():
            return False
        if not self.processed_data:
            print('data not ready')
            return False

        # The statement is identical for every row, so build it once.
        fields = ', '.join(['state', 'api', 'status', 'type', 'date', 'description',
                            'capture_time', 'location', 'location_src'])
        if self.projection == 4326:
            geog_str = "ST_GeographyFromText('POINT(%(lon)s %(lat)s)')"
        else:
            # int() guarantees that only a number is spliced into the SQL text.
            srid = str(int(self.projection))
            geog_str = "Geography(ST_Transform(ST_GeometryFromText('POINT(%(lon)s %(lat)s)', " + srid + "), 4326))"
        # Every processed row carries its own 'state' key, so the state is
        # bound as a parameter rather than concatenated into the statement.
        values = "%(state)s, %(api)s, %(status)s, %(type)s, %(date)s, %(description)s, now(), " + geog_str + ", %(location_src)s"
        query_structure = 'INSERT INTO wells (%s) VALUES (%s) ON CONFLICT (api, status, date) DO UPDATE SET (%s) = (%s) WHERE wells.api = %s AND wells.status = %s'
        query = query_structure % (fields, values, fields, values, '%(api)s', '%(status)s')

        inserts = 0
        conn = psycopg2.connect(database=db_settings.DB, user=db_settings.USER, password=db_settings.PASSWD, host=db_settings.HOST)
        try:
            # `with conn` ends the transaction but leaves the connection
            # open, hence the explicit close in `finally`.
            with conn:
                with conn.cursor() as cur:
                    for row in self.processed_data:
                        try:
                            cur.execute(query, row)
                        except psycopg2.Error as e:
                            print(query)
                            print(e.pgerror)
                            return False
                        inserts += 1
        finally:
            conn.close()
        return inserts
    


In [35]:
# Alabama
state = State('AL')
# NOTE(review): well_source_url is not referenced anywhere else in the visible
# notebook; the same URL pattern is repeated inside description_layout below.
well_source_url = 'http://www.gsa.state.al.us/ogb/results.aspx?api=%s&permit=%s'
state.source_url = 'http://www.gsa.state.al.us/ogb/db_main.html'
state.description = """
Data from the Geological Survey of Alabama State Oil and Gas Board
"""
state.write_to_db()

# Column mapping for the Alabama CSV. 'Well Type' feeds both the type and the
# category maps. NOTE(review): 'source_well_id' is not a layout_template key
# and is not read by the Dataset code visible in this notebook.
state.layout = {
    'date_field': 'Spud Date',
    'api_field': 'API',
    'api_includes_state': True,
    'type_field': 'Well Type',
    'category_field': 'Well Type',
    'status_field': 'Well Status',
    'lat_field': 'Latitude',
    'lon_field': 'Longitude',
    'source_well_id': 'Permit',
    'location_src_field': 'Location Src',
    # Eight %s placeholders (the link line uses two) matched by the eight
    # entries of description_fields.
    'description_layout': '\n'.join(['Permit Num: %s',
                             'Well Name: %s',
                             'Operator: %s',
                             'Plugged Date: %s',
                             'County: %s',
                             'Prod Date: %s',
                             'GSA OGB Link: <a href="http://www.gsa.state.al.us/ogb/results.aspx?api=%s&permit=%s" target="_blank">Well Details</a>'
                            ]),
    'description_fields': ['Permit', 'Well Name', 'Operator', 'PA Date', 'County', 'Prod Date', 'API', 'Permit'],
    'type_map': {        'OIL': 'OIL',        'GST': 'STORAGE',        'CM': 'GAS',        'GAS': 'GAS',
        'SWD': 'DISPOSAL',        'WI': 'INJECTION',        'WW': 'WATER',        'UN': 'OTHER',
        'GC': 'GAS',        'SHG': 'GAS',        'WS': 'WATER',        'GI': 'INJECTION',
    },
    # NOTE(review): the status list quoted below also mentions 'TP', which has
    # no entry here; Dataset.process_rows indexes status_map directly.
    'status_map': {
        'PR': 'ACTIVE',        'PA': 'PA',        'AC': 'ACTIVE',        'AB': 'PA',        'PW': 'PERMITTED',
        'PP': 'UNKNOWN',        'CA': 'CANCELLED',        'CI': 'CANCELLED',        'DA': 'PA',
        'PB': 'PA',        'SI': 'PA',        'UN': 'UNKNOWN',        'CV': 'UNKNOWN',
        'RJ': 'UNKNOWN',        'TA': 'TA',
    },
    'category_map': {
        'OIL': 'CONVENTIONAL',        'GST': 'STORAGE',        'CM': 'CBM',        'GAS': 'CONVENTIONAL',
        'SWD': 'OTHER',        'WI': 'UNCONVENTIONAL',        'WW': 'OTHER',        'UN': 'OTHER',
        'GC': 'CONVENTIONAL',        'SHG': 'UNCONVENTIONAL',        'WS': 'WATER',        'GI': 'UNCONVENTIONAL',
    },
    # Only 'S' is kept as a location source; every other listed code maps to None.
    'location_src_map': {
        'S': 'S',
        '&nbsp;': None,
        'C': None,
        'G': None,
        'F': None,
    }
}

# Reference text kept as a bare string (it has no runtime effect): the code
# legends the maps above were written from.
"""
From http://www.ogb.state.al.us/ogb/database.aspx:
Well Status Descriptions
AB - Abandoned, AC - Active, CA - Canceled, CI - Canceled with Injection, CV - Converted, DA - Dry and Abandoned, 
PA - Plugged and Abandoned,PB - Plugged Back, PR - Producing, PW - Permitted Well, RJ - Released Jurisdiction
SI - Shut In, TA - Temporarily Abandoned, TP - Temporarily Plugged and Abandoned,UN - Undesignated


Well Type Descriptions, CM - Coal Bed Methane, GAS - Natural Gas, GC - Gas Condensate, 
GI - Gas Injection, GST - Gas Storage, OIL - Oil, SHG - Shale Gas, 
SWD - Salt Water Disposal, UN - Undesignated, WI - Water Injection, WS - Water Source, 
WW - Water Well
"""


state.load_data()
dataset = Dataset(state)
db_result = dataset.commit_to_db()
print 'loaded', str(db_result), 'of', str(len(dataset.processed_data)), 'records into the database'

Wrote description for AL to database
18130 of 18853 rows loaded
first item: [{'status': 'ACTIVE', 'date': datetime.datetime(2016, 7, 9, 0, 0), 'state': 'AL', 'api': '01035203440000', 'description': 'Permit Num: 17204-B\nWell Name: Cedar Creek Land &amp; Timber 3-7 #1\nOperator: Sklar Exploration Company L.L.C.\nPlugged Date: &nbsp;\nCounty: Conecuh\nProd Date: &nbsp;\nGSA OGB Link: <a href="http://www.gsa.state.al.us/ogb/results.aspx?api=01035203440000&permit=17204-B" target="_blank">Well Details</a>', 'location_src': 'S', 'lat': 31.25833, 'type': 'OIL', 'category': 'CONVENTIONAL', 'lon': -86.7386}]
loaded 18130 of 18130 records into the database


In [65]:
# Alaska
state = State('AK')
# state.set_projection(4267) # NAD27 - unclear what format the data coming from the ArcGIS database--presumably WGS84
state.source_url = 'http://doa.alaska.gov/ogc/publicdb.html'
state.description = """
Data from the Alaska Oil and Gas Conservation Commission
"""
# Persist the source URL and description; without this call the metadata set
# above is never written to the `states` table.
state.write_to_db()

# Column mapping for the Alaska CSV. Type, status and category are read from
# three different columns (IStatus, CStatus, FStatus); no location-source
# column is configured.
# NOTE(review): the key '1-0IL' (with a zero) sits next to '1-OIL' in all three
# maps -- presumably a misspelling present in the source data; verify.
state.layout = {
    'date_field': 'Spud_Date',
    'api_field': 'APINumber',
    'api_includes_state': True,
    'type_field': 'IStatus',
    'status_field': 'CStatus',
    'category_field': 'FStatus',
    'lat_field': 'y',
    'lon_field': 'x',
    'location_src_field': None,
    'description_layout': '\n'.join(['Operator Name: %s',
                             'Lease Num: %s',
                             'FldPlWld: %s',
                             'Area: %s',
                             'Well Name: %s'
                            ]),
    'description_fields': ['OPName', 'LeaseNum', 'FldPlWld', 'Area', 'WName'],
    # NOTE(review): '1WINJ' maps to 'INJECTION' while '1GINJ' and '2WINJ' map
    # to 'OTHER' -- confirm this difference is intended.
    'type_map': {
        '': 'OTHER',
        'GSTOR': 'STORAGE',
        '1-0IL': 'OIL',
        'WTRSP': 'WATER',
        '1-OIL': 'OIL',
        'GASSP': 'GAS',
        'WDSP1': 'DISPOSAL',
        'WDSP2': 'DISPOSAL',
        '3-GAS': 'GAS',
        '1-SBNG': 'GAS',
        'WAGIN': 'OTHER',
        '1O-WI': 'OIL',
        '2-GAS': 'GAS',
        'P&A': 'OIL',
        'CORE': 'TEST',
        '2O-2G': 'OILANDGAS',
        '2O-1G': 'OILANDGAS',
        'OBSW': 'OBSERVATION',
        '1G-GS': 'GAS',
        '1WINJ': 'INJECTION',
        '2-OIL': 'OIL',
        '1-CBNG': 'GAS',
        'INFO': 'TEST',
        'RELIF': 'OTHER',
        'WATER': 'WATER',
        'UN': 'OTHER',
        '1-GAS': 'GAS',
        'GEOTH': 'OTHER',
        'COND': 'OTHER',
        '1GINJ': 'OTHER',
        '2WINJ': 'OTHER',
    },
    'status_map': {
        '': 'UNKNOWN',
        'GSTOR': 'ACTIVE',
        'WTRSP': 'ACTIVE',
        '1WIN2S': 'ACTIVE',
        '1-OIL': 'ACTIVE',
        'P&AOG': 'PA',
        'WDSP1': 'ACTIVE',
        '1-SBNG': 'ACTIVE',
        'WDSP2': 'ACTIVE',
        '3-GAS': 'ACTIVE',
        'CO-2O': 'ACTIVE',
        'SQZED': 'UNKNOWN',
        '1G-WD': 'ACTIVE',
        'P&A-G': 'PA',
        'WAGIN': 'UNKNOWN',
        '1O-WI': 'ACTIVE',
        '1-WINJ': 'ACTIVE',
        '2-GAS': 'ACTIVE',
        'P&A': 'PA',
        '2G-GS': 'ACTIVE',
        'GI-1O': 'ACTIVE',
        'OBSW': 'UNKNOWN',
        '1G-GS': 'ACTIVE',
        '1-0IL': 'ACTIVE',
        'ADMA': 'UNKNOWN',
        '2-OIL': 'ACTIVE',
        'SUSP': 'TA',
        'SPLUG': 'PA',
        'CO-3O': 'UNKNOWN',
        'INFO': 'UNKNOWN',
        'SI': 'UNKNOWN',
        'UN': 'UNKNOWN',
        '1-GAS': 'ACTIVE',
        'GEOTH': 'UNKNOWN',
        '1WINJ': 'ACTIVE',
        '1GINJ': 'ACTIVE',
        '2WINJ': 'ACTIVE',
    },
    'category_map': {
        '': 'OTHER',
        'GSTOR': 'STORAGE',
        '1OINJ': 'UIC',
        'WTRSP': 'OTHER',
        '1-OIL': 'CONVENTIONAL',
        'GASSP': 'CONVENTIONAL',
        'P&AOG': 'CONVENTIONAL',
        'COND': 'OTHER',
        '1-SBNG': 'OTHER',
        'WDSP2': 'OTHER',
        'P&A-O': 'CONVENTIONAL',
        'CO-2O': 'OTHER',
        'SQZED': 'OTHER',
        '1G-WD': 'CONVENTIONAL',
        'P&A-G': 'CONVENTIONAL',
        '3-OIL': 'CONVENTIONAL',
        'PERMIT': 'OTHER',
        '1O-WI': 'UIC',
        '2-GAS': 'CONVENTIONAL',
        'P&A': 'CONVENTIONAL',
        'CO-3O': 'OTHER',
        'W-INJW': 'UIC',
        'OBSW': 'TEST',
        '1G-GS': 'CONVENTIONAL',
        '1WINJ': 'UIC',
        '2-OIL': 'CONVENTIONAL',
        'SUSP': 'CONVENTIONAL',
        '1-0IL': 'CONVENTIONAL',
        'OIL-WO': 'CONVENTIONAL',
        '1-CBNG': 'OTHER',
        'INFO': 'TEST',
        '3-GAS': 'CONVENTIONAL',
        'P&A-OG': 'CONVENTIONAL',
        'P&A-OC': 'CONVENTIONAL',
        'SI': 'OTHER',
        'WAGIN': 'OTHER',
        '1-GAS': 'CONVENTIONAL',
        'GEOTH': 'OTHER',
        'WDSP1': 'OTHER',
        '1GINJ': 'UIC',
        '2WINJ': 'UIC',
    },
    'location_src_map': None
}

state.load_data()
dataset = Dataset(state)
db_result = dataset.commit_to_db()
print 'loaded', str(db_result), 'of', str(len(dataset.processed_data)), 'records into the database'

8752 of 8752 rows loaded
first item: [{'status': 'PA', 'date': datetime.datetime(1986, 11, 28, 0, 0), 'state': 'AK', 'api': '50009009990000', 'description': 'Operator Name: HERSHEY\nLease Num: \nFldPlWld: WILDCAT\nArea: COOK INLET BASIN\nWell Name: WATER WELL-HERSHEY', 'location_src': None, 'lat': 61.65876676478805, 'type': 'GAS', 'category': 'CONVENTIONAL', 'lon': -149.41019943170826}]
loaded 8752 of 8752 records into the database


In [53]:
# Arizona
state = State('AZ')
# Must be set on the State object: a bare `source_url` variable is never seen
# by write_to_db(), which reads state.source_url.
state.source_url = 'http://azogcc.az.gov/'
state.description = """
Data from the Arizona Oil and Gas Conservation Commission ArcGIS map at http://ogviewer.azdeq.gov/
"""

state.write_to_db()

# Column mapping for the Arizona CSV. No category or location-source column is
# configured, so Dataset.process_rows stores None for both.
state.layout = {
    'date_field': 'date',
    'api_field': 'apino',
    'api_includes_state': True,
    'type_field': 'welltype',
    'status_field': 'status',
    'category_field': None,
    'lat_field': 'y',
    'lon_field': 'x',
    'location_src_field': None,
    'description_layout': '\n'.join(['Well Name: %s',
                             'Operator: %s',
                             'County: %s',
                             'Field: %s', 
                             'AZ OGCC Notes: %s'
                            ]),
    'description_fields': ['wellname', 'operator', 'county', 'field', 'notes'],
    # Any welltype value not listed here is stored as 'OTHER' by
    # Dataset.process_rows.
    'type_map': {
        'GasStorage': 'STORAGE',
        'Disposal': 'DISPOSAL',
        'Gas': 'GAS',
        'Water Well': 'WATER',
        'Geothermal': 'OTHER',
        'O&Gexplor': 'OILANDGAS',
    },
    # NOTE(review): 'Temoprarily abandoned' presumably mirrors the spelling
    # found in the source data; verify before correcting the key.
    'status_map': {
        'Unknown': 'UNKNOWN',
        'Shut in': 'PA',
        'Abandoned plugged': 'PA',
        'Active': 'ACTIVE',
        'Abandoned junked': 'PA',
        'Abandoned temporary': 'TA',
        'Temoprarily abandoned': 'TA',
    },
    'category_map': None,
    'location_src_map': None
}

# Load the CSV, normalise the rows and upsert them; commit_to_db() returns the
# number of rows written, or False on failure.
state.load_data()
dataset = Dataset(state)
db_result = dataset.commit_to_db()
if db_result:
    print 'loaded', str(db_result), 'of', str(len(dataset.processed_data)), 'records into the database'
else:
    print "** error loading database! **"

Wrote description for AZ to database
1135 of 1135 rows loaded
first item: [{'status': 'PA', 'date': None, 'state': 'AZ', 'api': '0201290001', 'description': 'Well Name: Phillips Petroleum H01 Federal\nOperator: Phillips Petroleum\nCounty: LA PAZ\nField: \nAZ OGCC Notes: no logs', 'location_src': None, 'lat': 33.49979433000004, 'type': 'OTHER', 'category': None, 'lon': -113.63266879999998}]
loaded 1135 of 1135 records into the database


In [None]:
# Arkansas
# NOTE: this cell and the following cells through Illinois are placeholders --
# each only constructs a State; no layout, data load or DB write is defined yet.
state = State('AR')

In [None]:
# California
state = State('CA')

In [None]:
# Colorado
state = State('CO')

In [None]:
# Connecticut
state = State('CT')

In [None]:
# Delaware
state = State('DE')

In [None]:
# District of Columbia
state = State('DC')

In [None]:
# Florida
state = State('FL')

In [None]:
# Georgia
state = State('GA')

In [None]:
# Hawaii
state = State('HI')

In [None]:
# Idaho
state = State('ID')

In [None]:
# Illinois
state = State('IL')

In [73]:
# Indiana
state = State('IN')
state.source_url = 'https://igs.indiana.edu/pdms/map/'
state.description = """
Oil and gas records retrieved from the Indiana Geological Survey. Well location data is
retrieved from the Petroleum Database Managment System at <a href="https://igs.indiana.edu/pdms/map/"
target="_blank">https://igs.indiana.edu/pdms/map/</a>. Well status information is retrieved
by querying the PDMS well record tables at: https://igs.indiana.edu/PDMS/WellSearch.cfm
All locations are assumed to be surface locations.
Indiana does not identify its wells using API numbers. For the purposes of our database,
we have used the two digit state prefix plus the IGS_ID to identify Indiana wells.
"""

state.write_to_db()
# IN Well Symbols and Statuses: https://igs.indiana.edu/pdms/Help/index.htm#t=Petroleum_Well_Symbols.htm
# Column mapping for the Indiana CSV. api_includes_state is False, so the
# state's code from state_apis is prepended to the IGS_ID. Both the type and
# the status are derived from the single SYMBOL column.
state.layout = {
    'date_field': 'Date',
    'api_field': 'IGS_ID',
    'api_includes_state': False,
    'type_field': 'SYMBOL',
    'status_field': 'SYMBOL',
    'category_field': None,
    'location_src_field': None,
    'lat_field': 'y',
    'lon_field': 'x',
    'description_layout': '\n'.join(['County: %s',
                             'Field Name: %s',
                             'IGS ID: %s',
                             'IGS Reported Status: %s',
                             'Link: <a href="https://igs.indiana.edu/pdms/wellEvents.cfm?igsID=%s">IGS link</a>'
                            ]),
    'description_fields': ['COUNTY', 'FIELD_NAME', 'IGS_ID', 'Status', 'IGS_ID'],
    # SYMBOL -> well type
    'type_map': {
        'OAG': 'OILANDGAS',
        'AGAS-C': 'GAS',
        'AOIL': 'OIL',
        'AWIO': 'INJECTION',
        'GSG': 'STORAGE',
        'ASWDO': 'DISPOSAL',
        'AGAS-L': 'GAS',
        'AWIG': 'INJECTION',
        'AOAG-LC': 'OILANDGAS',
        'GSO': 'STORAGE',
        'AOIL-C': 'OIL',
        'NPWSO': 'WATER',
        'TAOIL': 'OIL',
        'GAS': 'GAS',
        'AOIL-L': 'OIL',
        'AGAS': 'GAS',
        'AOAG-L': 'OILANDGAS',
        'AOAG-C': 'OILANDGAS',
        'AOIL-LC': 'OIL',
        'WIOG': 'INJECTION',
        'OBG': 'OBSERVATION',
        'ASWDG': 'DISPOSAL',
        'AGSO': 'STORAGE',
        'AGAS-LC': 'GAS',
        'AGSG': 'STORAGE',
        'WIG': 'INJECTION',
        'SWDO': 'DISPOSAL',
        'OIL': 'OIL',
        'TAGAS': 'GAS',
        'WIO': 'INJECTION',
        'SWDG': 'DISPOSAL',
        'TAOG': 'OILANDGAS',
        'AOAG': 'OILANDGAS',
        'AWIOG': 'INJECTION',
    },
    # SYMBOL -> well status (same keys as type_map)
    'status_map': {  
        'OAG': 'ACTIVE',
        'AGAS-C': 'PA',
        'AOIL': 'PA',
        'AWIO': 'PA',
        'GSG': 'ACTIVE',
        'ASWDO': 'PA',
        'AGAS-L': 'PA',
        'AWIG': 'PA',
        'AOAG-LC': 'PA',
        'GSO': 'ACTIVE',
        'AOIL-C': 'PA',
        'NPWSO': 'ACTIVE',
        'TAOIL': 'TA',
        'GAS': 'ACTIVE',
        'AOIL-L': 'PA',
        'AGAS': 'PA',
        'AOAG-L': 'PA',
        'AOAG-C': 'PA',
        'AOIL-LC': 'PA',
        'WIOG': 'ACTIVE',
        'OBG': 'ACTIVE',
        'ASWDG': 'PA',
        'AGSO': 'PA',
        'AGAS-LC': 'PA',
        'AGSG': 'PA',
        'WIG': 'ACTIVE',
        'SWDO': 'ACTIVE',
        'OIL': 'ACTIVE',
        'TAGAS': 'TA',
        'WIO': 'ACTIVE',
        'SWDG': 'ACTIVE',
        'TAOG': 'TA',
        'AOAG': 'PA',
        'AWIOG': 'PA',
    },
    'category_map': None,
    'location_src_map': None
}

# Load the CSV, normalise the rows and upsert them; commit_to_db() returns the
# number of rows written, or False on failure.
state.load_data()
dataset = Dataset(state)
db_result = dataset.commit_to_db()
if db_result:
    print 'loaded', str(db_result), 'of', str(len(dataset.processed_data)), 'records into the database'
else:
    print "** error loading database! **"

Wrote description for IN to database
49963 of 51512 rows loaded
first item: [{'status': 'PA', 'date': datetime.datetime(1890, 1, 1, 0, 0), 'state': 'IN', 'api': '13138538', 'description': 'County: Hamilton\nField Name: Trenton\nIGS ID: 138538\nIGS Reported Status: gas\nLink: <a href="https://igs.indiana.edu/pdms/wellEvents.cfm?igsID=138538">IGS link</a>', 'location_src': None, 'lat': 39.994642027816056, 'type': 'GAS', 'category': None, 'lon': -86.18338877060668}]
loaded 49963 of 49963 records into the database


In [None]:
# Iowa
# NOTE: this cell and the following cells through Oregon are placeholders --
# each only constructs a State; no layout, data load or DB write is defined yet.
state = State('IA')

In [None]:
# Kansas
state = State('KS')

In [None]:
# Kentucky
state = State('KY')

In [None]:
# Louisiana
state = State('LA')

In [None]:
# Maine
state = State('ME')

In [None]:
# Maryland
state = State('MD')

In [None]:
# Massachusetts
state = State('MA')

In [None]:
# Michigan
state = State('MI')

In [None]:
# Minnesota
state = State('MN')

In [None]:
# Mississippi
state = State('MS')

In [None]:
# Missouri
state = State('MO')

In [None]:
# Montana
state = State('MT')

In [None]:
# Nebraska
state = State('NE')

In [None]:
# Nevada
state = State('NV')

In [None]:
# New Hampshire
state = State('NH')

In [None]:
# New Jersey
state = State('NJ')

In [None]:
# New Mexico
state = State('NM')

In [None]:
# New York
state = State('NY')

In [None]:
# North Carolina
state = State('NC')

In [None]:
# North Dakota
state = State('ND')

In [None]:
# Ohio
state = State('OH')

In [None]:
# Oklahoma
state = State('OK')

In [None]:
# Oregon
state = State('OR')

In [67]:
# Pennsylvania
state = State('PA')
state.source_url = 'http://www.depreportingservices.state.pa.us/ReportServer/Pages/ReportViewer.aspx?/Oil_Gas/Spud_External_Data'
state.description = """
Data downloaded in CSV form based on setting SPUD BEGIN DATE = 1/1/1800 and SPUD END DATE = 12/31/2099
Represents 142,788, of which 110,000 have dates wells.
"""
state.write_to_db()

# Column mapping for the Pennsylvania CSV. api_includes_state is False, so the
# state's code from state_apis is prepended to the API column. The category is
# derived from the Yes/No values of the UNCONVENTIONAL column.
state.layout = {
    'date_field': 'SPUD_DATE',
    'api_field': 'API',
    'api_includes_state': False,
    'type_field': 'WELL_CODE_DESC',
    'status_field': 'WELL_STATUS',
    'category_field': 'UNCONVENTIONAL',
    'location_src_field': None,
    'lat_field': 'LATITUDE',
    'lon_field': 'LONGITUDE',
    'description_layout': '\n'.join(['County: %s',
                             'Operator Name: %s',
                             'OGO No: %s',
                             'Municipality: %s',
                             'Farm Name: %s',
                             'Well Configuration: %s'
                            ]),
    'description_fields': ['COUNTY', 'OPERATOR', 'OGO_NUM', 'MUNICIPALITY', 'FARM_NAME', 'CONFIGURATION'],
    
    'type_map': {
        'UNDETERMINED': 'OTHER',
        'MULTIPLE WELL BORE TYPE': 'OTHER',
        'OIL': 'OIL',
        'OBSERVATION': 'OBSERVATION',
        'DRY HOLE': 'DRY HOLE',
        'COALBED METHANE': 'GAS',
        'GAS': 'GAS',
        'COMB. OIL&GAS': 'OILANDGAS',
        'STORAGE WELL': 'STORAGE',
        'WASTE DISPOSAL': 'DISPOSAL',
        'INJECTION': 'INJECTION',
        'TEST WELL': 'TEST',
    },

    'status_map': {
        'DEP Abandoned List': 'ORPHAN',
        'Plugged OG Well': 'PA',
        'DEP Orphan List': 'ORPHAN',
        'DEP Plugged': 'PA',
        'Regulatory Inactive Status': 'UNKNOWN',
        'Operator Reported Not Drilled': 'CANCELLED',
        'Proposed But Never Materialized': 'CANCELLED',
        'Active': 'ACTIVE',
        'Abandoned': 'PA',
    },

    'category_map': {
        'Yes': 'UNCONVENTIONAL',
        'No': 'CONVENTIONAL',
    },
    'location_src_map': None
}

# Load the CSV, normalise the rows and upsert them; commit_to_db() returns the
# number of rows written, or False on failure.
state.load_data()
dataset = Dataset(state)
db_result = dataset.commit_to_db()
if db_result:
    print 'loaded', str(db_result), 'of', str(len(dataset.processed_data)), 'records into the database'
else:
    print "** error loading database! **"

Wrote description for PA to database
142788 of 203545 rows loaded
first item: [{'status': 'ACTIVE', 'date': None, 'state': 'PA', 'api': '3700300921', 'description': 'County: Allegheny\nOperator Name: DANIEL J NADLER\nOGO No: OGO-32546\nMunicipality: Aleppo Twp\nFarm Name: ZACHARY NADLER 1\nWell Configuration: Vertical Well', 'location_src': None, 'lat': 40.529607, 'type': 'OIL', 'category': 'CONVENTIONAL', 'lon': -80.139878}]
loaded 142788 of 142788 records into the database


In [None]:
# Rhode Island
# NOTE: this cell and the following cells through South Dakota are placeholders --
# each only constructs a State; no layout, data load or DB write is defined yet.
state = State('RI')

In [None]:
# South Carolina
state = State('SC')

In [None]:
# South Dakota
state = State('SD')

In [68]:
# Tennessee
state = State('TN')

# NOTE(review): the stored text below begins "does not / doesn't make" --
# likely a wording slip in the description; it is written to the DB as-is.
state.description = """
Tennessee's Department of Environment & Conservation does not 
doesn't make a lot of details available (no spud date, well status). The dates recorded are the 
Permit Date. Status is set to unknown for all wells. Per their website, 
<i>"This minimal permit information is intended to allow the public to easily track the number and 
locations of oil and gas well permits issued. For more detailed information about a specific well, 
contact the Oil and Gas program staff at 615-687-7109 . Permit files can be accessed by visiting 
the Oil and Gas Section at the Nashville Environmental Field Office in Nashville, TN 37243."</i>
"""
state.source_url = 'http://environment-online.state.tn.us:8080/pls/enf_reports/f?p=9034:34300:0::NO:::'
# Both description and source_url are set before being persisted here.
state.write_to_db()

state.layout = {
    'date_field': 'Permit Date',
    'api_field': 'API No',
    'api_includes_state': False,
    'type_field': 'Purpose af Well',
    'status_field': None,
    'category_field': None,
    'location_src_field': None,
    'lat_field': 'Latitude',
    'lon_field': 'Longitude',
    'description_layout': '\n'.join(['County: %s',
                             'Operator Name: %s',
                             'Permit No: %s',
                             'Well Name and Number: %s',
                             'EFO Name: %s'
                            ]),
    'description_fields': ['County', 'Operator Name', 'Permit No', 'Well Name and Number', 'EFO Name'],
    'type_map': None,
    'status_map': None,
    'category_map': None,
    'type_map': {
        '': 'OTHER',
        'Oil': 'OIL',
        'Oil And Gas': 'OILANDGAS',
        'Mineral Core Test': 'TEST',
        'Gas': 'GAS',
        'NCG - Domestic Use': 'OTHER',
    },
    'location_src_map': None
}

state.load_data()
dataset = Dataset(state)
db_result = dataset.commit_to_db()
if db_result:
    print 'loaded', str(db_result), 'of', str(len(dataset.processed_data)), 'records into the database'
else:
    print "** error loading database! **"

Wrote description for TN to database
15976 of 16042 rows loaded
first item: [{'status': 'UNKNOWN', 'date': datetime.datetime(2016, 7, 22, 0, 0), 'state': 'TN', 'api': '4113322739', 'description': 'County: Overton\nOperator Name: Hornet Corporation\nPermit No: 0012839\nWell Name and Number: Johnny Taylor #1A\nEFO Name: Cookeville', 'location_src': None, 'lat': 36.500806, 'type': 'OILANDGAS', 'category': None, 'lon': -85.240611}]
loaded 15976 of 15976 records into the database


In [None]:
# Texas
# NOTE: this cell and the following cells through Wyoming are placeholders --
# each only constructs a State; no layout, data load or DB write is defined yet.
state = State('TX')

In [None]:
# Utah
state = State('UT')

In [None]:
# Vermont
state = State('VT')

In [None]:
# Virginia
state = State('VA')

In [None]:
# Washington
state = State('WA')

In [None]:
# West Virginia
state = State('WV')

In [None]:
# Wisconsin
state = State('WI')

In [None]:
# Wyoming
state = State('WY')

In [None]:
# Placeholders for the offshore regions; no layout, data load or DB write is
# defined yet.
# NOTE(review): each assignment rebinds `state`, so only the last State ('DC1')
# survives this cell. The names used here ('AK1', 'CA1', 'TX1', 'DC1') are not
# keys of state_apis, which lists the regions as 'AlaskaOffshore',
# 'PacificCoastOffshore', 'NorthernGulfofMexico' and 'AtlanticCoastOffshore'.
# Alaska Offshore
state = State('AK1')
# Pacific Coast Offshore
state = State('CA1')
# Northern Gulf of Mexico
state = State('TX1')
# Atlantic Coast Offshore
state = State('DC1')
