In [111]:
# module that imports CSVs and processes to DB
import os, errno, array, csv, math, re
from datetime import datetime
#from dateutil.parser import parse
import dateutil.parser
import db_settings
import psycopg2
from psycopg2 import sql
#from state import State


# Blank layout listing every key a state's ``layout`` dict is expected to define.
# Copy it into a new state cell, then fill in the source CSV column names and maps.
#
# It is defined as a real module-level dict (it used to be commented out) so that
# code referring to ``layout_template`` does not hit a NameError on a fresh kernel.
#
# The ``state.layout = {...}`` assignment that previously sat here was removed: no
# ``state`` object exists when this cell runs after Restart & Run All, so it raised
# NameError and prevented the rest of the cell from being executed.
layout_template = {
    'date_field': '',
    'api_field': '',
    'api_includes_state': True,
    'type_field': '',
    'status_field': '',
    'category_field': '',
    'lat_field': '',
    'lon_field': '',
    'location_src_field': '',
    'description_layout': '\n'.join(['Descriptor1: %s',
                                     'Descriptor2: %s',
                                     'Descriptor3: %s',
                                     'Descriptor4: %s',
                                     'Descriptor5: %s'
                                     ]),
    'description_fields': ['DescCol1', 'DescCol2', 'DescCol3', 'DescCol4', 'DescCol5'],
    'type_map': None,
    'status_map': None,
    'category_map': None,
    'location_src_map': None
}

def map_template(map_type, values):
    """Print a copy-and-paste skeleton for one of a layout's ``*_map`` dicts.

    Each source value becomes a key. When the upper-cased source value is already
    one of the valid target values it is pre-filled, otherwise the target is left
    blank for the author to complete.

    Args:
        map_type: One of ``'type'``, ``'status'``, ``'category'`` or
            ``'location_src'``.
        values: Iterable of the distinct source strings found in the CSV column.

    Raises:
        ValueError: If ``map_type`` is not one of the four supported names
            (previously this surfaced as an UnboundLocalError).
    """
    # The lookup tables are resolved inside the branches so that an invalid
    # map_type is rejected before any module-level table is touched.
    if map_type == 'type':
        map_key, valid_options = 'type_map', well_types
    elif map_type == 'status':
        map_key, valid_options = 'status_map', well_statuses
    elif map_type == 'category':
        map_key, valid_options = 'category_map', well_categories
    elif map_type == 'location_src':
        map_key, valid_options = 'location_src_map', location_sources.keys()
    else:
        raise ValueError(
            "Unknown map_type %r; expected 'type', 'status', 'category' or 'location_src'" % (map_type,))

    print ('valid options:', valid_options)
    print ("    '%s': {" % map_key)
    for item in values:
        suggestion = item.upper() if item.upper() in valid_options else ''
        print ("        '" + item + "': '" + suggestion + "',")
    print ("    },")
    
# Two-character state codes that prefix an API number, keyed by state abbreviation
# (or offshore region name). Entries are listed in ascending code order.
state_apis = {
    'AL': '01', 'AZ': '02', 'AR': '03', 'CA': '04', 'CO': '05',
    'CT': '06', 'DE': '07', 'DC': '08', 'FL': '09', 'GA': '10',
    'ID': '11', 'IL': '12', 'IN': '13', 'IA': '14', 'KS': '15',
    'KY': '16', 'LA': '17', 'ME': '18', 'MD': '19', 'MA': '20',
    'MI': '21', 'MN': '22', 'MS': '23', 'MO': '24', 'MT': '25',
    'NE': '26', 'NV': '27', 'NH': '28', 'NJ': '29', 'NM': '30',
    'NY': '31', 'NC': '32', 'ND': '33', 'OH': '34', 'OK': '35',
    'OR': '36', 'PA': '37', 'RI': '38', 'SC': '39', 'SD': '40',
    'TN': '41', 'TX': '42', 'UT': '43', 'VT': '44', 'VA': '45',
    'WA': '46', 'WV': '47', 'WI': '48', 'WY': '49', 'AK': '50',
    'HI': '51',
    'AlaskaOffshore': '55',
    'PacificCoastOffshore': '56',
    'NorthernGulfofMexico': '60',
    'AtlanticCoastOffshore': '61',
}

# Valid targets for a layout's type_map.
well_types = {
    'OIL', 'GAS', 'OILANDGAS',
    'INJECTION', 'STORAGE', 'DISPOSAL', 'SERVICE',
    'DRY HOLE', 'OBSERVATION', 'TEST', 'WATER',
    'OTHER', 'UNKNOWN',
}

# Valid targets for a layout's status_map.
# PA: plugged and abandoned, TA: temporarily abandoned
well_statuses = {
    'ACTIVE', 'A', 'PA', 'TA', 'SI',
    'ORPHAN', 'PERMITTED', 'CANCELLED', 'UNKNOWN',
}

# Valid targets for a layout's category_map.
well_categories = {
    'CONVENTIONAL', 'UNCONVENTIONAL', 'FRAC', 'CBM', 'SERVICE',
    'TEST', 'STORAGE', 'WATER', 'OTHER', 'UNKNOWN',
}

# Location source codes and their display names.
location_sources = {
    'S': 'Surface',
    'BH': 'Bottom Hole',
}








In [122]:
class State(object):
    """Configuration and raw CSV rows for one state's well data.

    A State carries the settings a Dataset reads (``name``, ``layout``, ``data``,
    ``projection_srid``) plus the ``source_url`` / ``description`` metadata that
    ``write_to_db`` stores in the ``states`` table.
    """

    def __init__(self, state, **kwargs):
        """Create the state and set its defaults.

        Args:
            state: State abbreviation, e.g. ``'AL'``. Stored as ``self.name`` and
                used to derive the default CSV filename and download directory.
            **kwargs: Extra attributes set verbatim on the instance, e.g.
                ``date_function`` or ``api_function`` (Dataset calls these hooks
                when they are present). ``filename`` is special-cased: it is taken
                relative to the ``csvs/`` directory.

        Note:
            ``data``, ``layout``, ``source_url``, ``description``,
            ``download_directory`` and ``projection_srid`` are reset to their
            defaults after the keyword arguments are applied, so passing them here
            has no effect; assign them on the instance afterwards.
        """
        self.name = state
        for key, value in kwargs.items():
            setattr(self, key, value)

        if 'filename' in kwargs:
            self.filename = 'csvs/' + kwargs['filename']
        else:
            self.filename = 'csvs/' + self.name.lower() + '-' + 'data' + '.csv'

        self.data = None
        self.layout = None
        self.source_url = None
        self.description = None
        # Lower-case only the state component. Lower-casing the whole absolute path
        # (as before) yields a different directory on case-sensitive filesystems.
        self.download_directory = os.path.abspath(os.path.join('downloads', self.name.lower()))
        self.projection_srid = 4326 # WGS84 by default, but can be overridden

    def load_data(self):
        """Read ``self.filename`` into ``self.data`` as a list of per-row dicts.

        Raises:
            IOError: If the file cannot be opened or read; the message names the file.
        """
        try:
            # newline='' is what the csv module asks for, so that line breaks inside
            # quoted fields are passed through to the reader untouched.
            with open(self.filename, 'r', newline='') as f:
                self.data = list(csv.DictReader(f))
        except IOError as e:
            raise IOError("Error in State.load_data loading data. Verify that " + self.filename +
                          ' exists and try again. ' + str(e)) from e

    def write_to_db(self):
        """Store ``source_url`` and ``description`` on this state's ``states`` row.

        Prints a warning when either value is missing. Database errors are printed
        rather than raised. The success message is printed only when the UPDATE ran
        without error and matched at least one row.
        """
        if not self.source_url:
            print ('Warning: Missing source_url')
        if not self.description:
            print ('Warning: Missing state description')

        query = 'UPDATE states SET (source_url, description, last_updated) = (%(source_url)s, %(description)s, now()) WHERE state=%(state)s;'
        values = {'source_url': self.source_url, 'description': self.description, 'state': self.name}

        conn = psycopg2.connect(database=db_settings.DB, user=db_settings.USER, password=db_settings.PASSWD, host=db_settings.HOST)
        updated_rows = None
        try:
            # "with conn" ends the transaction on exit but does not close the
            # connection, hence the explicit close() in the finally clause.
            with conn:
                with conn.cursor() as cur:
                    try:
                        cur.execute(query, values)
                        updated_rows = cur.rowcount
                    except psycopg2.Error as e:
                        print (query)
                        print (e.pgerror)
        finally:
            conn.close()

        if updated_rows is None:
            print ('Failed to write description for', self.name, 'to database')
        elif updated_rows == 0:
            print ('Warning: no row in states matched', repr(self.name), '- nothing was updated')
        else:
            print ('Wrote description for', self.name, 'to database')

    def set_projection(self, proj):
        """Override the SRID (default 4326) that the source coordinates are expressed in."""
        self.projection_srid = proj

In [103]:
class Dataset(object):
    """Turns a State's raw CSV rows into well records and loads them into the database.

    The state's ``layout`` dict is copied onto the instance, so every layout key
    (``api_field``, ``type_map``, ...) is available as an attribute. Construction
    validates the layout and, when it is valid, processes the rows immediately.
    """

    # Keys every state layout must define before rows can be processed. Kept on the
    # class so validation does not depend on a module-level name being defined.
    REQUIRED_LAYOUT_KEYS = (
        'date_field', 'api_field', 'api_includes_state', 'type_field', 'status_field',
        'category_field', 'lat_field', 'lon_field', 'location_src_field',
        'description_layout', 'description_fields',
        'type_map', 'status_map', 'category_map', 'location_src_map',
    )

    def __init__(self, state):
        """Copy settings from ``state`` and process its rows if the layout is valid.

        Args:
            state: Object exposing ``layout``, ``name``, ``data`` and
                ``projection_srid``; optionally ``api_function`` and
                ``date_function`` hooks.
        """
        # A missing layout is treated as empty so ready() can report what is missing.
        self.__dict__.update(state.layout or {})
        self.state_name = state.name
        self.source_data = state.data
        self.projection = state.projection_srid
        self.state = state
        self.layout_ready = False
        self.processed_data = None
        if self.ready():
            self.process_rows()

    def ready(self):
        """Validate the layout against the loaded data.

        Prints what is missing (including a map skeleton via ``map_template`` when a
        field has no map) and returns False; returns True once the layout is usable.
        """
        if self.layout_ready:
            return True

        if not self.source_data:
            print ('No source data loaded. Call load_data() on the state before building a Dataset.')
            return False

        columns = list(self.source_data[0].keys())

        # getattr: a layout may omit a key entirely; that is reported further down
        # as a missing value instead of failing here with AttributeError.
        type_field = getattr(self, 'type_field', None)
        status_field = getattr(self, 'status_field', None)
        category_field = getattr(self, 'category_field', None)

        if type_field and not getattr(self, 'type_map', None):
            print ('Missing or invalid type_map.')
            map_template('type', { row[type_field] for row in self.source_data })
            return False
        if status_field and not getattr(self, 'status_map', None):
            print ('Missing or invalid status map.')
            map_template('status', { row[status_field] for row in self.source_data })
            return False
        if category_field and not getattr(self, 'category_map', None):
            print ('Warning: missing or invalid category map')
            map_template('category', { row[category_field] for row in self.source_data })
            return False

        # Blank values first, then required keys that are absent altogether.
        problems = [key for key, value in self.__dict__.items() if value == '']
        problems.extend(key for key in self.REQUIRED_LAYOUT_KEYS if key not in self.__dict__)

        if problems:
            print ('Invalid layout. Available source file columns: \n\'' + "', '".join(columns) + "'")
            print ('\nExample rows:', self.source_data[:2])
            print ('Missing values for:', ', '.join(problems))
            return False

        self.layout_ready = True
        return self.layout_ready

    def data_ready(self):
        """Return True when process_rows() has produced at least one record."""
        return bool(self.processed_data)

    @staticmethod
    def _first_nonblank(row, fields):
        """Return the first truthy value among ``row[field]`` for ``fields``, else None."""
        for field in fields:
            value = row[field]
            if value:
                return value
        return None

    def _extract_api(self, row):
        """Return the normalised API number for ``row``, or '' when the row has none."""
        #   XX  |  XXX  |  XXXXX  |  XX
        # State  County     Well     Bore
        if isinstance(self.api_field, list):
            api = self._first_nonblank(row, self.api_field)
        else:
            api = row[self.api_field]

        if not api:
            # Checked before the state prefix is added; otherwise a blank id would
            # turn into the bare state code and slip past the caller's blank check.
            return ''

        if not self.api_includes_state:
            api = state_apis[self.state_name] + api

        # Drop everything that is not a letter or digit (underscore included).
        api = re.sub(r'[\W_]', '', api).upper()
        if hasattr(self.state, 'api_function'):
            api = self.state.api_function(api)
        return api

    def _extract_date(self, row):
        """Return the row's date (first non-blank of ``date_field``), or None."""
        if isinstance(self.date_field, list):
            raw = self._first_nonblank(row, self.date_field)
        else:
            raw = row[self.date_field]

        if not raw:
            return None

        if hasattr(self.state, 'date_function'):
            date = self.state.date_function(raw)
        else:
            date = dateutil.parser.parse(raw)

        if isinstance(date, datetime):
            date = date.date() # convert to date type
        return date

    def process_rows(self):
        """Build ``self.processed_data`` from ``self.source_data``.

        Rows without an API number or without numeric coordinates are skipped. For a
        repeated API number, a later row is kept only when the stored record has no
        date or the later row's date is newer; in that case the later row is appended
        in addition to the earlier one.

        Returns:
            True when rows were processed, False when the layout is not ready.
        """
        if not self.layout_ready:
            print ('layout not ready')
            return False

        latest_by_api = dict()
        wells = []
        for row in self.source_data:
            api = self._extract_api(row)
            if not api:
                continue # some states such as AZ include non-O&G wells w/o API numbers

            try:
                lon = float(row[self.lon_field])
                lat = float(row[self.lat_field])
            except (TypeError, ValueError):
                # Missing (None) or non-numeric coordinates: the row cannot be mapped.
                continue

            date = self._extract_date(row)

            if self.type_field and row[self.type_field] in self.type_map:
                well_type = self.type_map[row[self.type_field]]
            else:
                well_type = 'OTHER'

            well_status = self.status_map[row[self.status_field]] if self.status_field else 'UNKNOWN'
            well_category = self.category_map[row[self.category_field]] if self.category_field else None
            location_src = self.location_src_map[row[self.location_src_field]] if self.location_src_field else None

            description_values = tuple(row[field_name] for field_name in self.description_fields)
            description = self.description_layout % description_values
            well = {'state': self.state_name, 'api': api, 'lon': lon, 'lat': lat, 'location_src': location_src,
                    'date': date, 'type': well_type, 'status': well_status,
                    'category': well_category, 'description': description }

            known = latest_by_api.get(api)
            if known is None: # new API
                latest_by_api[api] = well
                wells.append(well)
            elif (not known['date']) or (well['date'] and well['date'] > known['date']):
                # the new record has a newer date: track it and append it as well
                latest_by_api[api] = well
                wells.append(well)
            # otherwise discard this record

        self.processed_data = wells
        print (len(self.processed_data), 'of', str(len(self.source_data)), 'rows loaded')
        print ('first item:', wells[:1])
        return True

    def commit_to_db(self):
        """Upsert every processed record into the ``wells`` table.

        Returns:
            The number of rows written on success; False when there is nothing to
            write or a database error occurred (nothing is committed in that case).
            When the layout was not ready, the result of ``ready()`` is returned
            and nothing is written.
        """
        if not self.layout_ready:
            return self.ready()
        if not self.processed_data:
            print ('data not ready')
            return False

        # The statement is identical for every row, so it is built once up front.
        fields = "api, state, status, type, category, date, description, location_src, location, capture_time"
        # int(): the SRID is spliced into the SQL text, so only an integer may get in.
        values = ("%(api)s, %(state)s, %(status)s, %(type)s, %(category)s, %(date)s, %(description)s, %(location_src)s, " +
                  "Geography(ST_Transform(ST_GeometryFromText('POINT(%(lon)s %(lat)s)', {projection}), 4326)), " +
                  "now()").format(projection = int(self.projection))
        query =  ("INSERT INTO wells ({fields}) VALUES ({values}) " +
                  "ON CONFLICT (api, status, date) DO UPDATE SET ({fields}) = ({values}) " +
                  "WHERE wells.api = %(api)s AND wells.status = %(status)s" +
                  "").format(fields = fields, values = values)

        inserts = 0
        conn = psycopg2.connect(database=db_settings.DB, user=db_settings.USER, password=db_settings.PASSWD, host=db_settings.HOST)
        try:
            with conn.cursor() as cur:
                for row in self.processed_data:
                    if row['date'] == '':
                        row['date'] = None
                    try:
                        cur.execute(query, row)
                    except psycopg2.Error as e:
                        print (query)
                        print (e.pgerror)
                        conn.rollback()
                        return False
                    inserts += 1
            conn.commit()
        finally:
            # Always release the connection, including on the early error return.
            conn.close()

        print('loaded {0} of {1} records into the database'.format(inserts, len(self.processed_data)))
        return inserts

In [104]:
# Alabama
# Builds the AL State, stores its metadata, defines the column layout, then loads
# the CSV (default path csvs/al-data.csv) and commits the processed wells.
#
# date_function: the raw date is converted float -> int, divided by 1000 and passed
# to datetime.fromtimestamp -- presumably the source stores epoch milliseconds; TODO confirm.
# NOTE(review): datetime.fromtimestamp() returns local time, so the resulting calendar
# date can depend on the timezone of the machine running the notebook.
state = State('AL', date_function = lambda x: datetime.fromtimestamp(int(float(x))/1000))
# NOTE(review): well_source_url is not referenced anywhere else in the visible notebook.
well_source_url = 'http://www.gsa.state.al.us/ogb/results.aspx?api=%s&permit=%s'
state.source_url = 'http://www.gsa.state.al.us/ogb/db_main.html'
state.description = """
Data from the Geological Survey of Alabama State Oil and Gas Board
"""
state.write_to_db()


state.layout = {
    'date_field': 'WEBOGBSDE.DBO.WebWell_Prj.SpudDate',
    # A list of API columns: Dataset.process_rows uses the first non-blank one, in this order.
    'api_field': ['WEBOGBSDE.DBO.BottomHoleLocations.APINumber', 'WEBOGBSDE.DBO.WebWell_Prj.API'],
    'api_includes_state': True,
    # type and category are both derived from the same WellType column, through different maps.
    'type_field': 'WEBOGBSDE.DBO.WebWell_Prj.WellType',
    'category_field': 'WEBOGBSDE.DBO.WebWell_Prj.WellType',
    'status_field': 'WEBOGBSDE.DBO.WebWell_Prj.WellStatus',
    'lat_field': 'WEBOGBSDE.DBO.WebWell_Prj.Latitude',
    'lon_field': 'WEBOGBSDE.DBO.WebWell_Prj.Longitude',
    # NOTE(review): 'source_well_id' is not read by the Dataset code visible in this notebook.
    'source_well_id': 'WEBOGBSDE.DBO.WebWell_Prj.Permit',
    'location_src_field': None,
    # One %s per entry of description_fields, in the same order.
    'description_layout': '\n'.join(['Permit Num: %s',
                             'Well Name: %s',
                             'Operator: %s'
                             ]),
    'description_fields': ['WEBOGBSDE.DBO.WebWell_Prj.Permit', 'WEBOGBSDE.DBO.WebWell_Prj.HistoricWellName', 
                           'WEBOGBSDE.DBO.WebWell_Prj.Operator'],
    # Source WellType code -> well type. Codes missing from this map fall back to
    # 'OTHER' in Dataset.process_rows (e.g. 'GS', which only category_map lists).
    'type_map': {        'OIL': 'OIL',        'GST': 'STORAGE',        'CM': 'GAS',        'GAS': 'GAS',
        'SWD': 'DISPOSAL',        'WI': 'INJECTION',        'WW': 'WATER',        'UN': 'OTHER',
        'GC': 'GAS',        'SHG': 'GAS',        'WS': 'WATER',        'GI': 'INJECTION',
    },
    # Source WellStatus code -> well status. Dataset.process_rows indexes this map
    # directly, so a status code missing here raises KeyError. The code list quoted
    # below includes 'TP', which has no entry.
    # NOTE(review): 'SI' (Shut In) is mapped to 'PA' although 'SI' is itself a valid
    # status in well_statuses -- confirm this is intended.
    'status_map': {
        'PR': 'ACTIVE',        'PA': 'PA',        'AC': 'ACTIVE',        'AB': 'PA',        'PW': 'PERMITTED',
        'PP': 'UNKNOWN',        'CA': 'CANCELLED',        'CI': 'CANCELLED',        'DA': 'PA',
        'PB': 'PA',        'SI': 'PA',        'UN': 'UNKNOWN',        'CV': 'UNKNOWN',
        'RJ': 'UNKNOWN',        'TA': 'TA', 
    },
    # Source WellType code -> well category; also indexed directly (KeyError on unknown codes).
    'category_map': {
        'OIL': 'CONVENTIONAL',        'GST': 'STORAGE',        'CM': 'CBM',        'GAS': 'CONVENTIONAL',
        'SWD': 'OTHER',        'WI': 'UNCONVENTIONAL',        'WW': 'OTHER',        'UN': 'OTHER',
        'GC': 'CONVENTIONAL',        'SHG': 'UNCONVENTIONAL',        'WS': 'WATER',        'GI': 'UNCONVENTIONAL',
        'GS': 'CONVENTIONAL'
    },
    'location_src_map': None
}

# The bare string below is reference text only: it is evaluated and discarded.
"""
From http://www.ogb.state.al.us/ogb/database.aspx:
Well Status Descriptions
AB - Abandoned, AC - Active, CA - Canceled, CI - Canceled with Injection, CV - Converted, DA - Dry and Abandoned, 
PA - Plugged and Abandoned,PB - Plugged Back, PR - Producing, PW - Permitted Well, RJ - Released Jurisdiction
SI - Shut In, TA - Temporarily Abandoned, TP - Temporarily Plugged and Abandoned,UN - Undesignated


Well Type Descriptions, CM - Coal Bed Methane, GAS - Natural Gas, GC - Gas Condensate, 
GI - Gas Injection, GST - Gas Storage, OIL - Oil, SHG - Shale Gas, 
SWD - Salt Water Disposal, UN - Undesignated, WI - Water Injection, WS - Water Source, 
WW - Water Well
"""


# Read the CSV, build the processed records, and upsert them into the wells table.
state.load_data()
dataset = Dataset(state)
db_result = dataset.commit_to_db()

Wrote description for AL to database
16414 of 17086 rows loaded
first item: [{'state': 'AL', 'api': '01297203230000', 'lon': -88.302816, 'lat': 30.245575, 'location_src': None, 'date': datetime.date(1991, 7, 29), 'type': 'GAS', 'status': 'PA', 'category': 'CONVENTIONAL', 'description': 'Permit Num: 10008-OS-52-BH\nWell Name: STATE LEASE 527 #1\nOperator: Arco Oil & Gas Co., Div. Of Atlantic Richfield'}]
loaded 16414 of 16414 records into the database


In [85]:
# Alaska
# Builds the AK State, stores its metadata, defines the column layout, then loads
# the CSV (default path csvs/ak-data.csv) and commits the processed wells.
state = State('AK')
state.set_projection(4267) # NAD27
state.source_url = 'http://aogweb.state.ak.us/DataMiner3/Forms/WellList.aspx'
state.description = """
Data from the Alaska Oil and Gas Conservation Commission
"""
# Persist source_url/description like the other state cells do; without this call
# the two values set above were never written to the states table.
state.write_to_db()

state.layout = {
    # A list of date columns: the first non-blank one, in this order, is used.
    'date_field': ['Spud Date', 'Completion Date', 'Release Date', 'Last Status Change'],
    'api_field': 'API',
    'api_includes_state': True,
    'type_field': 'Permitted Status',
    'status_field': 'Current Status',
    'category_field': 'Permitted Class',
    'lat_field': 'Wellhead Calculated Latitude (NAD27)',
    'lon_field': 'Wellhead Calculated Longitude (NAD27)',
    # One %s per entry of description_fields, in the same order.
    'description_layout': '\n'.join(['Permit No. %s',
                                     'Operator: %s',
                                     'Area: %s',
                                     'Field: %s',
                                     'Pools: %s',
                                     'Well Name: %s'
                                     ]),
    'description_fields': ['Permit', 'Operator', 'Area', 'Field', 'Pools', 'Well Name'],
    # 'Permitted Status' value -> well type. Values missing from this map fall back
    # to 'OTHER' in Dataset.process_rows.
    'type_map': {
        '': 'UNKNOWN',
        'Gas well, triple completion': 'GAS',
        'Oil & gas well, 2oil-1gas, triple comp': 'OILANDGAS',
        'Water injection, single completion': 'INJECTION',
        'Disposal injection well, Class 1': 'INJECTION',
        'Coal Bed Natural Gas': 'GAS',
        'Relief Well for Blowout': 'OTHER',
        'Information well': 'TEST',
        'Condensate well, single comp': 'OTHER',
        'Gas storage well': 'STORAGE',
        'Water alt gas injection': 'INJECTION',
        'Shale Bed Natural Gas': 'GAS',
        'Oil well, dual completion': 'OIL',
        'Unknown': 'UNKNOWN',
        'Water supply well': 'WATER',
        'Oil Well, Alt Wtr Inj, Single Comp': 'OIL',
        'Oil well, single completion': 'OIL',
        'Water injection, dual completion': 'INJECTION',
        'Gas well, dual completion': 'GAS',
        'Gas injection, single completion': 'GAS',
        'Disposal injection well, Class 2': 'INJECTION',
        'Oil & gas well, 2oil-2gas, quad comp': 'OILANDGAS',
        'Gas supply well': 'GAS',
        'Observation well': 'OBSERVATION',
        '401 Application Being Processed': 'UNKNOWN',
        'Geothermal': 'OTHER',
        'Gas well, single completion': 'GAS',
        'Gas well & Storage well': 'GAS',
    },
    # 'Current Status' value -> well status. Dataset.process_rows indexes this map
    # directly, so a value missing here raises KeyError.
    # NOTE(review): 'Shut In' maps to 'PA' although 'SI' is a valid status, and
    # 'Administratively abandoned' maps to 'ACTIVE' -- confirm both are intended.
    'status_map': {
        'Gas well, triple completion': 'ACTIVE',
        'Permit expired': 'CANCELLED',
        'Water injection, single completion': 'ACTIVE',
        'Disposal injection well, Class 1': 'ACTIVE',
        'Gas well & Disposal well, dual comp': 'ACTIVE',
        'Information well': 'ACTIVE',
        'Shut In': 'PA',
        'Gas storage well': 'ACTIVE',
        'Water alt gas injection': 'ACTIVE',
        'Oil well, dual completion': 'ACTIVE',
        'Suspended well': 'TA',
        'Unknown': 'UNKNOWN',
        'Water supply well': 'ACTIVE',
        'Administratively abandoned': 'ACTIVE',
        'Commingled well (triple), oil': 'ACTIVE',
        'Oil well, single completion': 'ACTIVE',
        'Water injection, dual completion': 'ACTIVE',
        'Gas well, dual completion': 'ACTIVE',
        'Gas injection, single completion': 'ACTIVE',
        'Commingled well (dual), oil': 'ACTIVE',
        'Water injection, single pool, two tbg strings': 'ACTIVE',
        'Disposal injection well, Class 2': 'ACTIVE',
        'Plugged & Abandoned': 'PA',
        'Surface Plug': 'TA',
        'Observation well': 'ACTIVE',
        'Permit cancelled': 'CANCELLED',
        'Gas well (dual) & Storage well': 'ACTIVE',
        'Sqzd orig hole w/mult laterals': 'ACTIVE',
        'Geothermal': 'ACTIVE',
        'Gas well, single completion': 'ACTIVE',
        'Gas well & Storage well': 'ACTIVE',
    },
    # 'Permitted Class' value -> well category; also indexed directly.
    'category_map': {
        'Development': 'CONVENTIONAL',
        'Exploratory': 'TEST',
        'Service': 'SERVICE',
        'Stratigraphic Test': 'TEST',
    },
    'location_src_field': None,
    'location_src_map': None
}

# Read the CSV, build the processed records, and upsert them into the wells table.
state.load_data()
dataset = Dataset(state)
db_result = dataset.commit_to_db()

8911 of 9160 rows loaded
first item: [{'state': 'AK', 'api': '50103205776300', 'lon': -151.043238764074, 'lat': 70.3376726847063, 'location_src': None, 'date': datetime.date(2018, 3, 13), 'type': 'OIL', 'status': 'UNKNOWN', 'category': 'CONVENTIONAL', 'description': 'Permit No. 218026\nOperator: CONOCOPHILLIPS ALASKA, INC.\nArea: ARCTIC SLOPE\nField: COLVILLE RIVER\nPools: QANNIK  OIL\nWell Name: COLVILLE RIV QANN CD2-463L2-01'}]
loaded 8911 of 8911 records into the database


In [119]:
# Arkansas
state = State('AR')
state.source_url = 'http://www.noggateway.org/reports'
state.description = "Data from the National Oil and Gas Gateway"
state.write_to_db()
state.set_projection(4269) # NAD83

state.layout = {
    'date_field': ['SPUD DATE', 'FIRST COMPLETION DATE', 'FIRST PRODUCTION DATE'],
    'api_field': 'API NUMBER',
    'api_includes_state': True,
    'type_field': 'STANDARD WELL TYPE DESCRIPTION',
    'status_field': 'STANDARD WELL STATUS DESCRIPTION',
    'category_field': 'STATE WELL TYPE',
    'lat_field': 'WELLHEAD LATITUDE, DECIMAL DEGREES',
    'lon_field': 'WELLHEAD LONGITUDE, DECIMAL DEGREES',
    'description_layout': '\n'.join(['WELL NAME: %s',
                             'WELL NUMBER: %s',
                             'OPERATOR: %s',
                             'COUNTY: %s',
                             'FIELD NAME: %s'
                            ]),
    'description_fields': ['WELL NAME', 'WELL NUMBER', 'OPERATOR', 'COUNTY', 'FIELD NAME'],
    'type_map': {
        'Underground Injection Control:Class 2 Disposal': 'DISPOSAL',
        'Unassigned': 'UNKNOWN',
        'Permit': 'UNKNOWN',
        'Coal Bed Methane': 'GAS',
        'Underground Injection Control:Class 2 Enhanced Recovery': 'SERVICE',
        'Water Supply': 'WATER',
        'Unknown': 'UNKNOWN',
        'Gas': 'GAS',
        'Gas Storage': 'STORAGE',
        'Other': 'OTHER',
        'Oil': 'OIL',
        'Brine Supply': 'WATER',
    },
    'status_map': {
        'Active:Producing': 'ACTIVE',
        'Abandoned': 'A',
        'Permit:Active': 'ACTIVE',
        'Abandoned:Plugged': 'PA',
        'Permit:Cancelled Expired': 'CANCELLED',
        'Inactive:Temporarily Abandoned': 'TA',
        'Inactive:Shut In': 'SI',
        'Unknown': 'UNKNOWN',
        'Unassigned': 'UNKNOWN',
        'Active': 'ACTIVE',
        'Active:Completed': 'ACTIVE',
    },
    'category_map': {
        'Need Code': 'UNKNOWN',
        'Oil - Production': 'CONVENTIONAL',
        'Seismic-exploratory Well': 'TEST',
        'Salt Water Injection': 'SERVICE',
        'Natural Gas - Dry': 'CONVENTIONAL',
        'Brine Injection Well': 'SERVICE',
        'Brine Supply Well': 'SERVICE',
        'Coal Bed Methane': 'CBM',
        'Salt Water Disposal': 'SERVICE',
        'Coal Bed Methane Service Well': 'SERVICE',
        'Enhanced Oil Recovery': 'FRAC',
        'Water Supply': 'SERVICE',
        'Unknown': 'UNKNOWN',
        'Expired Permit': 'UNKNOWN',
        'Gas Injector': 'SERVICE',
        'Gas Storage': 'STORAGE',
        'Waste Disposal Well': 'SERVICE',
        'Service Well': 'SERVICE',
    },
    'location_src_field': None,
    'location_src_map': None
}

state.load_data()
dataset = Dataset(state)
dataset.commit_to_db()

Wrote description for AR to database
30318 of 52490 rows loaded
first item: [{'state': 'AR', 'api': '03131110170000', 'lon': -94.2957, 'lat': 35.2014, 'location_src': None, 'date': datetime.date(2003, 9, 8), 'type': 'GAS', 'status': 'ACTIVE', 'category': 'CONVENTIONAL', 'description': 'WELL NAME: Harkreader  2-15\nWELL NUMBER: 2-15\nOPERATOR: Xto Energy, Inc.\nCOUNTY: Sebastian\nFIELD NAME: B-44 Area'}]
loaded 30318 of 30318 records into the database


30318

In [120]:
# Arizona
# Builds the AZ State, stores its metadata, defines the column layout, then loads
# the CSV (default path csvs/az-data.csv) and commits the processed wells.
state = State('AZ')
state.source_url = 'http://services.azgs.az.gov/arcgis/rest/services/aasggeothermal/AZWellHeaders/MapServer/0'
state.description = """
Data from the Arizona Oil and Gas Conservation Commission ArcGIS map at http://ogviewer.azdeq.gov/.

Dates do not seem to be available from this source.
"""
# The hook returns None for every input, so 'statusdate' is read but each well
# ends up with date None.
state.date_function = lambda x: None

state.write_to_db()

state.layout = {
    'date_field': 'statusdate',
    'api_field': 'apino',
    'api_includes_state': True,
    'type_field': 'commodityofinterest',
    'status_field': 'status',
    'category_field': None,
    'lat_field': 'latdegree',
    'lon_field': 'longdegree',
    'location_src_field': None,
    # One %s per entry of description_fields, in the same order.
    'description_layout': '\n'.join(['Well Name: %s',
                             'Operator: %s',
                             'County: %s',
                             'Field: %s', 
                             'AZ OGCC Notes: %s'
                            ]),
    'description_fields': ['wellname', 'operator', 'county', 'field', 'notes'],
    # Source commodity value -> well type. Lookups are exact-match, which is why
    # case and spacing variants ('water'/'Water', 'CarbonDioxide'/'Carbon Dioxide')
    # each have their own entry. Unlisted values fall back to 'OTHER'.
    'type_map': {
        'Carbon Dioxide': 'OTHER',
        'OilAndGas': 'OILANDGAS',
        'information': 'TEST',
        'water': 'WATER',
        'Information': 'TEST',
        'GeothermalEnergy': 'OTHER',
        'Helium': 'OTHER',
        'Unknown': 'UNKNOWN',
        'Geotechnical': 'OTHER',
        'Gas': 'GAS',
        'Brine': 'OTHER',
        'Monitor': 'OBSERVATION',
        'Water': 'WATER',
        'Observation': 'OBSERVATION',
        'Oil': 'OIL',
        'Liquified gas': 'GAS',
        'unknown': 'UNKNOWN',
        'CarbonDioxide': 'OTHER',
        'Salt': 'OTHER',
        'nil:missing': 'UNKNOWN'
    },
    # Source status value -> well status. Dataset.process_rows indexes this map
    # directly, so a value missing here raises KeyError. The misspelled keys
    # ('Abondoned', 'Temoprarily abandoned') are lookup keys and must stay as they are.
    # NOTE(review): 'Abondoned' maps to 'PA' while 'Abandoned' maps to 'A' -- confirm
    # the difference is intended.
    'status_map': {
        'Capped': 'PA',
        'Not completed': 'CANCELLED',
        'Abandoned': 'A',
        'Abandoned junked': 'A',
        'Abandoned plugged': 'PA',
        'Active': 'ACTIVE',
        'Groundwater sampling date': 'UNKNOWN',
        'Never drilled': 'CANCELLED',
        'Status': 'UNKNOWN',
        'Unknown': 'UNKNOWN',
        'unknown': 'UNKNOWN',
        'Abondoned': 'PA',
        'Shut in': 'SI',
        'Abandoned temporary': 'TA',
        'Temoprarily abandoned': 'TA',
        'nil:missing': 'UNKNOWN',
    },
    'category_map': None,
    'location_src_map': None
}

# Read the CSV, build the processed records, and upsert them into the wells table.
state.load_data()
dataset = Dataset(state)
db_result = dataset.commit_to_db()

Wrote description for AZ to database
1144 of 4774 rows loaded
first item: [{'state': 'AZ', 'api': '0201290001', 'lon': -113.6326688, 'lat': 33.49979433, 'location_src': None, 'date': None, 'type': 'TEST', 'status': 'PA', 'category': None, 'description': 'Well Name: Phillips Petroleum H01 Federal\nOperator: Phillips Petroleum\nCounty: LA PAZ\nField: \nAZ OGCC Notes: no logs'}]
loaded 1144 of 1144 records into the database


In [None]:
# state
# Blank template cell: copy it and fill in the state abbreviation, URLs and layout
# before running. NOTE(review): run as-is it calls write_to_db() for a state named ''
# and load_data() tries to open 'csvs/-data.csv'.
state = State('')
state.source_url = ''
state.description = ""
state.write_to_db()

state.layout = {
    # Source CSV column names; the blank strings are placeholders to fill in.
    'date_field': '',
    'api_field': '',
    'api_includes_state': True,
    'type_field': '',
    'status_field': '',
    'category_field': '',
    'lat_field': '',
    'lon_field': '',
    'location_src_field': '',
    # One %s per entry of description_fields, in the same order.
    'description_layout': '\n'.join(['Descriptor1: %s',
                             'Descriptor2: %s',
                             'Descriptor3: %s',
                             'Descriptor4: %s',
                             'Descriptor5: %s'
                            ]),
    'description_fields': ['DescCol1', 'DescCol2', 'DescCol3', 'DescCol4', 'DescCol5'],
    # Source value -> normalised value maps, to be filled in per state.
    'type_map': None,
    'status_map': None,
    'category_map': None,
    'location_src_map': None
}

# Read the CSV, build the processed records, and upsert them into the wells table.
state.load_data()
dataset = Dataset(state)
dataset.commit_to_db()

In [123]:
# California
# Work in progress: the layout below still carries template placeholders.
# The explicit filename makes load_data() read 'csvs/california-data.csv'.
state = State('CA', filename = 'california-data.csv')
state.source_url = 'http://spatialservices.conservation.ca.gov/arcgis/rest/services/DOMS/Wells/MapServer/0'
state.description = ""
state.write_to_db()

# NOTE(review): unlike the completed state cells, this layout does not define
# 'api_includes_state', 'category_field', 'location_src_field', 'category_map' or
# 'location_src_map'.
state.layout = {
    # A list of date columns: the first non-blank one, in this order, is used.
    'date_field': ['SpudDate', 'CompDate', 'AbdDate'],
    'api_field': 'API',
    'type_field': 'Type',
    'status_field': 'Status',
    'lat_field': 'Latitude',
    'lon_field': 'Longitude',
    # Still the template placeholders -- TODO replace with real labels and columns.
    'description_layout': '\n'.join(['Descriptor1: %s',
                             'Descriptor2: %s',
                             'Descriptor3: %s',
                             'Descriptor4: %s',
                             'Descriptor5: %s'
                            ]),
    'description_fields': ['DescCol1', 'DescCol2', 'DescCol3', 'DescCol4', 'DescCol5'],
    # With type_field set and type_map None, Dataset.ready() prints a type_map
    # skeleton and returns False, so no rows are processed or committed yet.
    'type_map': None,
    'status_map': None
}

state.load_data()
dataset = Dataset(state)
dataset.commit_to_db()

Wrote description for CA to database
Invalid layout. Available source file columns: 
'', 'API', 'AbdDate', 'AreaName', 'BLMWell', 'Bmeridian', 'Comments', 'CompDate', 'ConfWell', 'County', 'DirDrill', 'District', 'Dryhole', 'EPAWell', 'Elevation', 'FieldName', 'GISSource', 'HydFrac', 'Latitude', 'LeaseName', 'Location', 'Longitude', 'OBJECTID', 'OpCode', 'OpName', 'OpWellID', 'Range', 'RedCanFlag', 'RedrillFt', 'Section', 'SpudDate', 'Status', 'TotalDepth', 'Township', 'Type', 'URL', 'WellNumber'

Example rows: [OrderedDict([('', '0'), ('API', '02900001'), ('AbdDate', ''), ('AreaName', 'Any Area'), ('BLMWell', 'N'), ('Bmeridian', 'MD'), ('Comments', ''), ('CompDate', ''), ('ConfWell', 'N'), ('County', 'Kern'), ('DirDrill', 'N'), ('District', 'Inland'), ('Dryhole', 'N'), ('EPAWell', 'N'), ('Elevation', '1302 KB'), ('FieldName', 'Kern River'), ('GISSource', 'gps'), ('HydFrac', ' '), ('Latitude', '35.43406'), ('LeaseName', ''), ('Location', 'Fr ctr 538W 98S'), ('Longitude', '-118.95876'),

False

In [None]:
# Colorado
# Placeholder: only the State object is created; no source_url, description,
# layout or load step is defined in this cell yet.
state = State('CO')

In [None]:
# Connecticut
# Placeholder: only the State object is created; no source_url, description,
# layout or load step is defined in this cell yet.
state = State('CT')

In [None]:
# Delaware
# Placeholder: only the State object is created; no source_url, description,
# layout or load step is defined in this cell yet.
state = State('DE')

In [None]:
# District of Columbia
# Placeholder: only the State object is created; no source_url, description,
# layout or load step is defined in this cell yet.
state = State('DC')

In [None]:
# Florida
# Placeholder: only the State object is created; no source_url, description,
# layout or load step is defined in this cell yet.
state = State('FL')

In [None]:
# Georgia
# Placeholder: only the State object is created; no source_url, description,
# layout or load step is defined in this cell yet.
state = State('GA')

In [None]:
# Hawaii
# Placeholder: only the State object is created; no source_url, description,
# layout or load step is defined in this cell yet.
state = State('HI')

In [None]:
# Idaho
# Placeholder: only the State object is created; no source_url, description,
# layout or load step is defined in this cell yet.
state = State('ID')

In [None]:
# Illinois
# Placeholder: only the State object is created; no source_url, description,
# layout or load step is defined in this cell yet.
state = State('IL')

In [None]:
# Indiana
# Configures the Indiana source, registers it, defines the CSV layout and the
# symbol -> type/status mappings, then loads the data and commits it to the DB.
state = State('IN')
state.source_url = 'https://igs.indiana.edu/pdms/map/'
# Provenance text for this state (contains HTML markup).
state.description = """
Oil and gas records retrieved from the Indiana Geological Survey. Well location data is
retrieved from the Petroleum Database Management System at <a href="https://igs.indiana.edu/pdms/map/"
target="_blank">https://igs.indiana.edu/pdms/map/</a>. Well status information is retrieved
by querying the PDMS well record tables at: https://igs.indiana.edu/PDMS/WellSearch.cfm
All locations are assumed to be surface locations.
Indiana does not identify its wells using API numbers. For the purposes of our database,
we have used the two digit state prefix plus the IGS_ID to identify Indiana wells.
"""

# Called before the layout is assigned; presumably persists the state record
# (source_url / description) -- confirm against State.write_to_db.
state.write_to_db()

# IN Well Symbols and Statuses: https://igs.indiana.edu/pdms/Help/index.htm#t=Petroleum_Well_Symbols.htm
# Both 'type_field' and 'status_field' point at the SYMBOL column, so 'type_map'
# and 'status_map' below are keyed by the same set of IGS symbol codes.
state.layout = {
    'date_field': 'Date',
    'api_field': 'IGS_ID',                # Indiana has no API numbers; IGS_ID is used instead
    'api_includes_state': False,
    'type_field': 'SYMBOL',
    'status_field': 'SYMBOL',
    'category_field': None,
    'location_src_field': None,
    'lat_field': 'y',
    'lon_field': 'x',
    # Five %s placeholders, matched by the five entries in 'description_fields'
    # (presumably substituted in order -- IGS_ID is listed twice on purpose:
    # once for display, once inside the link).
    'description_layout': '\n'.join(['County: %s',
                             'Field Name: %s',
                             'IGS ID: %s',
                             'IGS Reported Status: %s',
                             'Link: <a href="https://igs.indiana.edu/pdms/wellEvents.cfm?igsID=%s">IGS link</a>'
                            ]),
    'description_fields': ['COUNTY', 'FIELD_NAME', 'IGS_ID', 'Status', 'IGS_ID'],
    # Symbol map: IGS symbol code -> well type.
    'type_map': {
        'OAG': 'OILANDGAS',
        'AGAS-C': 'GAS',
        'AOIL': 'OIL',
        'AWIO': 'INJECTION',
        'GSG': 'STORAGE',
        'ASWDO': 'DISPOSAL',
        'AGAS-L': 'GAS',
        'AWIG': 'INJECTION',
        'AOAG-LC': 'OILANDGAS',
        'GSO': 'STORAGE',
        'AOIL-C': 'OIL',
        'NPWSO': 'WATER',
        'TAOIL': 'OIL',
        'GAS': 'GAS',
        'AOIL-L': 'OIL',
        'AGAS': 'GAS',
        'AOAG-L': 'OILANDGAS',
        'AOAG-C': 'OILANDGAS',
        'AOIL-LC': 'OIL',
        'WIOG': 'INJECTION',
        'OBG': 'OBSERVATION',
        'ASWDG': 'DISPOSAL',
        'AGSO': 'STORAGE',
        'AGAS-LC': 'GAS',
        'AGSG': 'STORAGE',
        'WIG': 'INJECTION',
        'SWDO': 'DISPOSAL',
        'OIL': 'OIL',
        'TAGAS': 'GAS',
        'WIO': 'INJECTION',
        'SWDG': 'DISPOSAL',
        'TAOG': 'OILANDGAS',
        'AOAG': 'OILANDGAS',
        'AWIOG': 'INJECTION',
    },
    # Symbol map: IGS symbol code -> well status. Keep the key set identical to
    # 'type_map' above, since both are looked up from the same SYMBOL column.
    'status_map': {
        'OAG': 'ACTIVE',
        'AGAS-C': 'PA',
        'AOIL': 'PA',
        'AWIO': 'PA',
        'GSG': 'ACTIVE',
        'ASWDO': 'PA',
        'AGAS-L': 'PA',
        'AWIG': 'PA',
        'AOAG-LC': 'PA',
        'GSO': 'ACTIVE',
        'AOIL-C': 'PA',
        'NPWSO': 'ACTIVE',
        'TAOIL': 'TA',
        'GAS': 'ACTIVE',
        'AOIL-L': 'PA',
        'AGAS': 'PA',
        'AOAG-L': 'PA',
        'AOAG-C': 'PA',
        'AOIL-LC': 'PA',
        'WIOG': 'ACTIVE',
        'OBG': 'ACTIVE',
        'ASWDG': 'PA',
        'AGSO': 'PA',
        'AGAS-LC': 'PA',
        'AGSG': 'PA',
        'WIG': 'ACTIVE',
        'SWDO': 'ACTIVE',
        'OIL': 'ACTIVE',
        'TAGAS': 'TA',
        'WIO': 'ACTIVE',
        'SWDG': 'ACTIVE',
        'TAOG': 'TA',
        'AOAG': 'PA',
        'AWIOG': 'PA',
    },
    'category_map': None,
    'location_src_map': None
}

# Load the raw records, process them through Dataset, and commit to the database.
state.load_data()
dataset = Dataset(state)
db_result = dataset.commit_to_db()
# NOTE(review): any falsy result (e.g. 0 or None) is reported as an error;
# confirm what Dataset.commit_to_db returns on failure vs. zero rows loaded.
if db_result:
    print('loaded', db_result, 'of', len(dataset.processed_data), 'records into the database')
else:
    print("** error loading database! **")

In [None]:
# Iowa
# Placeholder cell: only creates the State object; no source, layout, or load step is defined here.
state = State('IA')

In [None]:
# Kansas
# Placeholder cell: only creates the State object; no source, layout, or load step is defined here.
state = State('KS')

In [None]:
# Kentucky
# Placeholder cell: only creates the State object; no source, layout, or load step is defined here.
state = State('KY')

In [None]:
# Louisiana
# Placeholder cell: only creates the State object; no source, layout, or load step is defined here.
state = State('LA')

In [None]:
# Maine
# Placeholder cell: only creates the State object; no source, layout, or load step is defined here.
state = State('ME')

In [None]:
# Maryland
# Placeholder cell: only creates the State object; no source, layout, or load step is defined here.
state = State('MD')

In [None]:
# Massachusetts
# Placeholder cell: only creates the State object; no source, layout, or load step is defined here.
state = State('MA')

In [None]:
# Michigan
# Placeholder cell: only creates the State object; no source, layout, or load step is defined here.
state = State('MI')

In [None]:
# Minnesota
# Placeholder cell: only creates the State object; no source, layout, or load step is defined here.
state = State('MN')

In [None]:
# Mississippi
# Placeholder cell: only creates the State object; no source, layout, or load step is defined here.
state = State('MS')

In [None]:
# Missouri
# Placeholder cell: only creates the State object; no source, layout, or load step is defined here.
state = State('MO')

In [None]:
# Montana
# Placeholder cell: only creates the State object; no source, layout, or load step is defined here.
state = State('MT')

In [None]:
# Nebraska
# Placeholder cell: only creates the State object; no source, layout, or load step is defined here.
state = State('NE')

In [None]:
# Nevada
# Placeholder cell: only creates the State object; no source, layout, or load step is defined here.
state = State('NV')

In [None]:
# New Hampshire
# Placeholder cell: only creates the State object; no source, layout, or load step is defined here.
state = State('NH')

In [None]:
# New Jersey
# Placeholder cell: only creates the State object; no source, layout, or load step is defined here.
state = State('NJ')

In [None]:
# New Mexico
# Placeholder cell: only creates the State object; no source, layout, or load step is defined here.
state = State('NM')

In [None]:
# New York
# Placeholder cell: only creates the State object; no source, layout, or load step is defined here.
state = State('NY')

In [None]:
# North Carolina
# Placeholder cell: only creates the State object; no source, layout, or load step is defined here.
state = State('NC')

In [None]:
# North Dakota
# Placeholder cell: only creates the State object; no source, layout, or load step is defined here.
state = State('ND')

In [None]:
# Ohio
# Placeholder cell: only creates the State object; no source, layout, or load step is defined here.
state = State('OH')

In [None]:
# Oklahoma
# Placeholder cell: only creates the State object; no source, layout, or load step is defined here.
state = State('OK')

In [None]:
# Oregon
# Placeholder cell: only creates the State object; no source, layout, or load step is defined here.
state = State('OR')

In [None]:
# Pennsylvania
# Configures the Pennsylvania source, registers it, defines the CSV layout and
# the type/status/category mappings, then loads the data and commits it to the DB.
state = State('PA')
state.source_url = 'http://www.depreportingservices.state.pa.us/ReportServer/Pages/ReportViewer.aspx?/Oil_Gas/Spud_External_Data'
# Provenance text for this state: how the CSV export was produced and its size.
state.description = """
Data downloaded in CSV form based on setting SPUD BEGIN DATE = 1/1/1800 and SPUD END DATE = 12/31/2099
Represents 142,788 wells, of which 110,000 have dates.
"""
# Called before the layout is assigned; presumably persists the state record
# (source_url / description) -- confirm against State.write_to_db.
state.write_to_db()

state.layout = {
    'date_field': 'SPUD_DATE',
    'api_field': 'API',
    'api_includes_state': False,
    'type_field': 'WELL_CODE_DESC',
    'status_field': 'WELL_STATUS',
    'category_field': 'UNCONVENTIONAL',   # Yes/No column, translated by 'category_map'
    'location_src_field': None,
    'lat_field': 'LATITUDE',
    'lon_field': 'LONGITUDE',
    # Six %s placeholders, matched by the six entries in 'description_fields'
    # (presumably substituted in order).
    'description_layout': '\n'.join(['County: %s',
                             'Operator Name: %s',
                             'OGO No: %s',
                             'Municipality: %s',
                             'Farm Name: %s',
                             'Well Configuration: %s'
                            ]),
    'description_fields': ['COUNTY', 'OPERATOR', 'OGO_NUM', 'MUNICIPALITY', 'FARM_NAME', 'CONFIGURATION'],

    # WELL_CODE_DESC value -> well type.
    'type_map': {
        'UNDETERMINED': 'OTHER',
        'MULTIPLE WELL BORE TYPE': 'OTHER',
        'OIL': 'OIL',
        'OBSERVATION': 'OBSERVATION',
        'DRY HOLE': 'DRY HOLE',
        'COALBED METHANE': 'GAS',
        'GAS': 'GAS',
        'COMB. OIL&GAS': 'OILANDGAS',
        'STORAGE WELL': 'STORAGE',
        'WASTE DISPOSAL': 'DISPOSAL',
        'INJECTION': 'INJECTION',
        'TEST WELL': 'TEST',
    },

    # WELL_STATUS value -> well status.
    'status_map': {
        'DEP Abandoned List': 'ORPHAN',
        'Plugged OG Well': 'PA',
        'DEP Orphan List': 'ORPHAN',
        'DEP Plugged': 'PA',
        'Regulatory Inactive Status': 'UNKNOWN',
        'Operator Reported Not Drilled': 'CANCELLED',
        'Proposed But Never Materialized': 'CANCELLED',
        'Active': 'ACTIVE',
        'Abandoned': 'PA',
    },

    # UNCONVENTIONAL value -> well category.
    'category_map': {
        'Yes': 'UNCONVENTIONAL',
        'No': 'CONVENTIONAL',
    },
    'location_src_map': None
}

# Load the raw records, process them through Dataset, and commit to the database.
state.load_data()
dataset = Dataset(state)
db_result = dataset.commit_to_db()
# NOTE(review): any falsy result (e.g. 0 or None) is reported as an error;
# confirm what Dataset.commit_to_db returns on failure vs. zero rows loaded.
if db_result:
    print('loaded', db_result, 'of', len(dataset.processed_data), 'records into the database')
else:
    print("** error loading database! **")

In [None]:
# Rhode Island
# Placeholder cell: only creates the State object; no source, layout, or load step is defined here.
state = State('RI')

In [None]:
# South Carolina
# Placeholder cell: only creates the State object; no source, layout, or load step is defined here.
state = State('SC')

In [None]:
# South Dakota
# Placeholder cell: only creates the State object; no source, layout, or load step is defined here.
state = State('SD')

In [None]:
# Tennessee
# Configures the Tennessee source, registers it, defines the CSV layout and the
# type mapping, then loads the data and commits it to the DB.
state = State('TN')

# Provenance text for this state (contains HTML markup).
state.description = """
Tennessee's Department of Environment & Conservation does not
make a lot of details available (no spud date, well status). The dates recorded are the 
Permit Date. Status is set to unknown for all wells. Per their website, 
<i>"This minimal permit information is intended to allow the public to easily track the number and 
locations of oil and gas well permits issued. For more detailed information about a specific well, 
contact the Oil and Gas program staff at 615-687-7109 . Permit files can be accessed by visiting 
the Oil and Gas Section at the Nashville Environmental Field Office in Nashville, TN 37243."</i>
"""
state.source_url = 'http://environment-online.state.tn.us:8080/pls/enf_reports/f?p=9034:34300:0::NO:::'
# Called before the layout is assigned; presumably persists the state record
# (source_url / description) -- confirm against State.write_to_db.
state.write_to_db()

state.layout = {
    'date_field': 'Permit Date',          # no spud date is published; permit date is used instead
    'api_field': 'API No',
    'api_includes_state': False,
    # NOTE(review): 'af' looks like a typo, but it may match the source CSV
    # header verbatim -- check the data file before changing it.
    'type_field': 'Purpose af Well',
    'status_field': None,                 # source has no status column (see description above)
    'category_field': None,
    'location_src_field': None,
    'lat_field': 'Latitude',
    'lon_field': 'Longitude',
    # Five %s placeholders, matched by the five entries in 'description_fields'
    # (presumably substituted in order).
    'description_layout': '\n'.join(['County: %s',
                             'Operator Name: %s',
                             'Permit No: %s',
                             'Well Name and Number: %s',
                             'EFO Name: %s'
                            ]),
    'description_fields': ['County', 'Operator Name', 'Permit No', 'Well Name and Number', 'EFO Name'],
    # 'Purpose af Well' value -> well type. This key was previously listed twice
    # (first as None, then as this dict); Python keeps the last value, so this
    # single entry is the effective mapping.
    'type_map': {
        '': 'OTHER',
        'Oil': 'OIL',
        'Oil And Gas': 'OILANDGAS',
        'Mineral Core Test': 'TEST',
        'Gas': 'GAS',
        'NCG - Domestic Use': 'OTHER',
    },
    'status_map': None,
    'category_map': None,
    'location_src_map': None
}

# Load the raw records, process them through Dataset, and commit to the database.
state.load_data()
dataset = Dataset(state)
db_result = dataset.commit_to_db()
# NOTE(review): any falsy result (e.g. 0 or None) is reported as an error;
# confirm what Dataset.commit_to_db returns on failure vs. zero rows loaded.
if db_result:
    print('loaded', db_result, 'of', len(dataset.processed_data), 'records into the database')
else:
    print("** error loading database! **")

In [None]:
# Texas
# Placeholder cell: only creates the State object; no source, layout, or load step is defined here.
state = State('TX')

In [None]:
# Utah
# Placeholder cell: only creates the State object; no source, layout, or load step is defined here.
state = State('UT')

In [None]:
# Vermont
# Placeholder cell: only creates the State object; no source, layout, or load step is defined here.
state = State('VT')

In [None]:
# Virginia
# Placeholder cell: only creates the State object; no source, layout, or load step is defined here.
state = State('VA')

In [None]:
# Washington
# Placeholder cell: only creates the State object; no source, layout, or load step is defined here.
state = State('WA')

In [None]:
# West Virginia
# Placeholder cell: only creates the State object; no source, layout, or load step is defined here.
state = State('WV')

In [None]:
# Wisconsin
# Placeholder cell: only creates the State object; no source, layout, or load step is defined here.
state = State('WI')

In [None]:
# Wyoming
# Placeholder cell: only creates the State object; no source, layout, or load step is defined here.
state = State('WY')

In [None]:
# Offshore regions, identified by a two-letter code with a '1' suffix.
# Placeholder cell: no source, layout, or load step is defined for any of them.
# NOTE(review): each statement rebinds `state`, so only the last instance
# ('DC1') remains bound after this cell runs; the earlier three are only
# useful if State(...) has side effects -- confirm, or split into one cell per region.
# Alaska Offshore
state = State('AK1')
# Pacific Coast Offshore
state = State('CA1')
# Northern Gulf of Mexico
state = State('TX1')
# Atlantic Coast Offshore
state = State('DC1')
