In [None]:
import os
import random
import re # used to split the city state and zip_code into seperate variables
import time
# import to iterate through'owners' dictionary populated with owner objects
from itertools import islice
from urllib.parse import urlencode

import pandas as pd
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import TimeoutException, StaleElementReferenceException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

In [None]:
# Read only the header line of the CSV (nrows=0 parses no data rows); only the
# column labels of this frame are used, to recover the column names.
df_row_names = pd.read_csv('../Data/MineralData.CSV', encoding='Latin-1', low_memory=False, nrows=0)

In [None]:
# Load the data rows of the CSV file: the first line is skipped and, because
# header=None, pandas assigns integer column labels.
df = pd.read_csv(
    '../Data/MineralData.CSV',
    header=None,
    skiprows=1,
    encoding="Latin-1",
    low_memory=False,
)

In [None]:
# Take the first column label of df_row_names, split it on double quotes and
# keep only the pieces longer than one character (presumably this drops the
# empty strings and the separators left between quoted names).
column_names = df_row_names.columns[0]
column_name_string = list(
    filter(lambda piece: len(piece) > 1, column_names.split('"'))
)

In [None]:
# Print the number of parsed column names (a bare expression in the middle of
# a cell is not displayed, so it has to be printed explicitly)
print(len(column_name_string))

# Print every column name, one per line
for x in column_name_string:
    print(x)

In [None]:
# Create newDF: the contents of df with the parsed names as column labels.
# Relabelling a copy (instead of rebuilding the frame from df.values) keeps
# every column's dtype rather than upcasting mixed-type data to object.
# Assigning the labels raises ValueError if their count differs from the
# number of columns in df.
newDF = df.copy()
newDF.columns = column_name_string

In [None]:
# Preview the first five rows of the relabelled frame
newDF.head(5)

Well attributes:
YEAR LEASE STARTED
DECIMAL INTEREST
LEASE NAME
OPER NAME
DESCRIPTION 1
DESCRIPTION 2
IN CARE OF
ACRES
LEASE NUMBER
EXEMPT CODE
CUSTOMER GEO#
TCEQ VALUE-POLUTION CNTL
MINERAL ACCOUNT NUMBER
MINERAL ACCOUNT SEQUENCE #
PREVIOUS MIN ACCT SEQ #
PREVIOUS MIN ACCOUNT #
PRIVACY CODE
COMPLIANCE CODE
JUR 1 TAXABLE VALUE # sum this for the owner

In [None]:
# new owner and well objects with correct city state zip

# create Owner and well objects
# store Owner objects in a list called owners
class Owner:
    """A mineral-interest owner and the wells credited to them.

    Attributes:
        name: Owner name exactly as passed in.
        number: Owner number.
        address, city, state, zip_code: Mailing address parts as passed in.
        total_value: Sum of ``well.value`` over ``wells``.
        num_wells: Number of entries in ``wells``.
        wells: Well objects added through ``add_well``.
        personal_info: Object set by ``associate_personal_info``; ``None``
            until that method is called.
        first_name, middle_name, last_name: Result of ``_split_name(name)``;
            all empty strings when the name is not recognised as a person.
    """

    # Whole words that mark a name as an organisation rather than a person.
    # The first seven are the original keywords; CORP, CORPORATION,
    # INCORPORATED and COMPANIES were previously caught only because "CO" /
    # "INC" were matched as substrings, so they are listed explicitly now.
    _ORG_WORDS = frozenset({
        "LLC", "LTD", "CO", "COMPANY", "INC", "INVESTMENTS", "FAMILY",
        "CORP", "CORPORATION", "INCORPORATED", "COMPANIES",
    })

    def __init__(self, name, owner_number, address, city, state, zip_code):
        self.name = name
        self.number = owner_number
        self.address = address
        self.city = city
        self.state = state
        self.zip_code = zip_code
        self.total_value = 0
        self.num_wells = 0
        self.wells = []
        # Defined up front so reading it before associate_personal_info() is
        # called yields None instead of raising AttributeError.
        self.personal_info = None
        # Split name into first, middle and last name
        self.first_name, self.middle_name, self.last_name = self._split_name(name)

    def update_wells_info(self):
        """Recompute ``num_wells`` and ``total_value`` from ``wells``."""
        self.num_wells = len(self.wells)
        self.total_value = sum(well.value for well in self.wells)

    def add_well(self, well):
        """Append ``well`` and update the running count and total value."""
        self.wells.append(well)
        self.num_wells += 1
        self.total_value += well.value

    def associate_personal_info(self, personal_info):
        """Attach a personal-info object to this owner."""
        self.personal_info = personal_info

    def _split_name(self, name):
        """Split a "LAST FIRST [MIDDLE]" name into (first, middle, last).

        Returns three empty strings when ``name`` is not a string, contains
        "&", contains one of ``_ORG_WORDS`` as a whole word, or has fewer
        than two words.
        """
        if not isinstance(name, str):
            return "", "", ""

        # split() with no argument ignores padding and repeated spaces.
        name_parts = name.split()
        # Compare whole words (minus trailing punctuation such as "INC.") so
        # that surnames like COOK, SCOTT or VINCENT are not mistaken for the
        # keywords "CO" / "INC".
        words = {part.strip(".,;") for part in name_parts}
        if "&" in name or words & self._ORG_WORDS:
            return "", "", ""

        first_name = ""
        middle_name = ""
        last_name = ""
        if len(name_parts) == 2:
            last_name, first_name = name_parts
        elif len(name_parts) >= 3:
            last_name = name_parts[0]
            # The longer of the next two words is taken as the first name and
            # the shorter (e.g. an initial) as the middle name; when they are
            # equally long the written order is kept.
            if len(name_parts[2]) > len(name_parts[1]):
                first_name, middle_name = name_parts[2], name_parts[1]
            else:
                first_name, middle_name = name_parts[1], name_parts[2]
        return first_name, middle_name, last_name

class Well:
    """Plain record for one well row.

    Every constructor argument is stored unchanged on an attribute of the
    same name.
    """

    def __init__(self, yr_lease_start, dec_int, lease_name, lease_num, oper_name, desc1, desc2, ico, acres, ex_code, cust_geo, tceq, min_acct_num, min_acct_seq, prev_min_acct_seq, prev_min_acct_num, priv_code, comp_code, value):
        # Lease and operator fields
        self.yr_lease_start, self.dec_int = yr_lease_start, dec_int
        self.lease_name, self.lease_num = lease_name, lease_num
        self.oper_name = oper_name
        # Free-text description fields and the "in care of" field
        self.desc1, self.desc2, self.ico = desc1, desc2, ico
        # Acreage and code fields
        self.acres, self.ex_code = acres, ex_code
        self.cust_geo, self.tceq = cust_geo, tceq
        # Current and previous mineral-account identifiers
        self.min_acct_num, self.min_acct_seq = min_acct_num, min_acct_seq
        self.prev_min_acct_seq, self.prev_min_acct_num = prev_min_acct_seq, prev_min_acct_num
        self.priv_code, self.comp_code = priv_code, comp_code
        # Value that Owner.add_well() adds to the owner's total
        self.value = value

class PersonalInfo:
    """Personal details held for an owner.

    Every constructor argument is stored unchanged on an attribute of the
    same name.
    """

    def __init__(self, first_name, middle_name, last_name, age, birth_date, death_date, location):
        # Name parts
        self.first_name, self.middle_name, self.last_name = first_name, middle_name, last_name
        # Age and life dates
        self.age, self.birth_date, self.death_date = age, birth_date, death_date
        self.location = location

owners = {}  # Dictionary mapping owner name -> Owner object
# NOTE(review): owners are keyed by name, so two different owner numbers that
# share a name are merged into one Owner - confirm this is intended.

# Combined "city state zip" field: city text (may contain spaces), a
# two-letter state, then the ZIP digits. Whitespace/commas between the parts
# are optional after the state, and a trailing ZIP extension is tolerated
# but not captured.
_CITY_STATE_ZIP_RE = re.compile(
    r"^\s*(?P<city>.*?\S)[\s,]+(?P<state>[A-Za-z]{2})[\s,]*(?P<zip>\d+)(?:[\s-]+\d+)?\s*$"
)


def _split_city_state_zip(city_st_zip):
    """Return (city, state, zip_code) parsed from the combined field.

    Returns (None, None, None) when the value is not a string (e.g. a missing
    cell read as NaN) or does not look like "CITY ST ZIP".
    """
    if not isinstance(city_st_zip, str):
        return None, None, None
    match = _CITY_STATE_ZIP_RE.match(city_st_zip)
    if match is None:
        return None, None, None
    return match.group("city"), match.group("state"), match.group("zip")


owner_name_col = newDF["OWNER NAME"]
owner_number_col = newDF["OWNER NUMBER"]
st_address_col = newDF["STREET ADDRESS"]
city_st_zip_col = newDF["CITY, STATE, AND ZIP"]

yr_lease_start_col = newDF["YEAR LEASE STARTED"]
dec_int_col = newDF["DECIMAL INTEREST"]
lease_name_col = newDF["LEASE NAME"]
lease_num_col = newDF["LEASE NUMBER"]
oper_name_col = newDF["OPER NAME"]
desc1_col = newDF["DESCRIPTION 1"]
desc2_col = newDF["DESCRIPTION 2"]
ico_col = newDF["IN CARE OF"]
acres_col = newDF["ACRES"]
ex_code_col = newDF["EXEMPT CODE"]
cust_geo_col = newDF["CUSTOMER GEO#"]
tceq_col = newDF["TCEQ VALUE-POLUTION CNTL"]
min_acct_num_col = newDF["MINERAL ACCOUNT NUMBER"]
min_acct_seq_col = newDF["MINERAL ACCOUNT SEQUENCE #"]
prev_min_acct_seq_col = newDF["PREVIOUS MIN ACCT SEQ #"]
prev_min_acct_num_col = newDF["PREVIOUS MIN ACCOUNT #"]
priv_code_col = newDF["PRIVACY CODE"]
comp_code_col = newDF["COMPLIANCE CODE"]
value_col = newDF["JUR 1 TAXABLE VALUE"]

# Walk the rows: one Well per row, one Owner per distinct owner name
for (
    name, number, address, city_st_zip,
    yr_lease_start, dec_int, lease_name, lease_num, oper_name,
    desc1, desc2, ico, acres, ex_code, cust_geo, tceq,
    min_acct_num, min_acct_seq, prev_min_acct_seq, prev_min_acct_num,
    priv_code, comp_code, value,
) in zip(
    owner_name_col, owner_number_col, st_address_col, city_st_zip_col,
    yr_lease_start_col, dec_int_col, lease_name_col, lease_num_col, oper_name_col,
    desc1_col, desc2_col, ico_col, acres_col, ex_code_col, cust_geo_col, tceq_col,
    min_acct_num_col, min_acct_seq_col, prev_min_acct_seq_col, prev_min_acct_num_col,
    priv_code_col, comp_code_col, value_col,
):
    # Create a new well object for this row
    well = Well(yr_lease_start, dec_int, lease_name, lease_num, oper_name, desc1, desc2, ico, acres, ex_code, cust_geo, tceq, min_acct_num, min_acct_seq, prev_min_acct_seq, prev_min_acct_num, priv_code, comp_code, value)

    # Get the owner object from the dictionary if it exists, otherwise create
    # it (with its placeholder PersonalInfo) exactly once
    owner = owners.get(name)
    if owner is None:
        city, state, zip_code = _split_city_state_zip(city_st_zip)
        owner = Owner(name, number, address, city, state, zip_code)
        owner.associate_personal_info(
            PersonalInfo(
                owner.first_name,
                owner.middle_name,
                owner.last_name,
                0,                 # Placeholder for age
                None,              # Placeholder for birth_date
                None,              # Placeholder for death_date
                owner.city         # Location defaults to the owner's city
            )
        )
        owners[name] = owner

    # add_well() already keeps num_wells and total_value up to date, so no
    # per-row update_wells_info() recomputation is needed
    owner.add_well(well)


# Structure and contents of an owner object: 

Owner Name: A E D INVESTMENTS LLC         
Owner Number: 807262
Address: 333 TEXAS ST, STE 1525        
City: SHREVEPORT
State: LA
Zip Code: 71101
Total Value: 13310
Number of Wells: 33
Owner's Personal Information:
- First Name: N/A
- Middle Name: N/A
- Last Name: N/A
- Age: N/A
- Birth Date: N/A
- Death Date: N/A
Owner's Wells:
- Lease Name: BOX GAS UNIT #1               
  Lease Number: 20685
- Lease Name: PORTER #1                     
  Lease Number: 40770
- Lease Name: HEMBY GU (1)                  
  Lease Number: 44231
- Lease Name: PORTER #2                     
  Lease Number: 44243
Personal Info:
- First name
- Middle Name
- Last name
- Age
- Birth date
- Death date
- Location

In [None]:
# Report how many Owner objects are stored in the owners dictionary
num_owners = len(owners.values())
print("Number of owners (owner objects):", num_owners)

In [None]:
# Materialise every Owner object held in the owners dictionary as a list
owners_list = [*owners.values()]

In [None]:
# people_owners: owners for which both a first and a last name were parsed,
# i.e. the ones treated as people
people_owners = [
    candidate
    for candidate in owners_list
    if not (candidate.first_name == "" or candidate.last_name == "")
]

In [None]:
# people_owners is already a list; people_owners_list is a second name for it
people_owners_list = people_owners
print("People owners: ", len(people_owners_list))

In [None]:
# company_owners: owners missing a first or a last name, i.e. every owner
# that is not in people_owners
company_owners = [
    candidate
    for candidate in owners_list
    if not (candidate.first_name != "" and candidate.last_name != "")
]

# company_owners_list is a second name for the same list
company_owners_list = company_owners

In [None]:
# Summary of how the owners were split into people and companies
for label, group in (
    ("Total owner objects: ", owners),
    ("People owners: ", people_owners),
    ("Company owners: ", company_owners),
):
    print(label, len(group))

________________________
People Finder Search:
________________________

In [None]:
pip install pyppeteer


In [None]:
pip install nest_asyncio


In [None]:
# JUST JOCELYN SEARCH 
import asyncio
import nest_asyncio
from pyppeteer import launch

nest_asyncio.apply()  # Patch asyncio so run_until_complete() can be used inside Jupyter, where an event loop is already running

async def people_finder_search(firstName, lastName, city, state):
    """Search intelius.com for one person and return the first listed result.

    The browser is routed through the ScraperAPI proxy. The proxy credentials
    are read from the environment instead of being stored in the notebook:
    ``SCRAPERAPI_KEY`` (required) and ``SCRAPERAPI_USERNAME`` (optional,
    defaults to ``'scraperapi'``). A key that was previously committed in
    this notebook should be treated as leaked and rotated.

    Args:
        firstName: First name to search for.
        lastName: Last name to search for.
        city: City to search in.
        state: State to search in.
            Empty or ``None`` values are left out of the query.

    Returns:
        dict: ``{'name': ..., 'age': ..., 'location': ...}`` holding the text
        of the first result; the same dict is also printed.

    Raises:
        KeyError: If ``SCRAPERAPI_KEY`` is not set.
        Errors raised by pyppeteer (for example when ``#people`` never
        appears) propagate to the caller.
    """
    PROXY_USERNAME = os.environ.get('SCRAPERAPI_USERNAME', 'scraperapi')
    PROXY_PASSWORD = os.environ['SCRAPERAPI_KEY']
    PROXY_SERVER = 'proxy-server.scraperapi.com'
    PROXY_SERVER_PORT = '8001'

    # URL-encode the search terms so spaces or "&" cannot corrupt the query
    search_terms = {'firstName': firstName, 'lastName': lastName, 'city': city, 'state': state}
    query = urlencode({key: value for key, value in search_terms.items() if value})
    search_url = f'https://www.intelius.com/results/?{query}'

    browser = await launch(headless=False, ignoreHTTPSErrors=True, args=[f'--proxy-server=http://{PROXY_SERVER}:{PROXY_SERVER_PORT}'])

    # Everything after launch runs inside try so the browser is always closed
    try:
        page = await browser.newPage()
        await page.authenticate({'username': PROXY_USERNAME, 'password': PROXY_PASSWORD})

        await page.goto(search_url, {'timeout': 180000})

        await page.waitForSelector('#people', {'timeout': 30000})

        # Short pause after the container appears before reading the fields
        await asyncio.sleep(2)

        name = await page.evaluate('document.querySelector("#people > div > div > ul > li.name").textContent.trim()')
        age = await page.evaluate('document.querySelector("#people > div > div > ul > li.age").textContent.trim()')
        location = await page.evaluate('document.querySelector("#people > div > div > ul > li.location").textContent.trim()')

        result = {'name': name, 'age': age, 'location': location}
        print(result)
        return result

    finally:
        await browser.close()

# Get the current event loop (inside Jupyter this is the kernel's own loop)
loop = asyncio.get_event_loop()

# Call the function with your search parameters using the event loop
loop.run_until_complete(people_finder_search('Jocelyn', 'Rupp', 'Phoenix', 'AZ'))

# The loop is deliberately NOT closed: it belongs to the notebook kernel, and
# closing it either raises ("Cannot close a running event loop") or leaves
# later cells with a closed loop.

In [None]:
# SEARCH FOR FIRST 5 PEOPLE OWNERS
import asyncio
import nest_asyncio
from pyppeteer import launch

nest_asyncio.apply()  # Patch asyncio so run_until_complete() can be used inside Jupyter, where an event loop is already running

# Define people_finder_search function 
async def people_finder_search(firstName, lastName, city, state):
    """Search intelius.com for one person and return the first listed result.

    The browser is routed through the ScraperAPI proxy. The proxy credentials
    are read from the environment rather than from notebook globals:
    ``SCRAPERAPI_KEY`` (required) and ``SCRAPERAPI_USERNAME`` (optional,
    defaults to ``'scraperapi'``).

    Pop-ups on the result page are not handled here: the function simply
    waits for the ``#people`` container.

    Args:
        firstName: First name to search for.
        lastName: Last name to search for.
        city: City to search in.
        state: State to search in.
            Empty or ``None`` values are left out of the query.

    Returns:
        dict: ``{'name': ..., 'age': ..., 'location': ...}`` holding the text
        of the first result; the same dict is also printed.

    Raises:
        KeyError: If ``SCRAPERAPI_KEY`` is not set.
        Errors raised by pyppeteer (for example when ``#people`` never
        appears) propagate to the caller.
    """
    proxy_username = os.environ.get('SCRAPERAPI_USERNAME', 'scraperapi')
    proxy_password = os.environ['SCRAPERAPI_KEY']
    PROXY_SERVER = 'proxy-server.scraperapi.com'
    PROXY_SERVER_PORT = '8001'

    # URL-encode the search terms so spaces or "&" cannot corrupt the query
    search_terms = {'firstName': firstName, 'lastName': lastName, 'city': city, 'state': state}
    query = urlencode({key: value for key, value in search_terms.items() if value})
    search_url = f'https://www.intelius.com/results/?{query}'

    browser = await launch(headless=False, ignoreHTTPSErrors=True, args=[f'--proxy-server=http://{PROXY_SERVER}:{PROXY_SERVER_PORT}'])

    # Everything after launch runs inside try so the browser is always closed
    try:
        page = await browser.newPage()
        await page.authenticate({'username': proxy_username, 'password': proxy_password})

        await page.goto(search_url, {'timeout': 180000})

        await page.waitForSelector('#people', {'timeout': 30000})

        # Short pause after the container appears before reading the fields
        await asyncio.sleep(2)

        name = await page.evaluate('document.querySelector("#people > div > div > ul > li.name").textContent.trim()')
        age = await page.evaluate('document.querySelector("#people > div > div > ul > li.age").textContent.trim()')
        location = await page.evaluate('document.querySelector("#people > div > div > ul > li.location").textContent.trim()')

        result = {'name': name, 'age': age, 'location': location}
        print(result)
        return result

    finally:
        await browser.close()

# Proxy settings. The ScraperAPI key is read from the environment
# (SCRAPERAPI_KEY, required; SCRAPERAPI_USERNAME optional) instead of being
# stored in the notebook; a key that was committed earlier should be rotated.
PROXY_SERVER = 'proxy-server.scraperapi.com'
PROXY_SERVER_PORT = '8001'
PROXY_USERNAME = os.environ.get('SCRAPERAPI_USERNAME', 'scraperapi')
PROXY_PASSWORD = os.environ['SCRAPERAPI_KEY']

PEOPLE_SELECTOR = '#people'
# Selector of the "We've located ..." pop-up shown on some result pages
INTRO_MODAL_SELECTOR = '#page-results > aside.intro-modal > p.intro-modal-heading > span'


async def _search_person(browser, person_owner):
    """Search one owner in a fresh page of ``browser``.

    Returns a dict with the cleaned 'name', 'age' and 'location' text of the
    first result, or None when the ``#people`` container never appears.
    The page is always closed before returning.
    """
    search_terms = {
        'firstName': person_owner.first_name,
        'lastName': person_owner.last_name,
        'city': person_owner.city,
        'state': person_owner.state,
    }
    # Leave out missing values (city/state are None when the address could
    # not be parsed) and URL-encode the rest
    query = urlencode({key: value for key, value in search_terms.items() if value})
    search_url = f'https://www.intelius.com/results/?{query}'

    # Create a new page for each search
    page = await browser.newPage()
    try:
        await page.authenticate({'username': PROXY_USERNAME, 'password': PROXY_PASSWORD})
        await page.goto(search_url, {'timeout': 180000})

        # If the results are not there yet, dismiss the pop-up when present
        if not await page.querySelector(PEOPLE_SELECTOR):
            if await page.querySelector(INTRO_MODAL_SELECTOR):
                await page.click(INTRO_MODAL_SELECTOR)

        try:
            await page.waitForSelector(PEOPLE_SELECTOR, {'timeout': 30000})
        except asyncio.TimeoutError:
            # pyppeteer's TimeoutError derives from asyncio.TimeoutError;
            # no result list means there is nothing to read for this owner
            return None

        await asyncio.sleep(2)

        result = {}
        for field in ('name', 'age', 'location'):
            text = await page.evaluate(f'document.querySelector("#people > div > div > ul > li.{field}").textContent.trim()')
            # Clean up extracted text
            result[field] = text.replace('\t', '').replace('\n', '')
        return result
    finally:
        await page.close()  # Close the page after each search


async def _search_people_owners(person_owners, limit=5):
    """Search the first ``limit`` owners with one shared browser and print each result."""
    # Launch through the proxy that the credentials above belong to; launching
    # before the try means a failed launch cannot reach browser.close()
    browser = await launch(headless=False, ignoreHTTPSErrors=True, args=[f'--proxy-server=http://{PROXY_SERVER}:{PROXY_SERVER_PORT}'])
    try:
        for person_owner in person_owners[:limit]:
            result = await _search_person(browser, person_owner)
            if result is None:
                print(f'No results found for {person_owner.first_name} {person_owner.last_name}')
            else:
                print(result)
    finally:
        # Close the browser after all searches are complete
        await browser.close()


# Run the searches on the current event loop. The loop is deliberately not
# closed afterwards: inside Jupyter it is the kernel's own loop.
asyncio.get_event_loop().run_until_complete(_search_people_owners(people_owners_list))


In [None]:
# Partition the people owners by the age stored on their PersonalInfo object:
# an age strictly greater than 70 goes to OwnersOver70, everything else
# (including an age of None) goes to Owners70AndUnder.
OwnersOver70 = []
Owners70AndUnder = []

for owner in people_owners:
    age = owner.personal_info.age
    # Pick the destination list first, then append once
    target = Owners70AndUnder if (age is None or not age > 70) else OwnersOver70
    target.append(owner)


Iterate through the people_owners_list and search each owner on PeopleFinder.com

PeopleFinder.com


----------------
1. Search PeopleFinder.com using owner's first and last name and city and state
2. Navigate to Personal section
3. Read and store first, middle and last name in PersonalInfo Object
4. Read in and store birth information: age and born
5. Read in death information: Death date
6. Navigate to location history 

1. Search PeopleFinder.com using owner's street address, city, state, and zipcode
2. Navigate to Residents section
3. Check possible residents' names of address searched and see if the last name matches first
4. If last name is found then check if the first name matches - store the whole name in the PersonalInfo object
5. If the first and last name match the owner name then check the age of the person and store in PersonalInfo object
6. Navigate to ownership section
7. Open Ownership History tab
8. Read ownership history and see if the owner is the most recent owner/current owner - store the date of ownership of property in the PersonalInfo object

PersonalInfo object: 
    Name:
    Age: 
    Born: 
    Property Ownership History: 
    Current owner of property: yes or no

SEARCH BY NAME - PeopleFinder.com using owner's name, city, state, and zipcode