### Imports

In [43]:
import os

import pandas as pd
import requests
import sqlalchemy
from bs4 import BeautifulSoup

### HTTP Request

#### Store the website URL in a variable

In [44]:
# Justia lawyer directory page for San Francisco, California (first page of results).
website = "https://www.justia.com/lawyers/california/san-francisco"

#### Get Request

In [45]:
# requests has no default timeout, so without one a stalled connection would
# hang the notebook indefinitely.
response = requests.get(website, timeout=30)
# Raise on 4xx/5xx so an error page is never parsed as if it were the listing.
response.raise_for_status()

#### Status Code

In [46]:
# Display the Response repr, which includes the HTTP status code.
response

<Response [200]>

### Soup Object

In [48]:
# Parse the raw response bytes with Python's built-in 'html.parser' backend.
soup = BeautifulSoup(response.content, features='html.parser')

### Results

In [49]:
# Collect every <div> whose data-vars-action attribute is 'OrganicListing';
# each match is treated below as one lawyer listing.
listing_filter = {'data-vars-action': 'OrganicListing'}
results = soup.find_all('div', attrs=listing_filter)

In [50]:
# Number of listing elements found on this page.
len(results)

40

### Target necessary data

In [None]:
# Name
# Short Bio
# Specialization
# University
# Address
# Phone
# Email Link

#### Name

In [54]:
# Name of the first listing, with surrounding whitespace trimmed.
results[0].find('strong', class_='lawyer-name').get_text().strip()

'Doug Bend'

#### Short Bio

In [57]:
# Short bio line of the first listing, with surrounding whitespace trimmed.
results[0].find('div', class_='lawyer-expl').get_text().strip()

'San Francisco, CA Attorney'

#### Specialization

In [59]:
# Practice areas of the first listing (text is used as-is, no trimming).
results[0].find('span', class_='-practices').get_text()

'Business, Entertainment & Sports, Real Estate and Trademarks'

#### University

In [61]:
# Law school(s) of the first listing (text is used as-is, no trimming).
results[0].find('span', class_='-law-schools').get_text()

'Georgetown University Law Center'

#### Address

In [66]:
# Address of the first listing: trim the ends, then delete every tab and
# newline character left inside the text.
results[0].find('span', class_='-address').get_text().strip().translate(str.maketrans('', '', '\t\n'))

'2181 Greenwich StreetSan Francisco,CA 94123'

#### Phone

In [69]:
# Phone number of the first listing, with surrounding whitespace trimmed.
results[0].find('strong', class_='-phone').get_text().strip()

'(415) 633-6841'

#### Email Link

In [71]:
# href of the first listing's '-email' anchor (a contact-page URL, not an address).
results[0].find('a', class_='-email').get('href')

'https://lawyers.justia.com/lawyer/doug-bend-1662623/contact'

### Put everything together inside a For-Loop

In [75]:
# One list per output column; every listing appends exactly one entry to each
# list so the columns stay aligned.
name = []
short_bio = []
specialization = []
university = []
address = []
phone = []
email_link = []


def _text_of(listing, tag, css_class):
    """Return the text of the first `tag` with class `css_class` in `listing`.

    Returns '' when the listing has no such element. This replaces the former
    bare `except:` blocks, which also hid unrelated errors (typos, interrupts).
    """
    element = listing.find(tag, {'class': css_class})
    return '' if element is None else element.get_text()


for result in results:
    name.append(_text_of(result, 'strong', 'lawyer-name').strip())
    short_bio.append(_text_of(result, 'div', 'lawyer-expl').strip())
    specialization.append(_text_of(result, 'span', '-practices'))
    university.append(_text_of(result, 'span', '-law-schools'))

    # NOTE(review): deleting newlines joins the address lines with no separator
    # (e.g. '2181 Greenwich StreetSan Francisco,CA 94123'). Kept as-is so the
    # output is unchanged; consider ' '.join(text.split()) instead.
    address.append(_text_of(result, 'span', '-address').strip().replace('\t', '').replace('\n', ''))

    phone.append(_text_of(result, 'strong', '-phone').strip())

    # The email column stores the anchor's href; '' when the anchor is missing.
    email_anchor = result.find('a', {'class': '-email'})
    email_link.append('' if email_anchor is None else email_anchor.get('href'))

### Create Pandas Dataframe

In [76]:
# Assemble the per-field lists into one table; the key order below is the
# column order of the resulting frame.
lawyer_columns = {
    'lawyer_name': name,
    'short_bio': short_bio,
    'specialization': specialization,
    'university': university,
    'address': address,
    'phone': phone,
    'email': email_link,
}
df_lawyers = pd.DataFrame(lawyer_columns)

In [77]:
# Preview the first rows only instead of rendering the whole frame.
df_lawyers.head(10)

Unnamed: 0,lawyer_name,short_bio,specialization,university,address,phone,email
0,Doug Bend,"San Francisco, CA Attorney","Business, Entertainment & Sports, Real Estate ...",Georgetown University Law Center,"2181 Greenwich StreetSan Francisco,CA 94123",(415) 633-6841,https://lawyers.justia.com/lawyer/doug-bend-16...
1,Michael Paul Ehline,"San Francisco, CA Lawyer with 16 years of expe...","Consumer, Elder, Personal Injury and Products ...",UWLA,"50 Francisco St#460San Francisco,CA 94133",(888) 400-9721,https://lawyers.justia.com/lawyer/michael-paul...
2,"Ali Shahrestani, Esq.","San Francisco, CA Lawyer with 14 years of expe...","Business, Criminal, Divorce and Education",University of California Hastings College of t...,"One Embarcadero CenterSuite 500San Francisco,C...",(800) 510-3916,https://lawyers.justia.com/lawyer/ali-shahrest...
3,Christina Weed,"San Francisco, CA Lawyer with 12 years of expe...","Business, Estate Planning, Probate and Tax",University of San Diego School of Law and Whit...,"201 Spear StreetSuite 1100San Francisco,CA 94105",(925) 953-2920,https://lawyers.justia.com/lawyer/christina-we...
4,Joseph Tobener,"San Francisco, CA Lawyer with 22 years of expe...",Landlord Tenant,University of California Hastings College of t...,"21 Masonic AvenueSte ASan Francisco,CA 94118",(415) 504-2165,https://lawyers.justia.com/lawyer/joseph-toben...
5,Arnold Isaac Berschler,10.0 (1 Peer Review),"Civil Rights, Maritime and Personal Injury",Temple University Beasley School of Law,"22 Battery StreetSuite 810San Francisco,CA 94111",(800) 338-1441,https://lawyers.justia.com/lawyer/arnold-isaac...
6,Aaron R. Bortel,10.0 (3 Peer Reviews),DUI,University of California - Santa Barbara and G...,"650 5th St.Suite 508San Francisco ,CA 94107",(415) 247-0700,https://lawyers.justia.com/lawyer/aaron-r-bort...
7,Garrett Sutton,"San Fransisco, CA Attorney with 43 years of ex...",Business,University of California Hastings College of t...,"567 Sutter Street, Third FloorSan Fransisco,CA...",(800) 600-1760,https://lawyers.justia.com/lawyer/garrett-sutt...
8,George W. Wolff Esq.,"San Francisco, CA Attorney with 48 years of ex...","Arbitration & Mediation, Business, Constructio...",University of California Hastings College of t...,"580 California St#1236San Francisco,CA 94104",(415) 788-1881,https://lawyers.justia.com/lawyer/george-w-wol...
9,Reno F.R. Fernandez III,"San Francisco, CA Lawyer with 14 years of expe...",Bankruptcy and Business,Golden Gate University School of Law,"221 Sansome StreetSan Francisco,CA 94104",(415) 362-0449,https://lawyers.justia.com/lawyer/reno-f-r-fer...


### Output in Excel

In [78]:
# Save the single-page results as an Excel workbook, without the index column.
df_lawyers.to_excel(excel_writer='lawyers_single.xlsx', index=False)

### Part 2 - Pagination - Scrape 20 Pages

In [80]:
# One list per output column; every listing appends exactly one entry to each
# list so the columns stay aligned.
name = []
short_bio = []
specialization = []
university = []
address = []
phone = []
email_link = []

# Scrape settings, kept together so they are easy to tune.
BASE_URL = 'https://www.justia.com/lawyers/california/san-francisco'
N_PAGES = 20           # pages 1..N_PAGES are requested
REQUEST_TIMEOUT = 30   # seconds; requests waits indefinitely without a timeout


def parse_listing(listing):
    """Extract the seven target fields from one listing element.

    Parameters
    ----------
    listing : bs4 Tag
        One element returned by the 'OrganicListing' search.

    Returns
    -------
    tuple
        (name, short_bio, specialization, university, address, phone,
        email_link). A field whose element is missing comes back as ''.
        email_link is the '-email' anchor's href (None if the anchor has no
        href attribute).
    """
    def text_of(tag, css_class):
        # find() returns None for a missing element; map that to '' explicitly
        # instead of relying on a bare `except:` that hides every other error.
        element = listing.find(tag, {'class': css_class})
        return '' if element is None else element.get_text()

    email_anchor = listing.find('a', {'class': '-email'})
    return (
        text_of('strong', 'lawyer-name').strip(),
        text_of('div', 'lawyer-expl').strip(),
        text_of('span', '-practices'),
        text_of('span', '-law-schools'),
        # NOTE(review): deleting newlines joins the address lines with no
        # separator ('...StreetSan Francisco...'); kept to preserve the output.
        text_of('span', '-address').strip().replace('\t', '').replace('\n', ''),
        text_of('strong', '-phone').strip(),
        '' if email_anchor is None else email_anchor.get('href'),
    )


for page in range(1, N_PAGES + 1):

    website = BASE_URL + '?page=' + str(page)

    response = requests.get(website, timeout=REQUEST_TIMEOUT)
    # Stop on 4xx/5xx rather than silently collecting zero rows for that page.
    response.raise_for_status()

    soup = BeautifulSoup(response.content, 'html.parser')

    results = soup.find_all('div', {'data-vars-action':'OrganicListing'})

    for result in results:
        fields = parse_listing(result)
        # Tuple order from parse_listing matches the list order below.
        for column, value in zip(
            (name, short_bio, specialization, university, address, phone, email_link),
            fields,
        ):
            column.append(value)


df_lawyers_multiple = pd.DataFrame({'lawyer_name': name, 'short_bio': short_bio, 'specialization':specialization,
                          'university':university, 'address':address, 'phone':phone, 'email':email_link})

df_lawyers_multiple

Unnamed: 0,lawyer_name,short_bio,specialization,university,address,phone,email
0,Doug Bend,"San Francisco, CA Attorney","Business, Entertainment & Sports, Real Estate ...",Georgetown University Law Center,"2181 Greenwich StreetSan Francisco,CA 94123",(415) 633-6841,https://lawyers.justia.com/lawyer/doug-bend-16...
1,Sam Amin,"San Francisco, CA Attorney",Criminal and DUI,John F. Kennedy University,"75 Broadway StreetSuite 202San Francisco,CA 94111",(415) 300-2037,https://lawyers.justia.com/lawyer/sam-amin-150...
2,Debra Schoenberg,"San Francisco, CA Attorney with 34 years of ex...",Divorce and Family,University of New Hampshire School of Law,"575 Market StreetSan Francisco,CA 94105",(415) 834-1120,https://lawyers.justia.com/lawyer/debra-schoen...
3,"Ali Shahrestani, Esq.","San Francisco, CA Lawyer with 14 years of expe...","Business, Criminal, Divorce and Education",University of California Hastings College of t...,"One Embarcadero CenterSuite 500San Francisco,C...",(800) 510-3916,https://lawyers.justia.com/lawyer/ali-shahrest...
4,Richard Alexander,"San Francisco, CA Attorney with 50 years of ex...","Asbestos, Consumer, Personal Injury and Produc...",The University of Chicago Law School,"1 Sansome StreetSuite 3500San Francisco,CA 94104",(415) 921-1776,https://lawyers.justia.com/lawyer/richard-alex...
...,...,...,...,...,...,...,...
795,Christian Pedersen,"Richmond, CA Attorney",Personal Injury,Arizona State University,"3150 Hilltop Mall RoadRichmond,CA 94806",(707) 426-5300,https://lawyers.justia.com/lawyer/christian-pe...
796,Paul Gerard Minoletti,"San Mateo, CA Attorney with 36 years of experi...","Arbitration & Mediation, Business, Constructio...",Golden Gate Univ School of Law,"Waters Technology Park1 Waters Park Drive, Sui...",(650) 638-9601,https://lawyers.justia.com/lawyer/paul-gerard-...
797,Stuart Milton Flashman,"Oakland, CA Lawyer with 31 years of experience",Environmental,New Coll of CA School of Law,"5626 Ocean View DrOakland,CA 94618",(510) 652-5373,https://lawyers.justia.com/lawyer/stuart-milto...
798,Thomas John Greenberg,10.0 (1 Peer Review),"Civil Rights, Criminal, DUI and Domestic Violence",,"605 Middlefield Rd.Redwood City,CA 94063",(650) 242-0021,https://lawyers.justia.com/lawyer/thomas-john-...


#### Excel

In [81]:
# Save the multi-page results as an Excel workbook, without the index column.
df_lawyers_multiple.to_excel(excel_writer='lawyers_multiple.xlsx', index=False)

#### Postgres

In [83]:
# Read the connection URL from the environment so no password is stored in the
# notebook. The fallback URL carries no password; presumably the libpq-based
# default driver then picks it up from PGPASSWORD or ~/.pgpass - confirm for
# your setup.
# The scheme must be 'postgresql://': SQLAlchemy 1.4 removed the 'postgres://' alias.
DATABASE_URL = os.environ.get('DATABASE_URL', 'postgresql://postgres@localhost:5432')

# create sqlalchemy engine
engine = sqlalchemy.create_engine(DATABASE_URL)

# if_exists='replace' keeps this cell re-runnable: the default ('fail') raises
# ValueError as soon as the 'lawyers' table already exists.
df_lawyers_multiple.to_sql('lawyers', engine, index = False, if_exists='replace')