# Jacksonville Apartment Population Statistics

In this notebook I will be parsing through some of the rental properties in Jacksonville to get information on them.

In [129]:
import requests
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
import platform
from bs4 import BeautifulSoup
import pandas as pd

Get first page

In [130]:
# Set up empty dataframe to store properties
complexes = pd.DataFrame(columns = ['URL', 'Complex Name', 'Address'])
apartmentsURL = 'https://www.apartments.com/jacksonville-fl/'

# Initialize web driver
web_driver = None
if(web_driver != None):
    driver = web_driver
else:
    options = Options()
    options.headless = True
    if ('debian' in platform.platform()):
        driver = webdriver.Firefox(firefox_binary='/usr/bin/firefox-esr', options=options)
    else:
        driver = webdriver.Firefox(options=options)
driver.get(apartmentsURL)

# Read the current page
soup = BeautifulSoup(driver.page_source, 'html.parser')
soup.prettify()
soup = soup.find('div', class_ = 'placardContainer')

# Read placard info
for index, item in enumerate(soup.find_all('a', attrs = {'aria-label' : True, "class": "property-link"})):
    URL = item.get('href')
    name = item.find('span', class_="js-placardTitle title").getText().strip() if item.find('span', class_="js-placardTitle title") else None
    address = item.find('div', class_ = 'property-address js-url').getText().strip() if item.find('div', class_ = 'property-address js-url') else None

    if name:
        print(name, URL, address)
        if name not in complexes['Complex Name']:
            complexes.loc[len(complexes)] = [URL, name, address]


pages = int(soup.find('span', class_ = "pageRange").getText().strip().split(' ')[-1])

The Reef Apartments https://www.apartments.com/the-reef-apartments-jacksonville-fl/x3mk9dp/ 2753 Mayport Rd, Jacksonville, FL 32233
Volta https://www.apartments.com/volta-jacksonville-fl/fr9fqfs/ 11391 Square St, Jacksonville, FL 32256
Forena Luxury Living https://www.apartments.com/forena-luxury-living-jacksonville-fl/rr2s1k2/ 11727 Abess Blvd, Jacksonville, FL 32225
Collins Preserve https://www.apartments.com/collins-preserve-jacksonville-fl/zqet90z/ 5258 Collins Preserve Ln, Jacksonville, FL 32244
Seaton Preserve https://www.apartments.com/seaton-preserve-jacksonville-fl/yj0rn3r/ 1173 Pecan Park Rd, Jacksonville, FL 32218
Presidium Regal https://www.apartments.com/presidium-regal-jacksonville-fl/z5q53cw/ 14051 Beach Blvd, Jacksonville, FL 32250
Pinnacle https://www.apartments.com/pinnacle-jacksonville-fl/jbf6fm6/ 8760 Pinnacle Park Blvd, Jacksonville, FL 32256
The Maggie Flats https://www.apartments.com/the-maggie-flats-jacksonville-fl/ebvrkfq/ 3730 DuPont Ave, Jacksonville, FL 3221

<a aria-label="The Reef Apartments, Jacksonville, FL" class="property-link" href="https://www.apartments.com/the-reef-apartments-jacksonville-fl/x3mk9dp/">
<div class="property-title" title="The Reef Apartments, Jacksonville, FL"><span class="js-placardTitle title">The Reef Apartments</span></div>
<div class="property-address js-url" title="2753 Mayport Rd, Jacksonville, FL 32233">2753 Mayport Rd, Jacksonville, FL 32233</div>
</a>

Get the rest of the pages

In [131]:
print(pages)

18


In [132]:
for page in range(2,pages):
    apartmentsURL = 'https://www.apartments.com/jacksonville-fl/{}/'.format(page)
    driver.get(apartmentsURL)

    # Read the current page
    soup = BeautifulSoup(driver.page_source, 'html.parser')
    soup.prettify()
    soup = soup.find('div', class_ = 'placardContainer')

    # Read placard info
    for index, item in enumerate(soup.find_all('a', attrs = {'aria-label' : True, "class": "property-link"})):
        URL = item.get('href')
        name = item.find('span', class_="js-placardTitle title").getText().strip() if item.find('span', class_="js-placardTitle title") else None
        address = item.find('div', class_ = 'property-address js-url').getText().strip() if item.find('div', class_ = 'property-address js-url') else None

        if name:
            print(name, URL, address)
            if name not in complexes['Complex Name']:
                complexes.loc[len(complexes)] = [URL, name, address]
    

driver.close()

The Ridley https://www.apartments.com/the-ridley-jacksonville-fl/y5ebpws/ 12517 Beach Blvd, Jacksonville, FL 32246
Lofts at Baymeadows https://www.apartments.com/lofts-at-baymeadows-jacksonville-fl/0l2k8h0/ 8050 Baymeadows Cir W, Jacksonville, FL 32256
Terrabella https://www.apartments.com/terrabella-jacksonville-fl/547f4mn/ 13723 Atlantic Blvd, Jacksonville, FL 32225
Tapestry at Westland Village https://www.apartments.com/tapestry-at-westland-village-jacksonville-fl/qk3mprw/ 6505 Collins Rd, Jacksonville, FL 32244
Argyle Lake at Oakleaf Town Center https://www.apartments.com/argyle-lake-at-oakleaf-town-center-jacksonville-fl/e4h1yzq/ 9849 Crosshill Blvd, Jacksonville, FL 32222
The Felix https://www.apartments.com/the-felix-jacksonville-fl/pr75y3t/ 11723 Wells Creek Pky, Jacksonville, FL 32256
RISE Julington https://www.apartments.com/rise-julington-jacksonville-fl/5kc5tzp/ 12397 San Jose Blvd, Jacksonville, FL 32223
Fountainhead Apartments https://www.apartments.com/fountainhead-apart

In [133]:
complexes.to_csv('Jacksonville Complexes.csv', index = False)

Data Cleaning

In [134]:
complexes

Unnamed: 0,URL,Complex Name,Address
0,https://www.apartments.com/the-reef-apartments...,The Reef Apartments,"2753 Mayport Rd, Jacksonville, FL 32233"
1,https://www.apartments.com/volta-jacksonville-...,Volta,"11391 Square St, Jacksonville, FL 32256"
2,https://www.apartments.com/forena-luxury-livin...,Forena Luxury Living,"11727 Abess Blvd, Jacksonville, FL 32225"
3,https://www.apartments.com/collins-preserve-ja...,Collins Preserve,"5258 Collins Preserve Ln, Jacksonville, FL 32244"
4,https://www.apartments.com/seaton-preserve-jac...,Seaton Preserve,"1173 Pecan Park Rd, Jacksonville, FL 32218"
...,...,...,...
675,https://www.apartments.com/339-gan-wy-jacksonv...,"339 Gan Wy, Jacksonville, FL 32259",
676,https://www.apartments.com/11512-dandelion-way...,"11512 Dandelion Way, Jacksonville, FL 32223",
677,https://www.apartments.com/7110-yowdy-star-ln-...,"7110 Yowdy Star Ln, Jacksonville, FL 32244",
678,https://www.apartments.com/8165-cheryl-ann-ln-...,"8165 Cheryl Ann Ln, Jacksonville, FL 32244",


In [135]:
zipCode = complexes.iloc[675]["Complex Name"][-5:]
print(zipCode.isdigit())

True


It appears that some of the listings were not apartment complexes, but individual rental properties. They are saved with no address, but the complex name is the address. lets fix that.

In [136]:
# All of the mentioned listings have the zip code at the end. we can use that to separate.

def notAComplex(row):
    if row['Address'] is None and row['Complex Name'] and row['Complex Name'][-5:].isdigit():
        row['Address'] = row['Complex Name']
        row['Complex Name'] = None
    return row

complexes = complexes.apply(notAComplex, axis = 1)

In [137]:
complexes

Unnamed: 0,URL,Complex Name,Address
0,https://www.apartments.com/the-reef-apartments...,The Reef Apartments,"2753 Mayport Rd, Jacksonville, FL 32233"
1,https://www.apartments.com/volta-jacksonville-...,Volta,"11391 Square St, Jacksonville, FL 32256"
2,https://www.apartments.com/forena-luxury-livin...,Forena Luxury Living,"11727 Abess Blvd, Jacksonville, FL 32225"
3,https://www.apartments.com/collins-preserve-ja...,Collins Preserve,"5258 Collins Preserve Ln, Jacksonville, FL 32244"
4,https://www.apartments.com/seaton-preserve-jac...,Seaton Preserve,"1173 Pecan Park Rd, Jacksonville, FL 32218"
...,...,...,...
675,https://www.apartments.com/339-gan-wy-jacksonv...,,"339 Gan Wy, Jacksonville, FL 32259"
676,https://www.apartments.com/11512-dandelion-way...,,"11512 Dandelion Way, Jacksonville, FL 32223"
677,https://www.apartments.com/7110-yowdy-star-ln-...,,"7110 Yowdy Star Ln, Jacksonville, FL 32244"
678,https://www.apartments.com/8165-cheryl-ann-ln-...,,"8165 Cheryl Ann Ln, Jacksonville, FL 32244"


In [138]:
complexes.to_csv('Jacksonville Complexes.csv', index = False)

In [139]:

web_driver = None
if(web_driver != None):
    driver = web_driver
else:
    options = Options()
    options.headless = True
    if ('debian' in platform.platform()):
        driver = webdriver.Firefox(firefox_binary='/usr/bin/firefox-esr', options=options)
    else:
        driver = webdriver.Firefox(options=options)





In [149]:
urlList = list(complexes["URL"])

unitsDF = pd.DataFrame(columns = ["Complex URL", "Floor Plan Name", "Price", "Unit Name", "Square Footage", "Availability", "Beds", "Baths"])

for index, url in enumerate(urlList):
    print(index)
    driver.get(url)
    soup = BeautifulSoup(driver.page_source, 'html.parser')
    soup.prettify()
    if soup.find('div', id = "pricingView") is not None:
        floorPlans = soup.find('div', id = "pricingView")
        floorPlans = floorPlans.findAll('div', class_ = "pricingGridItem multiFamily hasUnitGrid")
        for floorPlan in floorPlans:
            print(plan)
            plan = floorPlan.find('span', class_ = "modelName").text
            units = floorPlan.findAll('li', class_ = 'unitContainer js-unitContainer')
            for unit in units:
                unitName = unit.find('span', class_ = None).text
                #price = unit.find('span', class_ = "unitBtn viewPriceCta js-viewPriceBy-modal active clickable").text.strip()[1:].replace(',', '') if unit.find('span', class_ = "unitBtn viewPriceCta js-viewPriceBy-modal active clickable")
                price = unit.find('div', class_ = "pricingColumn column").findAll('span')[1].text.strip().replace(',','').replace('$','')
                area = unit.find('div', class_ = 'sqftColumn column').find('span', class_ = None).text
                availability = unit.find('span', class_ = "dateAvailable").get_text(strip=True).replace('availibility', '')
                print(unitName, price, area, availability)
                unitsDF.loc[len(unitsDF)] = [url, plan, price, unitName, area, availability, beds, baths]
    else:
            if soup.find('p', class_ = 'rentInfoDetail'):
                unitName = None
                price = soup.find('p', class_ = 'rentInfoDetail').text.strip().replace(',','') if soup.find('p', class_ = 'rentInfoDetail') else None
                beds = soup.findAll('p', class_ = 'rentInfoDetail')[1].text.strip() if soup.findAll('p', class_ = 'rentInfoDetail')[1] else None
                baths = soup.findAll('p', class_ = 'rentInfoDetail')[2].text.strip() if soup.findAll('p', class_ = 'rentInfoDetail')[2] else None
                area = soup.findAll('p', class_ = 'rentInfoDetail')[3].text.strip() if soup.findAll('p', class_ = 'rentInfoDetail')[3] else None

                print(unitName, price, area, availability, beds, baths)
                unitsDF.loc[len(unitsDF)] = [url, plan, price, unitName, area, availability, beds, baths]

unitsDF.to_csv('Jacksonville Units.csv', index = False)

0
A1-C
1415 1480 640 Now
2421 1480 640 Now
2327 1480 640 Oct 31
Bali
1213 1625 775 Now
2137 1625 775 Now
Bimini
2518 1580 709 Oct 29
1332 1580 709 Oct 31
2530 1580 709 Nov 3
Biscayne
1544 1595 770 Oct 31
1224 1595 770 Dec 9
2124 1595 770 Dec 29
Bahama
2419 1950 928 Now
1312 1950 928 Oct 31
2338 1950 928 Oct 31
Santorini
1240 2199 1,160 Now
1540 2199 1,160 Now
1340 2199 1,160 Oct 27
Silhouette
1136 2100 1,082 Oct 31
2114 2100 1,082 Nov 1
2336 2100 1,082 Nov 8
Santa Luiza
2102 2199 1,172 Oct 31
2302 2199 1,172 Oct 31
1202 2199 1,172 Dec 31
Seychelle
2249 2695 1,405 Oct 30
Antigua
1415 1480 640 Now
2421 1480 640 Now
2327 1480 640 Oct 31
Bali
1213 1625 775 Now
2137 1625 775 Now
Bimini
2518 1580 709 Oct 29
1332 1580 709 Oct 31
2530 1580 709 Nov 3
Biscayne
1544 1595 770 Oct 31
1224 1595 770 Dec 9
2124 1595 770 Dec 29
Bahama
2419 1950 928 Now
1312 1950 928 Oct 31
2338 1950 928 Oct 31
Santorini
1240 2199 1,160 Now
1540 2199 1,160 Now
1340 2199 1,160 Oct 27
Silhouette
1136 2100 1,082 Oct 31
211

In [141]:
print(availability.replace('availibility', ''))

Now
