# Scraping Code

In [73]:
import os
import requests 
import time

import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options

In [94]:
pd.set_option('display.max_colwidth', None)

In [74]:
# Load webdriver

chrome_options = Options()
#chrome_options.add_argument("--headless") # Ensure GUI is off
chrome_options.add_argument("--no-sandbox")

homedir = os.path.expanduser("~")
webdriver_service = Service(f"{homedir}/chromedriver/stable/chromedriver")

In [75]:
# Open browser
browser = webdriver.Chrome(service=webdriver_service, options=chrome_options)

In [76]:
# Going to apartments.com - No sign-in required
url_apartements_com = 'https://www.apartments.com/san-diego-ca/?bb=47tg0ow79M369zq96B'
browser.get(url_apartements_com)

## Scraping Main Data

In [77]:
def get_data_from_listing(listing):
    # Gets information from the first view of the appartment
    property_title = listing.find_element(By.XPATH, './/div[@class="property-title"]').text
    property_address = listing.find_element(By.XPATH, './/div[@class="property-address js-url"]').text
    property_pricing = listing.find_element(By.XPATH, './/p[@class="property-pricing"]').text
    property_beds = listing.find_element(By.XPATH, './/p[@class="property-beds"]').text
    #property_phone = listing.find_element(By.XPATH, './/a[@class="phone-link js-phone js-student-housing"]').text
    property_phone = listing.find_element(By.XPATH, './/div[@class="property-actions"]').text.split('\n')[0]
    try:
        ammenities = listing.find_element(By.XPATH, './/p[@class="property-amenities"]')\
                            .find_elements(By.CSS_SELECTOR, 'span')
        property_ammenities = [am.text for am in ammenities if am.text !='']
    except:
        property_ammenities = []
    try:
        property_main_image = listing.find_element(By.XPATH, './/div[@class="item active us "]')\
                                    .get_attribute('style').split('"')[1]
    except:
        property_main_image = ''
    return {
        'property_title': property_title,
        'property_address': property_address,
        'property_pricing': property_pricing,
        'property_beds': property_beds,
        'property_phone': property_phone,
        'property_ammenities': property_ammenities,
        'property_main_image': property_main_image,
    }

In [87]:
# Getting URL of different listings
n = 25
listing_urls = []
listing_texts = []
listing_data = []
for i in range(1, n+1):
    listing_XPATH = f'//*[@id="placardContainer"]/ul/li[{i}]/article'
    listing = browser.find_element(By.XPATH, listing_XPATH)
    
    listing_data.append(get_data_from_listing(listing))
    
    text_listing = listing.text.split('\n')
    listing_texts.append(text_listing)
    
    listing_url = listing.get_attribute('data-url')
    listing_urls.append(listing_url)

In [88]:
listing_4 = pd.DataFrame(listing_data)

In [86]:
listing_3 = pd.DataFrame(listing_data)

In [84]:
listing_2 = pd.DataFrame(listing_data)

In [82]:
listing_1 = pd.DataFrame(listing_data)

In [92]:
listings = listing_4.append(listing_3).append(listing_2).append(listing_1).reset_index(drop=True)

  listings = listing_4.append(listing_3).append(listing_2).append(listing_1).reset_index(drop=True)
  listings = listing_4.append(listing_3).append(listing_2).append(listing_1).reset_index(drop=True)


In [93]:
listings.to_csv('listings100.csv', index=False)

In [99]:
full_urls_photos = []

In [110]:
photos = browser.find_elements(By.XPATH, './/li[@class="photoItem"]/div')
photos_url = [photo.get_attribute('data-img-src') 
              for photo in photos if photo.get_attribute('data-img-src') is not None]


In [111]:
full_urls_photos.extend(photos_url)
len(full_urls_photos)

338

In [114]:
pd.DataFrame(full_urls_photos, columns=['url_photos']).to_csv('url_photos.csv', index=False)

## Scraping Detailed Data

In [1]:
### Scraping Details

#apartments_info = []
units_data = []

for url in listing_urls:
    browser.get(url)
    time.sleep(1)
    
    browser.execute_script("window.scrollTo(0, 0);")
    photos_button = browser.find_element(By.XPATH, './/button[@class="photoWrapper pillBtn small js-carouselPhotoBtn"]')
    photos_button.click()
    time.sleep(1)
    photos = browser.find_elements(By.XPATH, './/li[@class="photoItem"]/div')
    photos_url = [photo.get_attribute('data-img-src') 
                  for photo in photos if photo.get_attribute('data-img-src') is not None]

    close_button = browser.find_element(By.XPATH, './/button[@class="close"]')
    close_button.click()
    
    
    property_name = browser.find_element(By.XPATH, './/h1[@class="propertyName"]').text
    print(property_name)
    address = browser.find_element(By.XPATH, './/span[@class="delivery-address"]').text
    zip_code = browser.find_element(By.XPATH, './/span[@class="stateZipContainer"]').text
    neighborhood = browser.find_element(By.XPATH, './/a[@class="neighborhood"]').text
    try:
        review_rating = browser.find_element(By.XPATH, './/*[@class="reviewRating"]').text
    except:
        review_rating = ''
    try:
        review_count = browser.find_element(By.XPATH, './/*[@class="reviewCount"]').text
    except:
        review_count = ''
    company_logo_url = browser.find_element(By.XPATH, './/*[@class="pmcLogo"]').get_attribute('src')
    phone_number = browser.find_element(By.XPATH, './/div[@class="phoneNumber"]').text
    
    browser.execute_script("window.scrollTo(0, 1000);")
    time.sleep(1)
    # apartments = browser.find_elements(By.XPATH, '//div[@class="pricingGridItem multiFamily hasUnitGrid"]')
    apartments = browser.find_elements(By.XPATH, '//div[@class="tab-section active"]/div')
    print(f'Num apartments: {len(apartments)}')
    
    for apmnt in apartments:
            time.sleep(1)
#        try:
            model_name = apmnt.find_element(By.XPATH, './/span[@class="modelName"]').text
            if model_name == '':
                break
            model_details = apmnt.find_element(By.XPATH, './/span[@class="detailsTextWrapper"]').text
            model_details = model_details.replace('s,', ',')
            details = {det.strip().split(' ',1)[1].replace(' ', '_'):det.strip().split(' ',1)[0] 
                         for det in model_details.split(', ')}
            details = details if isinstance(details, dict) else {}
#            plan_small_url = apmnt.find_element(By.XPATH, './/div[@class="floorPlanButtonImage"]')\
#                                .get_attribute('data-background-image')

            units = apmnt.find_elements(By.XPATH, './/div[@class="grid-container js-unitExtension"]')

            for unit in units:
                udata = unit.text.split('\n')[:-1]
                unit_data = {(udata[2*i]).lower().replace(' ','_'):udata[2*i+1] 
                         for i in range(len(udata)//2)}
                unit_data['model'] = model_name
                unit_data['details'] = model_details
                unit_data['url_plan'] = plan_small_url

                unit_data['property_name'] = property_name
                unit_data['address'] = address
                unit_data['zip_code'] = zip_code
                unit_data['neighborhood'] = neighborhood
                unit_data['review_rating'] = review_rating
                unit_data['review_count'] = review_count
                unit_data['company_logo_url'] = company_logo_url
                unit_data['phone_number'] = phone_number
                unit_data['photos_url'] = photos_url
                unit_data = unit_data | details 

                units_data.append(unit_data)
            print(f'units_data: {len(units_data)}')

#        except:
#            pass
#    apartments_info.extend(units_data)

In [551]:
pd.DataFrame(units_data).to_csv('apartment_data.csv', index=False)

# Uploading code into the DB

In [228]:
import requests
import json
import pandas as pd
import numpy as np
import warnings

warnings.filterwarnings('ignore')

In [229]:
good_reviews = [
"I recently stayed at this apartment and it exceeded all of my expectations. The space was incredibly clean and well-maintained, and all of the furnishings and decor were modern and stylish. The location was also very convenient, with plenty of restaurants and shops nearby. The building itself was very secure and quiet, and the staff were friendly and helpful. Overall, I would highly recommend this apartment to anyone looking for a comfortable and enjoyable stay.",
"I was very impressed with this apartment complex. The amenities were outstanding, including a fitness center, pool, and outdoor grilling area. The apartment itself was spacious and well-appointed, with high-end appliances and a comfortable bed. The location was also excellent, with easy access to public transportation and plenty of local attractions. The management team was also very responsive and accommodating, making the entire rental process smooth and stress-free. I would definitely recommend this apartment to anyone looking for a luxurious and convenient living experience.",
"I had a fantastic experience staying in this apartment. The space was bright, airy, and very clean, with lots of natural light and a beautiful view. The location was also great, with plenty of restaurants and cafes within walking distance. The building itself was very secure and well-maintained, and the staff were friendly and helpful. I would definitely stay here again and would highly recommend it to anyone looking for a comfortable and stylish place to stay."
"This apartment complex is conveniently located near shopping and dining options. The staff is friendly and the grounds are well-maintained.",
"I love living in this apartment! The layout is spacious and the natural light makes it feel even bigger. Plus, the maintenance team is always quick to respond to any issues.",
"The amenities here are amazing - a pool, gym, and outdoor grilling area. It's like living at a resort!",
"The location of this apartment complex is unbeatable - I can walk to work and to all my favorite restaurants. Plus, the views from my balcony are stunning.",
"The leasing process was a breeze and the staff made me feel right at home. The community events they host are a great way to meet my neighbors.",
"I appreciate how pet-friendly this apartment complex is. They have a dog park on-site and even offer pet washing stations!",
"The kitchen in my apartment is a chef's dream. It has plenty of counter space and updated appliances, which makes cooking a joy.",
"I love the modern design of this apartment - the sleek finishes and fixtures give it a luxurious feel.",
"The on-site laundry facilities are a huge plus. I never have to worry about going to a laundromat.",
"The peace and quiet of this complex is a welcome relief after a long day at work. It's the perfect place to relax and unwind.",
    
"I absolutely loved my stay at this apartment! It was cozy, clean, and had all the amenities I needed for a comfortable stay.",
"The location of this apartment is unbeatable - it's within walking distance of shops, restaurants, and public transportation.",
"The apartment was beautifully decorated and felt like a home away from home. I would definitely stay here again!",
"The host was incredibly accommodating and made sure that everything was perfect for my stay. I couldn't have asked for better service!",
"The view from the apartment was breathtaking - I loved waking up to the stunning skyline every morning.",
"The neighborhood was quiet and peaceful, which made for a relaxing and enjoyable stay. I would highly recommend this apartment to anyone looking for a tranquil getaway.",
"The apartment was spotlessly clean and well-maintained. I felt right at home from the moment I walked in the door.",
"The bed was incredibly comfortable, and I slept like a baby every night. I would definitely stay at this apartment again just for the cozy bedding!",
"The kitchen had everything I needed to cook my own meals, which saved me a lot of money on dining out. I appreciated the convenience and cost-savings.",
"The apartment was conveniently located near all the major attractions in the city, which made it easy to explore and experience everything the city has to offer.",

"I recently moved into this apartment complex and I am thoroughly impressed. The amenities are top-notch, the staff is friendly and responsive, and the location is perfect for my needs.",
"I love my cozy little apartment! It's perfect for one person and the location is unbeatable. The building is well-maintained and the staff is always helpful.",
"I've been living in this apartment complex for a few months now and I can't imagine living anywhere else. The community is welcoming, the grounds are beautiful, and the apartments themselves are modern and stylish.",
"This apartment is amazing! The layout is spacious and practical, the appliances are high-quality, and the balcony has a stunning view. I'm so glad I chose to live here.",
"The management team at this apartment complex is fantastic. They go above and beyond to make sure residents are happy and comfortable. I feel valued and taken care of here.",
"I was hesitant to move into an apartment, but this one feels more like a home than a rental. The attention to detail in the design and decor is impressive, and the location is convenient yet peaceful.",
"Living in this apartment complex feels like being part of a community. The shared spaces are inviting and well-maintained, and the neighbors are friendly and respectful. I feel safe and at home here.",
"I've lived in a lot of apartments over the years, but this one has the best management by far. They are professional, kind, and always quick to respond to any issues. It makes a huge difference in quality of life.",
"This apartment has exceeded my expectations in every way. The finishes are high-end, the appliances are energy-efficient, and the location is unbeatable. I feel like I'm living in luxury without breaking the bank.",
"I appreciate the attention to sustainability in this apartment complex. From the low-flow faucets to the on-site recycling program, it's clear that the management cares about the environment and the community. It's a great place to live.",
]

In [230]:
bad_reviews = [
"The walls in this apartment complex are paper-thin. I can hear every conversation my neighbors have, and it's incredibly frustrating.",
"The management team here is unresponsive and unhelpful. It's impossible to get anything done, and it feels like they don't care about the residents at all.",
"The maintenance staff is slow to respond to requests, and when they do show up, they often don't fix the issue properly.",
"The amenities in this apartment complex are lackluster. The gym is tiny and poorly equipped, and the pool is always crowded and dirty.",
"The parking situation here is a nightmare. There are never any spots available, and the few that are open are always taken by people who don't live here.",
"The appliances in this apartment are outdated and barely functional. It's frustrating to have to deal with things like a temperamental stove or a noisy fridge on a daily basis.",
"The building itself is in poor condition. The paint is peeling, the carpets are stained, and there's a pervasive smell of mold and mildew.",
"The rent here is exorbitant for what you get. There are much nicer apartments in the area for the same price or less.",
"The location of this apartment complex is terrible. It's far from everything and there's nothing of interest nearby.",
"The neighbors in this complex are noisy and inconsiderate. I'm constantly woken up by parties and loud music.",
"The security here is subpar. Anyone can walk in and out of the building without a key, and there have been several break-ins in the past year.",
"The internet and cable service provided by the complex is slow and unreliable. It's frustrating to pay for a service that doesn't work properly.",
"The management team is constantly changing, and it's hard to know who to go to for help. There's no continuity or consistency in how things are handled.",
"The pet policy in this complex is overly strict. It's difficult to find a place to live if you have a pet, and even when you do, there are so many restrictions that it hardly seems worth it.",
"The laundry facilities here are a joke. There are never enough machines, and they're always out of order.",
"The walls and ceilings in this apartment are so thin that I can hear my upstairs neighbors walking around all day and night. It's incredibly frustrating.",
"The heating and cooling system in this apartment is ancient and inefficient. I'm constantly either too hot or too cold.",
"The kitchen in this apartment is tiny and impractical. There's barely enough counter space to prepare a meal.",
"The common areas in this complex are dirty and poorly maintained. It's not a pleasant place to spend time.",
"The staff in this complex are rude and unprofessional. It's hard to feel comfortable or welcome here.",
"The water pressure in this apartment is terrible. It takes forever to wash my hair or do dishes.",
"The parking garage is a disaster. It's poorly lit and feels unsafe, and there's always trash and debris littering the ground.",
"The windows in this apartment are old and drafty. It's impossible to keep the place warm in the winter.",
"The carpets in this apartment are old and stained. It's not a clean or pleasant living environment.",
"The rent increases every year without any corresponding improvements or upgrades. It feels like we're being taken advantage of."
]

In [231]:
neutral_reviews = [
"The location of this apartment complex is convenient, with easy access to major highways and shopping centers.",
"The amenities in this apartment complex are average, with a decent gym and a clean pool.",
"The maintenance staff is generally prompt in responding to requests, and they do a decent job of fixing things.",
"The parking situation here is average, with enough spaces for residents but sometimes crowded during peak hours.",
"The appliances in this apartment are standard and functional, with no major issues to report.",
"The building itself is well-maintained, with clean hallways and common areas.",
"The rent here is reasonable for the area, with no major complaints about the value.",
"The neighbors in this complex are generally quiet and respectful, with occasional noise from time to time.",
"The security here is standard, with a gated entrance and cameras in common areas.",
"The internet and cable service provided by the complex is reliable, but nothing to write home about.",
"The management team is responsive, but not overly friendly or personable.",
"The pet policy in this complex is reasonable, with a pet deposit and monthly fee required.",
"The laundry facilities here are average, with enough machines for residents but sometimes in need of maintenance.",
"The walls and ceilings in this apartment are standard, with occasional noise from neighboring units.",
"The heating and cooling system in this apartment is standard, with occasional temperature fluctuations.",
"The kitchen in this apartment is average, with enough counter space and standard appliances.",
"The common areas in this complex are standard, with occasional cleaning and maintenance.",
"The staff in this complex are polite, but not particularly outgoing or friendly.",
"The water pressure in this apartment is average, with occasional fluctuations.",
"The parking garage is average, with enough spaces for residents but sometimes cramped.",
"The windows in this apartment are standard, with occasional drafts.",
"The carpets in this apartment are average, with occasional stains and wear.",
"The rent is inclusive of some utilities, which is a nice perk."
]

In [308]:
port = 8100
url_user_post = f'http://127.0.0.1:{port}/signup/'
url_home_post = f'http://127.0.0.1:{port}/addhome/'
url_comment_post = f'http://127.0.0.1:{port}/addcomment/'

url_home_get = f'http://127.0.0.1:{port}/gethomes/'
url_comment_get = f'http://127.0.0.1:{port}/getcomments/'

### Loading USERS

In [309]:
# Users Table
num_users = 30
users = pd.DataFrame(np.arange(1, num_users+1), columns=['id'])
users['username'] = 'user_'+users['id'].astype(str)
users['password'] = '12345678'
users['email'] = users['username'] + '@gmail.com'
users['is_landlord'] = np.random.choice(a=[True, False], size=num_users)
users['first_name'] = users['username'] + ' Smith'
users['end_phone'] = np.random.randint(1001, 9999, num_users)
users['last_name'] = '(858)-319-'+users['end_phone'].astype(str)
del users['end_phone']
users

Unnamed: 0,id,username,password,email,is_landlord,first_name,last_name
0,1,user_1,12345678,user_1@gmail.com,False,user_1 Smith,(858)-319-7908
1,2,user_2,12345678,user_2@gmail.com,True,user_2 Smith,(858)-319-5848
2,3,user_3,12345678,user_3@gmail.com,False,user_3 Smith,(858)-319-9210
3,4,user_4,12345678,user_4@gmail.com,False,user_4 Smith,(858)-319-9599
4,5,user_5,12345678,user_5@gmail.com,True,user_5 Smith,(858)-319-3562
5,6,user_6,12345678,user_6@gmail.com,True,user_6 Smith,(858)-319-8432
6,7,user_7,12345678,user_7@gmail.com,True,user_7 Smith,(858)-319-4651
7,8,user_8,12345678,user_8@gmail.com,False,user_8 Smith,(858)-319-7072
8,9,user_9,12345678,user_9@gmail.com,True,user_9 Smith,(858)-319-9867
9,10,user_10,12345678,user_10@gmail.com,True,user_10 Smith,(858)-319-9495


In [310]:
# Load Data Users
for i in range(num_users):
    user_data = users.iloc[i].to_dict()
    requests.post(url_user_post, json=user_data)

### Loading HOMES

In [311]:
data_home_sample = json.loads('''{
        "title": "Sunny Appartment",
        "description": "This is a nice Appartment",
        "state": "CA",
        "city": "San Diego",
        "address": "street 2",
        "zipcode": 92092,
        "location_lat": 34.0,
        "location_lon": 43.0,
        "photos": [
            "https://www.udr.com/globalassets/markets/los-angeles/losangeles_1900x874_3033wilshire_model_2016_b2g-ph_unit1702_liv2_jimb.jpg",
            "https://www.apartments.com/images/default-source/2019-naa/parkline-apartment-in-miami-fla2dc2731-e6f2-4dca-89c5-38245ccacea1.tmb-featuredim.jpg"
        ],
        "built_date": "2012-10-12",
        "move_in_date": "2023-03-02",
        "area_sqft": 221,
        "num_bedrooms": 5,
        "num_bathrooms": 2,
        "current_price_month": 10121,
        "has_garage": false,
        "is_furnished": true,
        "is_booked": false,
        "num_views": 3,
        "num_saves": 2,
        "landlord": 1
}''')

In [312]:
home_cols = list(data_home_sample)

In [313]:
def append_lists(row):
    main_img = row['main_img']
    test_img = row['test']
    return list(main_img) + list(test_img)

In [314]:
num_homes = 100

homes = listings.head(num_homes)
homes['title'] = homes['property_title']
homes['description'] = homes['property_title'] + " is a beautiful place close to multiple ammenities"
homes['state'] = 'CA'
homes['city'] = 'San Diego'
homes['address'] = homes['property_address']
homes['zipcode'] = homes['property_address'].str.split().str[-1]
homes['location_lat'] = np.random.uniform(32, 34, num_homes)
homes['location_lon'] = np.random.uniform(117, 119, num_homes)
homes['main_img'] = homes[['property_main_image']].values.tolist()
homes['test'] = list(np.random.choice(full_urls_photos, (num_homes, 7)))
homes['photos'] = homes.apply(append_lists, axis=1)


homes['b1'] = np.random.randint(2000, 2018, num_homes)
homes['b2'] = np.random.randint(1, 12, num_homes)
homes['b3'] = np.random.randint(1, 30, num_homes)

homes['c1'] = np.random.randint(2022, 2024, num_homes)
homes['c2'] = np.random.randint(1, 12, num_homes)
homes['c3'] = np.random.randint(1, 30, num_homes)

homes['built_date'] = homes[['b1', 'b2', 'b3']].astype(str).apply(lambda x: '-'.join(x), axis=1)
homes['move_in_date'] = homes[['c1', 'c2', 'c3']].astype(str).apply(lambda x: '-'.join(x), axis=1)

homes["area_sqft"] = np.random.randint(190, 350, num_homes)
homes["num_bedrooms"] = np.random.randint(0, 5, num_homes)
homes["num_bathrooms"] = np.random.randint(0, 3, num_homes)
homes["current_price_month"] = np.random.randint(1090, 4500, num_homes)
homes["has_garage"] = np.random.choice(a=[True, False], size=num_homes)
homes["is_furnished"] = np.random.choice(a=[True, False], size=num_homes)
homes["is_booked"] = np.random.choice(a=[True, False], size=num_homes)
homes["num_views"] = np.random.randint(190, 350, num_homes)
homes["num_saves"] = np.random.randint(1, 30, num_homes)
homes["landlord"] = np.random.randint(1, 4, num_homes)

In [316]:
# Load Data Homes
for i in range(num_homes):
    home_data = homes[home_cols].iloc[i].to_dict()
    requests.post(url_home_post, json=home_data)

### Loading Comments

In [317]:
def get_comments(score=5):
    if score in [5, 4]:
        return np.random.choice(good_reviews+neutral_reviews)
    elif score in [3, 2]:
        return np.random.choice(good_reviews+neutral_reviews+bad_reviews)
    elif score == 1:
        return np.random.choice(neutral_reviews+bad_reviews)

In [318]:
num_comments = 1000
comments = pd.DataFrame(np.arange(1, num_comments+1), columns=['id'])
comments['rating'] = np.random.randint(1, 6, num_comments)
comments['comment'] = comments['rating'].apply(get_comments)
comments['home'] = np.random.randint(1, 100, num_comments)
comments['user'] = np.random.randint(1, 30, num_comments)

In [319]:
# Load Data Homes
for i in range(num_comments):
    comment_data = comments.iloc[i].to_dict()
    requests.post(url_comment_post, json=comment_data)

### Sample requests

In [19]:
data_user_sample = json.loads('''{
    "username": "User1",
    "password": "Pass",
    "email": "user@gmail.com",
    "is_landlord": true
}''')

In [21]:
data_comment_sample = {
    "comment": "this is very good",
    "rating": 3,
    "user": 2,
    "home": 1
}

In [31]:
data_home_sample = json.loads('''{
        "title": "Sunny Appartment",
        "description": "This is a nice Appartment",
        "state": "CA",
        "city": "San Diego",
        "address": "street 2",
        "zipcode": 92092,
        "location_lat": 34.0,
        "location_lon": 43.0,
        "photos": [
            "https://www.udr.com/globalassets/markets/los-angeles/losangeles_1900x874_3033wilshire_model_2016_b2g-ph_unit1702_liv2_jimb.jpg",
            "https://www.apartments.com/images/default-source/2019-naa/parkline-apartment-in-miami-fla2dc2731-e6f2-4dca-89c5-38245ccacea1.tmb-featuredim.jpg"
        ],
        "built_date": "2012-10-12",
        "move_in_date": "2023-03-02",
        "area_sqft": 221,
        "num_bedrooms": 5,
        "num_bathrooms": 2,
        "current_price_month": 10121,
        "has_garage": false,
        "is_furnished": true,
        "is_booked": false,
        "num_views": 3,
        "num_saves": 2,
        "landlord": 1
}''')