## Libraries required for the notebook

In [1]:
import pandas as pd
import numpy as np
import re


import requests
from requests import get
from bs4 import BeautifulSoup

from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
from selenium.common.exceptions import NoSuchElementException
from selenium import webdriver

import time
import timeit


## Scrapping all the links from the homepage

In [2]:
## This chunk of code is required for scrapping all the links from the given website. Please note that the selenium package 
## requires the webdriver's location. 

options = Options()
options.add_argument('--headless')
options.add_argument('--disable-gpu')
driver=webdriver.Chrome(executable_path = 'C:/Users/kuria/anaconda3/Lib/site-packages/selenium/webdriver/chrome/chromedriver.exe',options = options)
driver.get("https://www.rentcollegepads.com/off-campus-housing/ohio-state/search")
listings=[]

while(True):
    try:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        stages = driver.find_elements_by_class_name('col-9')
        page = driver.page_source
        soup = BeautifulSoup(page, 'html.parser')
        container = soup.find_all('div', attrs={'class':'col-9'})
    
        len_list_before = len(listings)
  
        for div in container:
            if div.a["href"] in listings:
                continue
            else:
                listings.append(div.a['href'])

        len_list_after = len(listings)
    
        if len_list_before==len_list_after:
            break

        loadmore = driver.find_element_by_css_selector('button[class = "btn btn-success btn-block"]')
        loadmore.click()
        time.sleep(2)
        
    except NoSuchElementException: 
        print("Reached the end")
        end = timeit.timeit()
        break



Reached the end


## Scrapping data for the rent from the first table

In [220]:
## Scrapping the first set of data to create a temporary dataframe
location_address_temp = [] ## This is  a temporary array for storing the address
location_name = [] ## This is the location of the adress for the listing
location_name_shorthand = [] ## This is the shortcut taken from headers
stuff = []
names = []
bed = []
bath = []
rent = []

headers = {"Accept-Language": "en-US, en;q=0.5"}

direction = ["North","East","West","South","E","W","N","S"]

for i in range(0,len(listings),1):
    url = listings[i]
    page = requests.get(url ,headers = headers)
    soup = BeautifulSoup(page.text, "html.parser")    
    table = soup.find_all('div', 
              attrs = {'class':'tab-pane active',
                      'aria-hidden':"false",
                      'id':"__BVID__43"})
    
    for row in table[0].find_all('tr'):  ## Looping through each url for each listing and then scrapping data from the first
        for col in row.find_all('td'):   ## table from each of its row and incase if a column is empty the list appends None to it.
            if not col.string is None:
                stuff.append(col.string.strip())
            else:
                stuff.append("None")
         
        if not stuff:
            continue
        else:
            names.append(stuff[0])   ## Adding each column of the of the table to its respective list defined above
            bed.append(stuff[1])
            bath.append(stuff[2])
            rent.append(stuff[3])

        
        location_name.append(soup.find("span", re.compile("sub-heading")).text)  ## Adding the subheading location to the location_name 
        location_address_temp = soup.find("span", re.compile("sub-heading")).text
        if not list(set(location_address_temp.split()) & set(direction)):
            location_name_shorthand.append("Null") ## Adding the direction shortcut by looking into the location adress
        else:
            location_name_shorthand.append(list(set(location_address_temp.split()) & set(direction))[0])
    
        stuff = []


In [221]:
## Creating a dataframe with the table columns
units = pd.DataFrame(columns = ['location_name','location_name_shorthand','beds','baths','price'])

units['location_name'] = location_name
units['location_name_shorthand'] = location_name_shorthand
units['Beds'] = bed
units['Baths'] = bath
units['Price'] = rent
units['Price'] = units['Price'].apply(lambda x: x.strip('$'))
units['Price'] = units["Price"].apply(lambda x: x.split('-')[0])

In [222]:
units = units.applymap(lambda x: x.strip('\n') if isinstance(x, str) else x)
units['location_name_shorthand'] = units['location_name_shorthand'].apply(lambda x: "West" if x == "W" else x)
units['location_name_shorthand']  = units['location_name_shorthand'].apply(lambda x: "East" if x == "E" else x)
units['location_name_shorthand']  = units['location_name_shorthand'].apply(lambda x: "North" if x == "N" else x)
units['location_name_shorthand']  = units['location_name_shorthand'].apply(lambda x: "South" if x == "S" else x)


## Scrapping the data for the parking table

In [182]:
## Scrapping from the parking table for the parking data 
stuff = []
parking_type = []
number = []
price_type = []
location_name = []
descrip_laundry = []
#descrip_furnished = []
descrip_parking = []


for i in range(0,len(listings),1):
    url = listings[i]
    page = requests.get(url ,headers = headers)
    soup = BeautifulSoup(page.text, "html.parser")
    feature_block = soup.find_all('div',
             attrs = {'class':'feature-block'})[1]
    
    check_description = feature_block.find_all('h2') ## This is the check if the heading of the table is Parking and if an 
    if not check_description[0].string :             ## actual table exists as such.
        for row in feature_block.find_all('tr'):
            for col in row.find_all('td'):
                if not col.string is None:
                    stuff.append(col.string.strip())
                else:
                    stuff.append("None")
         
            if not stuff:
                continue
            else:
                parking_type.append(stuff[0])
                number.append(stuff[1])

            stuff = []
            
            location_name.append(soup.find("span", re.compile("sub-heading")).text)
            
    else:

        for li in feature_block.find_all('li'):  ## If a parking table doesnt exist it would take the description and 
            if 'Laundry' in li.text:             ## look at the texts that are available and see if it matches with 
                descrip_laundry.append(li.text)         ## with anything that exists such as 'Laundry','Furnished','Parking'
                descrip_laundry.append(url)             ## It also appends the url of the link and the name of the location
                descrip_laundry.append(soup.find("span", re.compile("sub-heading")).text)
            else:
                descrip_laundry.append('Null')
          #  if 'Furnished' in li.text:
          #      descrip_furnished.append(li.text)
          #      descrip_furnished.append(url)
          #      descrip_furnished.append(soup.find("span", re.compile("sub-heading")).text)
          #  else:
          #      descrip_furnished.append('Null')
            if 'Parking' in li.text:
                descrip_parking.append(li.text)
                descrip_parking.append(url)
                descrip_parking.append(soup.find("span", re.compile("sub-heading")).text)
            else:
                descrip_parking.append('Null')
        
       # random.append(soup.find("span", re.compile("sub-heading")).text)
    

In [183]:
## Creating secondary dataframe for laundry dataframe
laundry = pd.DataFrame(columns = ['location_name','Laundry'])
loc_name = []
laundry_aval = []
i = 0
while  i < len(descrip_laundry): 
    if descrip_laundry[i] != 'Null':
        loc_name.append(descrip_laundry[i+2])
        laundry_aval.append(descrip_laundry[i])
        i+=3
    else:
        i+=1
laundry['location_name'] = loc_name
laundry['Laundry'] = laundry_aval
laundry = laundry.applymap(lambda x: x.strip('\n') if isinstance(x, str) else x)

Unnamed: 0,location_name,Laundry
0,"116 E 12th Ave, Columbus, OH 43201, United States",Laundry in House
1,"151 W 10th Ave, Columbus, OH 43201, United States",Laundry in Apartment
2,"141 E 13th Ave, Columbus, OH 43201, USA",Laundry in Building
3,"94 E 18th Ave, Columbus, OH 43201, United States",Laundry in Apartment
4,"155 W 10th Ave, Columbus, OH 43201, United States",Laundry in Apartment
5,"253 E 19th Ave, Columbus, OH 43201, United States",Laundry in Unit
6,"214 W Norwich Ave, Columbus, OH 43201, United ...",Laundry in Building
7,"99 E 16th Ave, Columbus, OH 43201, USA",Laundry on site
8,"51 E 13th Ave, Columbus, OH 43201, USA",Laundry in building
9,"95 Chittenden Ave, Columbus, OH 43201, United ...",Laundry in Unit


In [184]:
## Creating dataframe from the parking table
parking = pd.DataFrame(columns = ['location_name','parking_type', 'number'])
parking['location_name'] = location_name
parking['parking_type'] = parking_type
parking['number'] = number
parking = parking.applymap(lambda x: x.strip('\n') if isinstance(x, str) else x)
parking = parking.replace(r'^\s*$', np.nan, regex=True)

Unnamed: 0,location_name,parking_type,number
0,"442 East Northwood Avenue Columbus, OH 43201 USA",Off-Street Parking,1
1,"250 W Norwich Ave Columbus, OH 43201",Garage Parking,1
2,"1234 Steelwood Rd Columbus, OH 43212",Surface Lot,1
3,"1234 Steelwood Rd Columbus, OH 43212",Garage Parking,1
4,"1234 Steelwood Rd Columbus, OH 43212",Indoor Bike Storage,2
...,...,...,...
494,"144 East Norwich Avenue Columbus, OH 43201 USA",Off-Street Parking,1
495,"144 East Norwich Avenue Columbus, OH 43201 USA",Off-Street Parking,1
496,"252 E 17th Ave, Columbus, OH 43201",off-street parking,0
497,"45 W 9th Ave, Unit 47, Columbus, OH 43201, Uni...",Off-Street Parking,0


In [185]:
## Creating secondary dataframe from the description of the parking table
parking_new = pd.DataFrame(columns = ['location_name','parking_type','number'])
loc_name = []
park = []
i = 0
while  i < len(descrip_parking): 
    if descrip_parking[i] != 'Null':
        loc_name.append(descrip_parking[i+2])
        park.append(descrip_parking[i])
        i+=3
    else:
        i+=1

parking_new['location_name'] = loc_name
parking_new['parking_type'] = park
parking_new.loc[:,'number'] = 1
parking_new = parking_new.applymap(lambda x: x.strip('\n') if isinstance(x, str) else x)

Unnamed: 0,location_name,parking_type,number
0,"116 E 12th Ave, Columbus, OH 43201, United States",Off Street Parking,1
1,"151 W 10th Ave, Columbus, OH 43201, United States",On Street Parking Only,1
2,"94 E 18th Ave, Columbus, OH 43201, United States",2 Assigned Parking Spaces (1 Double Lane),1
3,"155 W 10th Ave, Columbus, OH 43201, United States",On Street Parking Only,1
4,"23 W 10th Ave\nColumbus, OH 43201",Off Street Parking,1
5,"253 E 19th Ave, Columbus, OH 43201, United States",Assigned Parking Space,1
6,"214 W Norwich Ave, Columbus, OH 43201, United ...",Assigned Off-Street Parking Spaces,1
7,"95 Chittenden Ave, Columbus, OH 43201, United ...",2 Assigned Parking Spaces (1 Double Lane),1


In [186]:
## Concatenating them both parking and parking_new
parking = pd.concat([parking, parking_new], axis=0)

Unnamed: 0,location_name,parking_type,number
0,"442 East Northwood Avenue Columbus, OH 43201 USA",Off-Street Parking,1
1,"250 W Norwich Ave Columbus, OH 43201",Garage Parking,1
2,"1234 Steelwood Rd Columbus, OH 43212",Surface Lot,1
3,"1234 Steelwood Rd Columbus, OH 43212",Garage Parking,1
4,"1234 Steelwood Rd Columbus, OH 43212",Indoor Bike Storage,2
...,...,...,...
3,"155 W 10th Ave, Columbus, OH 43201, United States",On Street Parking Only,1
4,"23 W 10th Ave\nColumbus, OH 43201",Off Street Parking,1
5,"253 E 19th Ave, Columbus, OH 43201, United States",Assigned Parking Space,1
6,"214 W Norwich Ave, Columbus, OH 43201, United ...",Assigned Off-Street Parking Spaces,1


## Adding data from the additional features

In [187]:
additional = pd.DataFrame()
for i in range(0,len(listings)-1,1):
    Laundry = []
    Furnished = []
    Parking =  []
    Washer = []
    c = []
    thisdict = {}
    url = listings[i]
    page = requests.get(url ,headers = headers)
    soup = BeautifulSoup(page.text, "html.parser")
    feature_block = soup.find_all('div',
                 attrs = {'class':'extra-feature'})[2]
    
    for li in feature_block.find_all('li'):
             c.append(li.text.strip('\n'))
    if len(c) == 0:
        continue
    else:
        for j in c:
            if 'Laundry' in j:
                Laundry.append(j)
            if 'Furnished' in j:
                Furnished.append(j)
            if 'Parking' in j:
                Parking.append(j)
            if 'Washer' in j:
                Washer.append(j)
                    
    thisdict = {
      "location_name": soup.find("span", re.compile("sub-heading")).text.strip('\n'),
      "Laundry": list(Laundry),
      "Furnished": list(Furnished),
      "Parking": list(Parking),
      "Washing": list(Washer)
    }
    
    additional = additional.append(thisdict, ignore_index=True)
                    

## Cleaning all the dataframe and merging them together

###  Cleaning the additional Data Frame

In [188]:
additional = additional.applymap(lambda x: x.strip() if isinstance(x, str) else x)
additional=additional.rename(columns = {'address':'location_name'})
additional['Furnished'] =  additional["Furnished"].apply(lambda x: ",".join(x) if isinstance(x, list) else x)
additional['Laundry'] =  additional["Laundry"].apply(lambda x: ",".join(x) if isinstance(x, list) else x)
additional['Washing'] =  additional["Washing"].apply(lambda x: ",".join(x) if isinstance(x, list) else x)
additional['Parking'] =  additional["Parking"].apply(lambda x: ",".join(x) if isinstance(x, list) else x)
#additional['Parking'] = additional["Parking"].apply(lambda x: np.nan if len(x) == 0 else x)
additional = additional.replace(r'^\s*$', np.nan, regex=True)
additional

Unnamed: 0,Furnished,Laundry,Parking,Washing,location_name
0,"Furnished,Furnished Packages Available","Laundry Room,On-Site Laundry",,,"442 East Northwood Avenue Columbus, OH 43201 USA"
1,"Furnished,Furnished Packages Available",On-Site Laundry,,WasherDryer In-Unit,"250 W Norwich Ave Columbus, OH 43201"
2,Furnished,,,WasherDryer In-Unit,"1234 Steelwood Rd Columbus, OH 43212"
3,,,,,"15 E Lane Ave, Columbus, OH 43201, United States"
4,,On-Site Laundry,,,"4765 Blairfield Dr Columbus, OH 43214 USA"
...,...,...,...,...,...
574,,,,,"29 W 9th Ave, Columbus, OH 43201, United States"
575,,On-Site Laundry,,,"144 East Norwich Avenue Columbus, OH 43201 USA"
576,,,,WasherDryer In-Unit,"190 W Norwich Ave, Columbus, OH 43201, United ..."
577,,Laundry In Unit,,,"252 E 17th Ave, Columbus, OH 43201"


### Merging the laundry and additional dataframes and creating a single column

In [189]:
merged = pd.merge(additional,laundry,on = "location_name",how = "left")
merged['Laundry'] = merged['Laundry_x'].where(merged['Laundry_x'].notnull(),merged['Laundry_y'])
merged = merged.drop(['Laundry_x','Laundry_y'],axis = 1)
merged['Laundry'].unique()

array(['Laundry Room,On-Site Laundry', 'On-Site Laundry', nan,
       'Laundry Room,New Laundry Machines, On-Site Laundry',
       'On-Site Laundry,Laundry Room', 'Laundry in House',
       'On-Site Laundry,New Laundry Machines', ' Laundry In Unit',
       'Laundry in Apartment', 'Laundry In Unit', 'Laundry in Building',
       'Laundry Room', ' On-Site Laundry', 'Laundry in Unit',
       'Laundry on site', 'Laundry in building'], dtype=object)

### Merging parking and merged dataframes

In [190]:
merged = pd.merge(merged,parking,on = "location_name",how = "left")
merged['Parking'] = merged['parking_type'].where(merged['parking_type'].notnull(),merged['Parking'])
merged = merged.drop(['parking_type'],axis = 1)

Unnamed: 0,Furnished,Parking,Washing,location_name,Laundry,number
0,"Furnished,Furnished Packages Available",Off-Street Parking,,"442 East Northwood Avenue Columbus, OH 43201 USA","Laundry Room,On-Site Laundry",1
1,"Furnished,Furnished Packages Available",Garage Parking,WasherDryer In-Unit,"250 W Norwich Ave Columbus, OH 43201",On-Site Laundry,1
2,Furnished,Surface Lot,WasherDryer In-Unit,"1234 Steelwood Rd Columbus, OH 43212",,1
3,Furnished,Garage Parking,WasherDryer In-Unit,"1234 Steelwood Rd Columbus, OH 43212",,1
4,Furnished,Indoor Bike Storage,WasherDryer In-Unit,"1234 Steelwood Rd Columbus, OH 43212",,2
...,...,...,...,...,...,...
599,,Off-Street Parking,,"144 East Norwich Avenue Columbus, OH 43201 USA",On-Site Laundry,1
600,,Off-Street Parking,,"144 East Norwich Avenue Columbus, OH 43201 USA",On-Site Laundry,1
601,,,WasherDryer In-Unit,"190 W Norwich Ave, Columbus, OH 43201, United ...",,
602,,off-street parking,,"252 E 17th Ave, Columbus, OH 43201",Laundry In Unit,0


### Merging the Laundry and Washing columns 

In [191]:
merged['Laundry'] = merged['Laundry'].where(merged['Laundry'].notnull(),merged['Washing'])
merged = merged.drop(['Washing'],axis = 1)

Unnamed: 0,Furnished,Parking,Washing,location_name,Laundry,number
0,"Furnished,Furnished Packages Available",Off-Street Parking,,"442 East Northwood Avenue Columbus, OH 43201 USA","Laundry Room,On-Site Laundry",1
1,"Furnished,Furnished Packages Available",Garage Parking,WasherDryer In-Unit,"250 W Norwich Ave Columbus, OH 43201",On-Site Laundry,1
2,Furnished,Surface Lot,WasherDryer In-Unit,"1234 Steelwood Rd Columbus, OH 43212",WasherDryer In-Unit,1
3,Furnished,Garage Parking,WasherDryer In-Unit,"1234 Steelwood Rd Columbus, OH 43212",WasherDryer In-Unit,1
4,Furnished,Indoor Bike Storage,WasherDryer In-Unit,"1234 Steelwood Rd Columbus, OH 43212",WasherDryer In-Unit,2
...,...,...,...,...,...,...
599,,Off-Street Parking,,"144 East Norwich Avenue Columbus, OH 43201 USA",On-Site Laundry,1
600,,Off-Street Parking,,"144 East Norwich Avenue Columbus, OH 43201 USA",On-Site Laundry,1
601,,,WasherDryer In-Unit,"190 W Norwich Ave, Columbus, OH 43201, United ...",WasherDryer In-Unit,
602,,off-street parking,,"252 E 17th Ave, Columbus, OH 43201",Laundry In Unit,0


### Converting Furnished columns to Yes/No

In [193]:
merged['Furnished'] = merged["Furnished"].apply(lambda x: "Yes" if x is not np.nan else x)
merged['Furnished'] = merged["Furnished"].apply(lambda x: "No" if x is np.nan else x)

Unnamed: 0,Furnished,Parking,location_name,Laundry,number
0,Yes,Off-Street Parking,"442 East Northwood Avenue Columbus, OH 43201 USA","Laundry Room,On-Site Laundry",1
1,Yes,Garage Parking,"250 W Norwich Ave Columbus, OH 43201",On-Site Laundry,1
2,Yes,Surface Lot,"1234 Steelwood Rd Columbus, OH 43212",WasherDryer In-Unit,1
3,Yes,Garage Parking,"1234 Steelwood Rd Columbus, OH 43212",WasherDryer In-Unit,1
4,Yes,Indoor Bike Storage,"1234 Steelwood Rd Columbus, OH 43212",WasherDryer In-Unit,2
...,...,...,...,...,...
599,No,Off-Street Parking,"144 East Norwich Avenue Columbus, OH 43201 USA",On-Site Laundry,1
600,No,Off-Street Parking,"144 East Norwich Avenue Columbus, OH 43201 USA",On-Site Laundry,1
601,No,,"190 W Norwich Ave, Columbus, OH 43201, United ...",WasherDryer In-Unit,
602,No,off-street parking,"252 E 17th Ave, Columbus, OH 43201",Laundry In Unit,0


### Converting Parking column to Yes/No

In [135]:
merged['Parking'].unique()

array(['Off-Street Parking', 'Garage Parking', 'Surface Lot',
       'Indoor Bike Storage', nan, '404 King Parking',
       'off-street parking', 'Off Street Parking', 'Driveway Parking',
       'Free Off Street Parking', 'Street Parking',
       'Call For Availability', 'Free off street parking',
       'On Street Parking Only', 'Garage & Surface Available',
       'Parking Available', '2 Assigned Parking Spaces (1 Double Lane)',
       'Call for Availability', 'Off Street Parking Available',
       'off-street parking available', 'Underground Parking',
       'Outdoor Bike Storage', 'Assigned Parking Space',
       'On-Site Parking', 'Assigned Off-Street Parking Spaces',
       'Gated Parking'], dtype=object)

In [194]:
merged['Parking'] = merged["Parking"].apply(lambda x: "No" if x == "On Street Parking Only" else x)

Unnamed: 0,Furnished,Parking,location_name,Laundry,number
0,Yes,Off-Street Parking,"442 East Northwood Avenue Columbus, OH 43201 USA","Laundry Room,On-Site Laundry",1
1,Yes,Garage Parking,"250 W Norwich Ave Columbus, OH 43201",On-Site Laundry,1
2,Yes,Surface Lot,"1234 Steelwood Rd Columbus, OH 43212",WasherDryer In-Unit,1
3,Yes,Garage Parking,"1234 Steelwood Rd Columbus, OH 43212",WasherDryer In-Unit,1
4,Yes,Indoor Bike Storage,"1234 Steelwood Rd Columbus, OH 43212",WasherDryer In-Unit,2
...,...,...,...,...,...
599,No,Off-Street Parking,"144 East Norwich Avenue Columbus, OH 43201 USA",On-Site Laundry,1
600,No,Off-Street Parking,"144 East Norwich Avenue Columbus, OH 43201 USA",On-Site Laundry,1
601,No,,"190 W Norwich Ave, Columbus, OH 43201, United ...",WasherDryer In-Unit,
602,No,off-street parking,"252 E 17th Ave, Columbus, OH 43201",Laundry In Unit,0


In [195]:
list_parking = merged['Parking'].unique()
list_parking = list_parking.tolist()
merged['Parking'] = merged['Parking'].apply(lambda x: "Yes" if x in list_parking else x)

Unnamed: 0,Furnished,Parking,location_name,Laundry,number
0,Yes,Yes,"442 East Northwood Avenue Columbus, OH 43201 USA","Laundry Room,On-Site Laundry",1
1,Yes,Yes,"250 W Norwich Ave Columbus, OH 43201",On-Site Laundry,1
2,Yes,Yes,"1234 Steelwood Rd Columbus, OH 43212",WasherDryer In-Unit,1
3,Yes,Yes,"1234 Steelwood Rd Columbus, OH 43212",WasherDryer In-Unit,1
4,Yes,Yes,"1234 Steelwood Rd Columbus, OH 43212",WasherDryer In-Unit,2
...,...,...,...,...,...
599,No,Yes,"144 East Norwich Avenue Columbus, OH 43201 USA",On-Site Laundry,1
600,No,Yes,"144 East Norwich Avenue Columbus, OH 43201 USA",On-Site Laundry,1
601,No,Yes,"190 W Norwich Ave, Columbus, OH 43201, United ...",WasherDryer In-Unit,
602,No,Yes,"252 E 17th Ave, Columbus, OH 43201",Laundry In Unit,0


In [196]:
merged['number'].fillna(0, inplace=True)
merged['number'] = merged['number'].astype(int)
merged['Parking'] = np.where((merged['Parking'] == 'Yes') & (merged['number'] > 0),
                     "Yes", "No")
merged = merged.drop(['number'],axis = 1)

### Converting Laundry column to unit/building/None 

In [199]:
list_laundry_unit = ['WasherDryer In-Unit','Laundry in Apartment','Laundry In Unit',
                'WasherDryer In-Unit\n','Washer Dryer Hook-Ups','Laundry in House',
                'WasherDryer In-Unit,Washer Dryer Hook-Ups','WasherDryer In-Unit\n ,Washer Dryer Hook-Ups',
               'Laundry in Unit',' Laundry In Unit','WasherDryer In-Unit\n ','On-Site Laundry,New Laundry Machines']

In [200]:
list_laundry_building = ['Laundry Room,On-Site Laundry','On-Site Laundry','Laundry Room,New Laundry Machines, On-Site Laundry',
                        'Laundry in building','On-Site Laundry,Laundry Room','Laundry in Building',
                        'On-Site Laundry','Laundry on site',' On-Site Laundry'
                        'WasherDryer In-Unit\n ',' On-Site Laundry','Laundry Room']

In [201]:
merged["Laundry"] = merged["Laundry"].apply(lambda x: "unit" if x in list_laundry_unit else x)
merged["Laundry"] = merged["Laundry"].apply(lambda x: "building" if x in list_laundry_building else x)

###  Merging units and merged dataframes

In [227]:
units = pd.merge(units,merged,on = "location_name",how = "left")
units = units.drop(['location_name'],axis = 1)
units

Unnamed: 0,location_name_shorthand,beds,baths,price,Furnished,Parking,Laundry
0,East,2,1,790,Yes,Yes,building
1,East,2,1,850,Yes,Yes,building
2,West,2,2,1130,Yes,Yes,building
3,West,4,4,950,Yes,Yes,building
4,West,4,4,920,Yes,Yes,building
...,...,...,...,...,...,...,...
1524,West,4,1,0,No,No,unit
1525,West,8,1,0,No,No,unit
1526,East,6,2,3800,No,No,unit
1527,West,6,1,0,,,


In [None]:
units = units.rename(columns = {'location_name_shorthand':'Location'})

In [None]:
units = units[['Location','Beds','Baths','Parking','Laundry','Furnished','Price']]

In [229]:
units.to_csv('units.csv')