# Web scraping Land Price Data

- Import Libraries
- Create our ETL functions
- Scrape the data
- Create a CSV file

### 1.) Import Libraries

In [64]:
import requests
from bs4 import BeautifulSoup

### 2.) Creating ETL functions

In [65]:
# Shared accumulators: get_page_urls() appends listing URLs to url_list,
# extract_transform() appends one record dict per listing to items_list.
url_list, items_list = [], []

In [66]:
def get_page_urls(page, timeout=30):
    """Collect the listing URLs found on one page of the land-plots index.

    Downloads ``<base_url>/en/land-plots?page=<page>``, visits every
    ``<article>`` element in the response and appends the absolute URL of its
    first link carrying an ``href`` to the module-level ``url_list``.

    Parameters
    ----------
    page : int or str
        Value interpolated into the ``page`` query parameter.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30).

    Raises
    ------
    requests.HTTPError
        If the index page is answered with a 4xx/5xx status.
    """
    from urllib.parse import urljoin

    base_url = 'https://www.jumia.cm'
    # Without a timeout requests may block forever on a stalled connection.
    response = requests.get(f'{base_url}/en/land-plots?page={page}', timeout=timeout)
    # Fail loudly instead of silently parsing an error page into zero results.
    response.raise_for_status()
    soup = BeautifulSoup(response.text,'html.parser')
    for article in soup.find_all('article'):
        link = article.find('a', href=True)
        if link is None:
            # An <article> without a usable link is not a listing; skip it.
            continue
        # urljoin copes with root-relative, relative and absolute hrefs alike.
        url_list.append(urljoin(base_url, link['href']))

In [67]:
def extract_transform(url, timeout=30):
    """Scrape one listing page and append its record to ``items_list``.

    The record is a dict with three keys:

    * ``'Location'`` - text of the ``<span itemprop="addressLocality">``.
    * ``'Area'`` - integer parsed from the second ``<h3>`` element after the
      ``'Area'`` label and the ``' m2'`` suffix are removed.
    * ``'Price'`` - integer parsed from the ``<span itemprop="price">`` after
      thousands separators are removed.

    Parameters
    ----------
    url : str
        Address of the listing page to download.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30).

    Raises
    ------
    requests.HTTPError
        If the page is answered with a 4xx/5xx status.
    ValueError
        If an expected element is missing from the page, or if the area or
        price text cannot be converted to an integer.
    """
    # Without a timeout requests may block forever on a stalled connection.
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    soup = BeautifulSoup(response.text,'html.parser')

    location_tag = soup.find('span',{'itemprop':'addressLocality'})
    headings = soup.find_all('h3')
    price_tag = soup.find('span',{'itemprop':'price'})
    if location_tag is None or price_tag is None or len(headings) < 2:
        # Name the offending URL; a bare AttributeError/IndexError would not.
        raise ValueError(f'Unexpected page layout, cannot extract listing from {url}')

    location = location_tag.get_text()
    # Drop thousands separators here too, exactly as is done for the price.
    area_text = headings[1].get_text().replace('Area','').replace(' m2','').replace(',',"")
    area = int(area_text)
    price = int(price_tag.get_text().replace(',',""))

    items = {
        'Location': location,
        'Area': area,
        'Price': price
    }

    items_list.append(items)

### 3.) Scraping the page

In [68]:
# Start from an empty list so that re-running this cell does not append the
# same listing URLs a second time.
url_list.clear()
for page in range(1,3):  # index pages 1 and 2
    get_page_urls(page)

In [69]:
# Reset the results first: re-running this cell would otherwise duplicate
# every record already stored in items_list.
items_list.clear()
for url in url_list:
    extract_transform(url)
items_list

[{'Location': 'Odza', 'Area': 3013, 'Price': 35000},
 {'Location': 'Douala', 'Area': 150000, 'Price': 12000},
 {'Location': 'PK21', 'Area': 20000, 'Price': 5000},
 {'Location': 'PK21', 'Area': 9000, 'Price': 17000},
 {'Location': 'Yassa', 'Area': 100000, 'Price': 6000},
 {'Location': 'Bastos', 'Area': 1800, 'Price': 350000},
 {'Location': 'Nyalla', 'Area': 500, 'Price': 25000000},
 {'Location': 'Bastos', 'Area': 499, 'Price': 250000},
 {'Location': 'Quartier Golf', 'Area': 1500, 'Price': 350000},
 {'Location': 'Quartier Golf', 'Area': 510, 'Price': 350000},
 {'Location': 'Bastos', 'Area': 789, 'Price': 400000},
 {'Location': 'Akwa', 'Area': 811, 'Price': 1100000000},
 {'Location': 'Bonaberi', 'Area': 50000, 'Price': 40000},
 {'Location': 'Bali', 'Area': 300, 'Price': 200000000},
 {'Location': 'Bali', 'Area': 400, 'Price': 350000000},
 {'Location': 'PK14', 'Area': 350, 'Price': 7000000}]

### 4.) Convert to CSV

In [70]:
import pandas as pd

In [71]:
# Name the columns explicitly so the frame (and any CSV written from it) keeps
# the Location/Area/Price header even when items_list is empty.
data = pd.DataFrame(items_list, columns=['Location', 'Area', 'Price'])
data

Unnamed: 0,Location,Area,Price
0,Odza,3013,35000
1,Douala,150000,12000
2,PK21,20000,5000
3,PK21,9000,17000
4,Yassa,100000,6000
5,Bastos,1800,350000
6,Nyalla,500,25000000
7,Bastos,499,250000
8,Quartier Golf,1500,350000
9,Quartier Golf,510,350000


In [72]:
# Write the scraped table to disk; index=False keeps the row index out of the file.
data.to_csv(
    "land_price_data.csv",
    index=False,
)