# Web scraping Oslo bike count data

In [21]:
# install the packages if you don't have them.
!pip install requests
!pip install beautifulsoup4
!pip install pandas
!pip install tqdm
!pip install watermark
! pip install folium 



In [22]:
# Loading library
import requests
from bs4 import BeautifulSoup
import pandas as pd
import re
import ast
from pandas import json_normalize
from tqdm import tqdm
import folium

In [23]:
response = requests.get("https://data.eco-counter.com/ParcPublic/?id=3936") # Sends an HTTP GET request to the target URL
response.raise_for_status() # Checks the status code of the HTTP response

# If the HTTP request was successful (i.e., the status code is in the 200–299 range), no exception information will be returned. 
# If the server returns an error code (i.e., the status code is 400 or above), raise_for_status() will raise an HTTPError.

In [24]:
html_content = response.text #  get the body of the response, i.e., the HTML content of the page, as a string
soup = BeautifulSoup(html_content, 'html.parser') # parsing HTML content

In [33]:
print(soup)

<!DOCTYPE html>

<!--[if lt IE 7]>      <html class="no-js lt-ie9 lt-ie8 lt-ie7"> <![endif]-->
<!--[if IE 7]>         <html class="no-js lt-ie9 lt-ie8"> <![endif]-->
<!--[if IE 8]>         <html class="no-js lt-ie9"> <![endif]-->
<!--[if gt IE 8]><!--> <html class="no-js"> <!--<![endif]-->
<head>
<meta charset="utf-8"/>
<meta content="IE=edge,chrome=1" http-equiv="X-UA-Compatible"/>
<title></title>
<meta content="width=device-width, initial-scale=1.0" name="viewport"/>
<meta content="no-cache" http-equiv="Cache-Control"/>
<meta content="no-cache" http-equiv="Pragma"/>
<meta content="0" http-equiv="Expires"/>
<link href="styles/normalize.css" rel="stylesheet"/>
<link href="styles/leaflet.css" rel="stylesheet">
<!-- <link rel="stylesheet" href="styles/daterangepicker.css">  -->
<link href="styles/daterangepicker.min.css" rel="stylesheet"/>
<link href="styles/jquery-ui.min.css" rel="stylesheet"/>
<link href="styles/main.css" rel="stylesheet"/>
<link href="styles/plugins.css" rel="styleshe

In [55]:
soup.find_all("script")

[<script src="js/vendor/modernizr-2.6.2.min.js"></script>,
 <script src="js/vendor/jquery.min.js"></script>,
 <script src="js/vendor/leaflet.js"></script>,
 <script src="js/vendor/leaflet-plugins-1.2.1/layer/tile/Google.js"></script>,
 <script src="js/vendor/jqplot/jquery.jqplot.min.js"></script>,
 <script src="js/vendor/jqplot/jqplot.barRenderer.min.js"></script>,
 <script src="js/vendor/jqplot/jqplot.categoryAxisRenderer.min.js"></script>,
 <script src="js/vendor/jqplot/jqplot.pointLabels.min.js"></script>,
 <script src="js/vendor/jqplot/jqplot.dateAxisRenderer.min.js"></script>,
 <script src="js/vendor/jqplot/jqplot.canvasTextRenderer.min.js"></script>,
 <script src="js/vendor/jqplot/jqplot.canvasAxisTickRenderer.min.js"></script>,
 <script src="js/vendor/jqplot/jqplot.highlighter.min.js"></script>,
 <script src="js/vendor/jqplot/jqplot.cursor.min.js"></script>,
 <script src="js/vendor/slidesjs/slides.min.jquery.js"></script>,
 <script src="js/vendor/moment.min.js"></script>,
 <scri

In [35]:
def find_target_script(soup):
    scripts = soup.find_all('script')
    for script in scripts:
        if script.string and 'var prices =' in script.string:
            return script.string
    return None

In [45]:
target_script_content = find_target_script(soup)

In [47]:
print(target_script_content)

None


In [77]:
# A function to extract the JavaScript arrays as strings and then convert them into Python lists. 
def extract_js_array(target_script_content, variable_name):
    # Find the target array
    pattern = re.compile(r"var\s+" + re.escape(variable_name) + r"\s*=\s*(\[[^\]]*\]);", re.DOTALL)
    match = pattern.search(target_script_content)
    if match:
        array_str = match.group(1)
        array_data = ast.literal_eval(array_str)
        return array_data
    else:
        print(f"{variable_name} not found")
    return None

# Apply this function to your target variables (e.g., prices, bedrooms, latitudes, longitudes etc.) to extract different information related to housing.
variables = [
    'prices', 'bedrooms', 'bathrooms', 'students', 'nonStudents',
    'dss', 'pets', 'isstudio', 'isshared', 'furnished', 'hasVideo',
    'propertyTypes', 'hoursLive', 'gardens', 'parkings', 'bills',
    'availableFrom', 'minimumTenancy', 'PROPERTYIDS', 'PROPERTYLISTLATITUDES',
    'PROPERTYLISTLONGITUDES'
]

data = {}
for var in variables:
    data[var] = extract_js_array(target_script_content, var)

# Organize it into a pandas DataFrame. 
property_info = pd.DataFrame(data) 

property_info.head()

TypeError: expected string or bytes-like object, got 'NoneType'

In [79]:
test = pd.DataFrame(extract_js_array)

ValueError: DataFrame constructor not properly called!

## Try different HTML

In [65]:
response2 = requests.get("https://data.eco-counter.com/public2/?id=100031235") # Sends an HTTP GET request to the target URL


In [67]:
print(response2.raise_for_status()) # Checks the status code of the HTTP response
# If the HTTP request was successful (i.e., the status code is in the 200–299 range), no exception information will be returned. 
# If the server returns an error code (i.e., the status code is 400 or above), raise_for_status() will raise an HTTPError.

None


In [69]:
html_content2 = response2.text #  get the body of the response, i.e., the HTML content of the page, as a string
soup2 = BeautifulSoup(html_content, 'html.parser') # parsing HTML content

In [71]:
soup2.find_all("script")

[<script src="js/vendor/modernizr-2.6.2.min.js"></script>,
 <script src="js/vendor/jquery.min.js"></script>,
 <script src="js/vendor/leaflet.js"></script>,
 <script src="js/vendor/leaflet-plugins-1.2.1/layer/tile/Google.js"></script>,
 <script src="js/vendor/jqplot/jquery.jqplot.min.js"></script>,
 <script src="js/vendor/jqplot/jqplot.barRenderer.min.js"></script>,
 <script src="js/vendor/jqplot/jqplot.categoryAxisRenderer.min.js"></script>,
 <script src="js/vendor/jqplot/jqplot.pointLabels.min.js"></script>,
 <script src="js/vendor/jqplot/jqplot.dateAxisRenderer.min.js"></script>,
 <script src="js/vendor/jqplot/jqplot.canvasTextRenderer.min.js"></script>,
 <script src="js/vendor/jqplot/jqplot.canvasAxisTickRenderer.min.js"></script>,
 <script src="js/vendor/jqplot/jqplot.highlighter.min.js"></script>,
 <script src="js/vendor/jqplot/jqplot.cursor.min.js"></script>,
 <script src="js/vendor/slidesjs/slides.min.jquery.js"></script>,
 <script src="js/vendor/moment.min.js"></script>,
 <scri