In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
import os
import re
from tqdm import tqdm
from tqdm import tqdm_notebook
import requests
import json
import requests_cache
from IPython.core.display import clear_output
import pydata_google_auth
import pandas_gbq
import warnings
warnings.filterwarnings('ignore')
from scipy import stats
from scipy.stats import norm,skew
import time
from retry import retry

In [2]:
!pip install pydata_google_auth pandas_gbq --quiet
!pip install retry --quiet
!pip install requests_cache --quiet

In [4]:
# BigQuery query returning, for every 'Live' account, one row per
# (customer_account, normalised postcode, mobile count, contact number).
#   * postcode is upper-cased with spaces removed.
#   * account_mobile_contact is the first of custinvmobile, custmobile,
#     custinvphone, custphone that starts with '07' and is 11 characters long,
#     falling back to the string '0' when none qualifies.
#   * Rows are sorted by column 3 (mobile count) then column 5, both descending.
# NOTE(review): the GROUP BY includes customer_account, so COUNT(postcode) is
#   evaluated per account rather than per postcode -- confirm that
#   "accounts_in_postcode" really means what its name suggests.
# NOTE(review): the head-office filter compares the raw postcode column with
#   'NW9 5AB', so differently formatted spellings (e.g. 'NW95AB') would not be
#   excluded -- confirm the source formatting.
# NOTE(review): the tables live in `uw-data-warehouse-dev` while the query is
#   later run with project_id "uw-data-warehouse-prod" -- confirm this is intended.
SQL="""
WITH mobile_contacts AS
    (
    SELECT 
        customer_account
        ,REPLACE(UPPER(postcode),' ','') AS postcode
        ,mobile AS number_of_uw_mobiles_per_account
        ,COALESCE(IFNULL(CASE WHEN custinvmobile LIKE '07%' AND LENGTH(custinvmobile) = 11 THEN custinvmobile ELSE NULL END
                 ,IFNULL(CASE WHEN custmobile LIKE '07%' AND LENGTH(custmobile) = 11 THEN custmobile ELSE NULL END
                 ,IFNULL(CASE WHEN custinvphone LIKE '07%' AND LENGTH(custinvphone) = 11 THEN custinvphone ELSE NULL END
                 ,IFNULL(CASE WHEN custphone LIKE '07%' AND LENGTH(custphone) = 11 THEN custphone ELSE NULL END, '0'))))) AS account_mobile_contact
        ,COUNT(postcode) AS accounts_in_postcode
    FROM `uw-data-warehouse-dev.data_platform_customer.customer` 
    LEFT JOIN `uw-data-warehouse-dev.data_platform_bill.customer` ON custaccountno = customer_account
    WHERE account_status = 'Live'
        AND postcode != 'NW9 5AB' --Many mobiles registered at headoffice
    GROUP BY 1,2,3,4
    )
SELECT 
    *
FROM mobile_contacts
ORDER BY 3 DESC, 5 DESC -- Ordered by number of mobiles
"""

In [5]:
# Obtain user credentials for BigQuery through the interactive OAuth flow.
# The NOOP credentials cache is passed explicitly, so nothing is stored by
# that cache between runs (presumably why the prompt appears on every run).
cache = pydata_google_auth.cache.NOOP
auth = pydata_google_auth.get_user_credentials(
    credentials_cache=cache,
    scopes=["https://www.googleapis.com/auth/bigquery"],
)

Please visit this URL to authorize this application: https://accounts.google.com/o/oauth2/auth?response_type=code&client_id=262006177488-3425ks60hkk80fssi9vpohv88g6q1iqd.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fbigquery&state=ynvrSD5LYWE3BdvoTbpRfu18N6lfrU&prompt=consent&access_type=offline
Enter the authorization code: <redacted>


In [6]:
# Project id handed to BigQuery when running the query.
PROJECT = "uw-data-warehouse-prod"

# Execute the query and download the full result set as a DataFrame.
df = pandas_gbq.read_gbq(
    SQL,
    project_id=PROJECT,
    credentials=auth,
)

Downloading: 100%|██████████| 648628/648628 [00:29<00:00, 22337.39rows/s]


In [5]:
# Load the postcode table from a local pickle cache when one exists; otherwise
# run the BigQuery query once and persist the result so later runs skip the
# download.
# NOTE: only unpickle files written by this notebook -- pickle is unsafe on
# untrusted data.
postcodes_pickle_path='postcodes.pkl'
if os.path.isfile(postcodes_pickle_path):
    # Read from the same path that was just checked, not a second hard-coded copy.
    postcodes=pd.read_pickle(postcodes_pickle_path)
else:
    postcodes=pandas_gbq.read_gbq(SQL,credentials=auth,project_id=PROJECT)
    # Persist the frame that was just downloaded. Pickling `df` here would raise
    # NameError on a fresh kernel when the cell defining `df` has not been run.
    postcodes.to_pickle(postcodes_pickle_path)

In [None]:
# Reload the cached postcode table, rebuilding the cache from BigQuery when the
# pickle file is missing.
if os.path.isfile(postcodes_pickle_path):
    postcodes=pd.read_pickle(postcodes_pickle_path)
else:
    postcodes=pandas_gbq.read_gbq(SQL,credentials=auth,project_id=PROJECT)
    postcodes.to_pickle(postcodes_pickle_path)

In [37]:
os.makedirs('responses', exist_ok=True)
@retry(delay=5,backoff=5)
def fetch(postcode):
    """Download the mobile-coverage record for one postcode and cache it on disk.

    The JSON payload returned by the API is written to
    ``./responses/<postcode>.txt``. When a non-empty file for the postcode
    already exists, the function returns immediately without a network call.

    Args:
        postcode: Postcode string, used verbatim in the URL query string and
            in the cache file name.

    Returns:
        None. The result is the file written to disk.

    Raises:
        Any exception raised by the HTTP request, JSON decoding or the file
        write. The ``retry`` decorator calls the function again after a delay
        (5 seconds initially, multiplied by 5 after each failure).
    """
    fname=f'./responses/{postcode}.txt'
    # An empty file is the leftover of an earlier failed download, not a cached
    # response, so it must not short-circuit the fetch.
    if os.path.isfile(fname) and os.path.getsize(fname) > 0:
        return
    base_url=f'https://ofcomapi.samknows.com/mobile-coverage-pc-enhanced?postcode={postcode}'
    # Fetch and serialise BEFORE opening the file: opening it first would leave
    # an empty file behind on failure, and the retry would then treat the
    # postcode as already downloaded.
    # The timeout stops one stalled connection from hanging the whole scrape;
    # a timeout raises and is handled by the retry decorator.
    payload=json.dumps(requests.get(base_url, timeout=30).json())
    with open(fname,'w') as f:
        f.write(payload)

In [38]:
# Fetch each distinct postcode exactly once. Iterating the raw column would
# visit duplicates and overrun the progress-bar total, which counts unique
# values. Missing postcodes are dropped because they cannot be queried.
unique_postcodes=postcodes.postcode.dropna().drop_duplicates().tolist()
postcodes_tqdm=tqdm_notebook(unique_postcodes, total=len(unique_postcodes))
for postcode in postcodes_tqdm:
    # refresh=False lets tqdm redraw on its own schedule; forcing a redraw on
    # every iteration floods the notebook ("IOPub message rate exceeded") when
    # cached postcodes are skipped quickly.
    postcodes_tqdm.set_description("Processing {}".format(postcode), refresh=False)
    fetch(postcode)

HBox(children=(FloatProgress(value=0.0, max=435594.0), HTML(value='')))




## Scrape broadband API

In [12]:
# Postcodes to look up, read from a CSV in the working directory.
# The scrape loop below reads its `postcode` column.
mobile_postcodes = pd.read_csv("mobile_postcodes.csv")

In [19]:
os.makedirs('mobile_responses',exist_ok=True)
@retry(delay=5,backoff=5)
def fetch(postcode):
    """Download the fixed-line coverage record for one postcode and cache it on disk.

    The JSON payload returned by the API is written to
    ``./mobile_responses/<postcode>.txt`` (the directory name says "mobile"
    although the endpoint queried is the fixed-line one). When a non-empty
    file for the postcode already exists, the function returns immediately
    without a network call.

    Note: this definition reuses the name ``fetch`` and therefore replaces any
    earlier function of that name in the kernel.

    Args:
        postcode: Postcode string, used verbatim in the URL query string and
            in the cache file name.

    Returns:
        None. The result is the file written to disk.

    Raises:
        Any exception raised by the HTTP request, JSON decoding or the file
        write. The ``retry`` decorator calls the function again after a delay
        (5 seconds initially, multiplied by 5 after each failure).
    """
    fname=f'./mobile_responses/{postcode}.txt'
    # An empty file is the leftover of an earlier failed download, not a cached
    # response, so it must not short-circuit the fetch.
    if os.path.isfile(fname) and os.path.getsize(fname) > 0:
        return
    base_url=f'https://ofcomapi.samknows.com/fixed-line-coverage-pc?postcode={postcode}'
    # Fetch and serialise BEFORE opening the file: opening it first would leave
    # an empty file behind on failure, and the retry would then treat the
    # postcode as already downloaded.
    # The timeout stops one stalled connection from hanging the whole scrape;
    # a timeout raises and is handled by the retry decorator.
    payload=json.dumps(requests.get(base_url, timeout=30).json())
    with open(fname,'w') as f:
        f.write(payload)

In [20]:
# Fetch each distinct postcode exactly once. Iterating the raw column would
# visit duplicates and overrun the progress-bar total, which counts unique
# values. Missing postcodes are dropped because they cannot be queried.
unique_mobile_postcodes=mobile_postcodes.postcode.dropna().drop_duplicates().tolist()
postcodes_tqdm=tqdm_notebook(unique_mobile_postcodes,total=len(unique_mobile_postcodes))
for postcode in postcodes_tqdm:
    # refresh=False lets tqdm redraw on its own schedule; forcing a redraw on
    # every iteration floods the notebook ("IOPub message rate exceeded") when
    # cached postcodes are skipped quickly.
    postcodes_tqdm.set_description('Processing {}'.format(postcode), refresh=False)
    fetch(postcode)

HBox(children=(FloatProgress(value=0.0, max=188026.0), HTML(value='')))

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

IOPub message rate exceed




In [15]:
# Sample fixed-line coverage request for a single postcode, used to inspect
# the shape of the API response.
url = 'https://ofcomapi.samknows.com/fixed-line-coverage-pc?postcode=SE114HQ'

In [17]:
# Request the sample URL, decode the JSON body and re-serialise it to a string
# so the cell displays the payload.
json.dumps(
    requests.get(url).json()
)

{'data': {'postcode': 'SE114HQ',
  'max_adsl_predicted_down': 19,
  'max_adsl_predicted_up': 1,
  'max_sfbb_predicted_down': 200,
  'max_sfbb_predicted_up': 20,
  'max_ufbb_predicted_down': -1,
  'max_ufbb_predicted_up': -1,
  'adsl_availability': 100,
  'sfbb_availability': 100,
  'ufbb_availability': 0},
 'code': 'OK',
 'message': 'Request Successful'}

In [93]:
# Scratch postcode strings (one upper-case, one lower-case) for trying out the
# regex split below.
text, a = 'SE114HQ', 'w36qs'

In [109]:
# Keep everything before the first run of digits, i.e. the leading letters of
# the postcode.
re.split(r'[0-9]+', text)[0]

'SE'

In [None]:
# Display the contents of speed_new.csv from the working directory.
pd.read_csv("speed_new.csv")