In [2]:
import requests
import json, re, time
from datetime import datetime
import pandas as pd

In [3]:
# Read the API key from a local file so the secret stays out of the notebook.
# Surrounding whitespace is stripped: a key file normally ends with a newline,
# and a key containing "\n" is not a valid HTTP header value or URL component.
with open('api.key', 'r') as f:
    APIKEY = f.read().strip()

# Headers sent with every request to the probe API.
# NOTE(review): the cookie is a hard-coded session id (listed twice) —
# presumably copied from a captured request; confirm the API needs it at all.
HEADERS = {
    'apikey': APIKEY,
    'accept': 'application/json',
    'content-type': 'application/json',
    'cookie': 'ASP.NET_SessionId=aiggen1ccck0gq141dgq1sip; ASP.NET_SessionId=aiggen1ccck0gq141dgq1sip',
}

# Base URL; the endpoint name (e.g. "StartPageLoadTest") is appended to it.
API_ENDPOINT = 'https://kong.speedcheckerapi.com:8443/ProbeAPIv2/'

# Probe attributes listed under "ProbeInfoProperties" in every test request.
probeInfoProperties = [
    "ASN",
    "CityName",
    "ConnectionType",
    "CountryCode",
    "DNSResolver",
    "GeolocationAccuracy",
    "IPAddress",
    "Latitude",
    "Longitude",
    "Network",
    "NetworkID",
    "Platform",
    "ProbeID",
    "Version",
]

In [4]:
def getTestSettings(cc, url):
    """Build the request payload for a page-load test from a single country.

    Parameters
    ----------
    cc : str
        Country code placed in the one "Sources" entry (platform "Android").
    url : str
        The only entry of "Destinations".

    Returns
    -------
    dict
        ``{"testSettings": {...}}`` with "TestCount" 10 and the module-level
        ``probeInfoProperties`` list as "ProbeInfoProperties".
    """
    source = {"CountryCode": cc, "Platform": "Android"}
    settings = {
        "TestCount": 10,
        "Sources": [source],
        "Destinations": [url],
        "ProbeInfoProperties": probeInfoProperties,
    }
    return {"testSettings": settings}

In [5]:
def getTestSettingsEUUS(url):
    """Build the request payload for a page-load test from the US and four EU countries.

    Parameters
    ----------
    url : str
        The only entry of "Destinations".

    Returns
    -------
    dict
        ``{"testSettings": {...}}`` with "TestCount" 2, one "Sources" entry per
        country (US, FR, DE, NL, GB; no platform restriction) and the
        module-level ``probeInfoProperties`` list as "ProbeInfoProperties".
    """
    country_codes = ("US", "FR", "DE", "NL", "GB")
    return {
        "testSettings": {
            "TestCount": 2,
            "Sources": [{"CountryCode": code} for code in country_codes],
            "Destinations": [url],
            "ProbeInfoProperties": probeInfoProperties,
        }
    }

### StartPageLoadTest function

In [6]:
def startPageLoadTest(test_settings, timeout=60):
    """Submit a page-load test to the API and return its test ID.

    Parameters
    ----------
    test_settings : dict
        Request payload; it is JSON-encoded and sent as the POST body.
    timeout : float, optional
        Seconds to wait for the API before giving up (default 60). Without a
        timeout a stalled connection would block the notebook indefinitely.

    Returns
    -------
    str
        The "TestID" from the response when its status text is "OK";
        "Request FAILED" when the HTTP request itself could not be completed;
        "FAILED" when the API answered but did not confirm the test, which
        includes a body that is not JSON or lacks the expected fields.
    """
    test_url = API_ENDPOINT + "StartPageLoadTest"
    try:
        r = requests.post(test_url, data=json.dumps(test_settings),
                          headers=HEADERS, timeout=timeout)
    except requests.exceptions.RequestException:
        return "Request FAILED"

    try:
        result = json.loads(r.text)['StartPageLoadTestResult']
        if result['Status']['StatusText'] == "OK":
            return result['TestID']
    except (ValueError, KeyError, TypeError):
        # Error pages and gateway messages are not shaped like a test result;
        # report them as an ordinary failure instead of aborting the whole run.
        return "FAILED"
    return "FAILED"

### retrievePageLoadTestResults function

In [7]:
def retrievePageLoadTestResults(testID, timeout=60):
    """Fetch the results recorded for one page-load test.

    Parameters
    ----------
    testID : str
        Identifier of the test whose results are requested.
    timeout : float, optional
        Seconds to wait for the API before giving up (default 60).

    Returns
    -------
    list
        The value stored under "PageLoadTestResults" in the response. An empty
        list is returned when the request fails, the body is not a JSON
        object, or that key is absent.
    """
    url = API_ENDPOINT + "GetPageLoadResults"
    # Let requests build the query string so both values are URL-encoded.
    query = {'apikey': APIKEY, 'testID': testID}

    try:
        r = requests.get(url, params=query, headers=HEADERS, timeout=timeout)
    except requests.exceptions.RequestException:
        return []

    try:
        res = json.loads(r.text)
    except ValueError:
        # e.g. an HTML error page instead of JSON
        return []

    if not isinstance(res, dict):
        return []
    return res.get('PageLoadTestResults', [])

## Retrieve results

In [8]:
def _parseTestDate(rawDate):
    """Convert a ``/Date(<epoch ms><offset>)/`` stamp into ``YYYY-MM-DD``.

    A value that is already a plain epoch-seconds number is converted as well;
    anything that cannot be interpreted is returned unchanged.
    """
    m = re.search(r'/Date\((\d+)(?:[+-]\d{4})?\)/', str(rawDate))
    try:
        # The stamp carries milliseconds; fromtimestamp() expects seconds.
        seconds = int(m.group(1)) // 1000 if m else int(rawDate)
        # NOTE(review): rendered in the machine's local timezone, so the
        # calendar day can differ between machines — confirm this is intended.
        return datetime.fromtimestamp(seconds).strftime('%Y-%m-%d')
    except (TypeError, ValueError, OverflowError, OSError):
        return rawDate


def getResultsFromTestIDs(df_tests_url):
    """Download the results of every started test into one DataFrame.

    Parameters
    ----------
    df_tests_url : pandas.DataFrame
        One row per test with the columns 'domain', 'cc' and 'testID'. Rows
        whose testID is a failure marker ('FAILED' or 'Request FAILED') are
        skipped, because no test exists to query for them.

    Returns
    -------
    pandas.DataFrame
        One row per probe result, with the columns listed in ``columns``
        below. A field missing from a result is left as an empty string.
    """
    columns = ['domain',
               'cc',
               'ProbeInfo',
               'TestDateTime',
               'StatusCode',
               'StatusText',
               'DNSLookupTime',
               'Destination',
               'HTTPStatus',
               'InitialConnection',
               'NumberOfRequests',
               'PageLoadTime',
               'SSLNegotiationTime',
               'TTFB',
               'TotalDownloadedBytes']
    # Result fields copied verbatim into the column of the same name.
    metric_keys = columns[6:]

    failed = df_tests_url['testID'].isin(['FAILED', 'Request FAILED'])

    # Collect plain dicts and build the frame once at the end: row-by-row
    # DataFrame.append is quadratic and no longer exists in pandas >= 2.0.
    records = []
    for _, row in df_tests_url.loc[~failed].iterrows():
        pageLoadTestResults = retrievePageLoadTestResults(row['testID'])
        if not pageLoadTestResults:
            continue

        for res in pageLoadTestResults:
            if not isinstance(res, dict):
                continue
            status = res.get('TestStatus') or {}
            # Every field is looked up per result with its own default, so a
            # missing key can never inherit the previous probe's value.
            record = {
                'domain': row['domain'],
                'cc': row['cc'],
                'ProbeInfo': res.get('ProbeInfo', ""),
                'TestDateTime': _parseTestDate(res.get('TestDateTime', "")),
                'StatusCode': status.get('StatusCode', ""),
                'StatusText': status.get('StatusText', ""),
            }
            for key in metric_keys:
                record[key] = res.get(key, "")
            records.append(record)

    return pd.DataFrame(records, columns=columns)

In [9]:
def runMeasurements(df_links, url_type, filename):
    """Start one single-country page-load test per link and save the test IDs.

    Parameters
    ----------
    df_links : pandas.DataFrame
        One row per link; must contain 'domain', 'cc' and the column named by
        ``url_type``.
    url_type : str
        Name of the column holding the URL to test.
    filename : str
        Path of the pipe-separated CSV written with the columns
        'domain', 'cc' and 'testID'.
    """
    # Gather rows in a list and build the frame once: row-by-row
    # DataFrame.append is quadratic and no longer exists in pandas >= 2.0.
    records = []
    for _, row in df_links.iterrows():
        test_settings = getTestSettings(row['cc'], row[url_type])
        testID = startPageLoadTest(test_settings)
        print(testID)  # progress feedback: one line per submitted test

        records.append({'domain': row['domain'], 'cc': row['cc'], 'testID': testID})

    df_testID = pd.DataFrame(records, columns=['domain', 'cc', 'testID'])
    df_testID.to_csv(filename, sep='|', encoding='utf-8', index=False)
    

In [10]:
def runMeasurementsEUUS(df_links, url_type, filename):
    """Start one US/EU page-load test per link and save the test IDs.

    Parameters
    ----------
    df_links : pandas.DataFrame
        One row per link; must contain 'domain', 'cc' and the column named by
        ``url_type``. The 'cc' value is only recorded alongside the test ID;
        the probe countries come from ``getTestSettingsEUUS``.
    url_type : str
        Name of the column holding the URL to test.
    filename : str
        Path of the pipe-separated CSV written with the columns
        'domain', 'cc' and 'testID'.
    """
    # Gather rows in a list and build the frame once: row-by-row
    # DataFrame.append is quadratic and no longer exists in pandas >= 2.0.
    records = []
    for _, row in df_links.iterrows():
        test_settings = getTestSettingsEUUS(row[url_type])
        testID = startPageLoadTest(test_settings)
        print(testID)  # progress feedback: one line per submitted test

        records.append({'domain': row['domain'], 'cc': row['cc'], 'testID': testID})

    df_testID = pd.DataFrame(records, columns=['domain', 'cc', 'testID'])
    df_testID.to_csv(filename, sep='|', encoding='utf-8', index=False)

In [11]:
# Links to measure, read from a pipe-separated file. keep_default_na=False
# keeps empty cells and strings such as "NA" as text instead of NaN.
df_links = pd.read_csv(
    'data/links_sample.csv',
    sep='|',
    encoding='utf-8',
    keep_default_na=False,
)

In [12]:
# Start the tests for ONE URL variant per run; uncomment the variant to measure.
# Each call submits a test for every row of df_links and writes the test IDs
# to the given file.
#runMeasurements(df_links, 'url', 'data/url_testID.csv')
#runMeasurements(df_links, 'ori_amp_url', 'data/ori_amp_url_testID.csv')
runMeasurements(
    df_links,
    url_type='amp_viewer_url',
    filename='data/amp_viewer_url_testID-17072018.csv',
)
#runMeasurements(df_links, 'amp_cdn_url', 'data/amp_cdn_url_testID.csv')
#runMeasurementsEUUS(df_links, 'url', 'data/url_testID_EUUS.csv')


9357b9d4-8293-4483-bdcf-fbb44a7976fe
fb6c6f2f-c94c-4dde-aae2-1ad71a48a0ba
8ddf29ab-0094-4655-ae3b-1f65af04d8f4
83ef2132-fe53-4823-abeb-46d658584231
8314a713-a633-43ca-9571-5c58fc57a979
149cdc7e-dc2d-4841-bfd3-567ce05d9084
3113bcf4-62c3-40f2-92ed-3fafe2932ebf
7e538de1-0d3b-41c7-8135-7d2d8bc0ae7a
2d4dfb7c-d01e-4d37-a32e-4f3eda39652a
ba7de1d3-27d0-48fb-8740-756edc4316f0
19c702fd-1f21-4be9-9b8a-f5fc5803bd56
56fac131-7b1b-4ff9-a431-2cbcd450040d
13794118-d890-4916-b2ee-3daa6a7a40a9
c5a3a4c4-775f-408b-9a00-455f5c064479
1b0196d9-a44f-4eca-9d1b-67302b84d727
5582b8a5-a205-445a-a808-bd7d3a106afa
6270fd82-c654-41ec-a302-bd4504ab5eac
9221fafd-01d2-42de-849a-daad8bd65f72
d65c7b71-aaa2-4f1f-9caf-614e360a6938
984c46d6-fd42-4f3f-9e8e-fe9df52867b4
be287c38-ee3c-43fb-a5b4-a15ec0aa8f57
ff96821d-60f7-4994-90cc-6c9c4a7e9720
56446cc3-5b8a-43b7-a51f-e13aa5ae293c
410e2769-bf38-49d0-8579-aca6ca29146f
79d2e270-cfe0-4ab6-ad83-5ddfcad8d0bb
37f8345e-fb31-4bcc-855e-9e25bb960978
42227720-63ed-4396-83ea-96db543394a9
5

In [14]:
# Reload the saved test IDs for the variant being processed; uncomment the
# line matching the file written by the measurement run.
#df_testIDs_url = pd.read_csv('data/url_testID.csv', sep='|', encoding='utf-8', keep_default_na=False)
#df_testIDs_ori_amp = pd.read_csv('data/ori_amp_url_testID.csv', sep='|', encoding='utf-8', keep_default_na=False)
df_testIDs_amp_viewer = pd.read_csv(
    'data/amp_viewer_url_testID-17072018.csv',
    sep='|',
    encoding='utf-8',
    keep_default_na=False,
)
#df_testIDs_amp_cdn = pd.read_csv('data/amp_cdn_url_testID.csv', sep='|', encoding='utf-8', keep_default_na=False)
#df_testIDs_urlEUUS = pd.read_csv('data/url_testID_EUUS.csv', sep='|', encoding='utf-8', keep_default_na=False)


In [15]:
# Download the results for the loaded test IDs (one API request per test ID).
# Only the AMP-viewer variant is active; the other variants are kept commented
# out so they can be re-enabled for their own runs.
#df_results_ori_amp = getResultsFromTestIDs(df_testIDs_ori_amp)
df_results_amp_viewer = getResultsFromTestIDs(df_testIDs_amp_viewer)
#df_results_amp_cdn = getResultsFromTestIDs(df_testIDs_amp_cdn)
#df_results_url = getResultsFromTestIDs(df_testIDs_url)
#df_results_url_EUUS = getResultsFromTestIDs(df_testIDs_urlEUUS)


In [16]:
# Persist the downloaded results as a pipe-separated CSV.
# NOTE(review): the "-1707201" suffix looks like a truncated "17072018"
# (compare the test-ID file name) — confirm before renaming, since other
# notebooks may already read this path.
#df_results_url.to_csv("data/url_results" + time.strftime("%d%m%Y") + ".csv", sep='|', encoding='utf-8', index=False)
#df_results_ori_amp.to_csv("data/ori_amp_url_results.csv", sep='|', encoding='utf-8', index=False)
df_results_amp_viewer.to_csv(
    "data/amp_viewer_url_results-1707201.csv",
    sep='|',
    encoding='utf-8',
    index=False,
)
#df_results_amp_cdn.to_csv("data/amp_cdn_url_results.csv", sep='|', encoding='utf-8', index=False)
#df_results_url_EUUS.to_csv("data/url_results_EUUS" + time.strftime("%d%m%Y") + ".csv", sep='|', encoding='utf-8', index=False)


In [18]:
# Show the collected AMP-viewer results as the cell output.
# NOTE(review): this renders the whole frame; consider .head() if it grows large.
df_results_amp_viewer

Unnamed: 0,domain,cc,ProbeInfo,TestDateTime,StatusCode,StatusText,DNSLookupTime,Destination,HTTPStatus,InitialConnection,NumberOfRequests,PageLoadTime,SSLNegotiationTime,TTFB,TotalDownloadedBytes
0,randburgsun.co.za,ZA,"{'ASN': 37353, 'CityName': 'Cape Town', 'Conne...",2018-07-17,200,OK,0,https://www.google.com/amp/s/sandtonchronicle....,200,0,0,1526,0,1472,1001
1,randburgsun.co.za,ZA,"{'ASN': 37457, 'CityName': 'Johannesburg', 'Co...",2018-07-17,200,OK,0,https://www.google.com/amp/s/sandtonchronicle....,200,0,0,1962,0,1877,1001
2,randburgsun.co.za,ZA,"{'ASN': 37650, 'CityName': 'Pretoria', 'Connec...",2018-07-17,200,OK,0,https://www.google.com/amp/s/sandtonchronicle....,200,0,0,1403,0,1315,1001
3,randburgsun.co.za,ZA,"{'ASN': 37457, 'CityName': 'Pretoria', 'Connec...",2018-07-17,200,OK,0,https://www.google.com/amp/s/sandtonchronicle....,200,0,0,1301,0,973,1001
4,randburgsun.co.za,ZA,"{'ASN': 5713, 'CityName': 'Johannesburg', 'Con...",2018-07-17,200,OK,0,https://www.google.com/amp/s/sandtonchronicle....,200,0,0,1831,0,1680,1001
5,randburgsun.co.za,ZA,"{'ASN': 15022, 'CityName': 'Stellenbosch', 'Co...",2018-07-17,200,OK,0,https://www.google.com/amp/s/sandtonchronicle....,200,0,0,2135,0,2041,1001
6,randburgsun.co.za,ZA,"{'ASN': 37611, 'CityName': 'Stellenbosch', 'Co...",2018-07-17,408,Test timeout,0,https://www.google.com/amp/s/sandtonchronicle....,200,0,0,0,0,0,1001
7,randburgsun.co.za,ZA,"{'ASN': 37457, 'CityName': 'Plettenberg Bay', ...",2018-07-17,200,OK,0,https://www.google.com/amp/s/sandtonchronicle....,200,0,0,1086,0,1026,1001
8,randburgsun.co.za,ZA,"{'ASN': 37457, 'CityName': 'Johannesburg', 'Co...",2018-07-17,200,OK,0,https://www.google.com/amp/s/sandtonchronicle....,200,0,0,1244,0,1178,1001
9,randburgsun.co.za,ZA,"{'ASN': 37457, 'CityName': 'Durban', 'Connecti...",2018-07-17,200,OK,0,https://www.google.com/amp/s/sandtonchronicle....,200,0,0,2267,0,1697,1001
