In [1]:
import requests
import json, re, time
from datetime import datetime
import pandas as pd

In [2]:
# --- Configuration -----------------------------------------------------------
# The API key is read from a local file so it is not stored in the notebook.
with open('api.key','r') as f:
    # strip(): a trailing newline or surrounding whitespace in the file would
    # otherwise be sent as part of the key, both in the 'apikey' header below
    # and in any URL that embeds APIKEY.
    APIKEY = f.read().strip()

# Headers sent with every request to the probe API.
# NOTE(review): the 'cookie' value is a hard-coded session id that appears
# twice; it looks like a leftover from a copied browser/curl request and is
# probably stale -- confirm whether the API needs it at all.
HEADERS = {'apikey': APIKEY,
            'accept': 'application/json',
            'content-type': 'application/json',
            'cookie': 'ASP.NET_SessionId=aiggen1ccck0gq141dgq1sip; ASP.NET_SessionId=aiggen1ccck0gq141dgq1sip'
          }

# Base URL; the API method name is appended to it by the request helpers.
API_ENDPOINT = 'https://kong.speedcheckerapi.com:8443/ProbeAPIv2/'

# Probe attributes requested with each test; sent as "ProbeInfoProperties"
# in the test settings payload.
probeInfoProperties = [
        "ASN",
        "CityName",
        "ConnectionType",
        "CountryCode",
        "DNSResolver",
        "GeolocationAccuracy",
        "IPAddress",
        "Latitude",
        "Longitude",
        "Network",
        "NetworkID",
        "Platform",
        "ProbeID",
        "Version"
    ]

In [3]:
def getTestSettings(cc, url, test_count=10, probe_info_properties=None):
    """Build the JSON-serialisable payload for a StartPageLoadTest request.

    Parameters
    ----------
    cc : str
        Country code placed in the single "Sources" entry.
    url : str
        The one destination URL to test.
    test_count : int, optional
        Value sent as "TestCount". Defaults to 10, the value that was
        previously hard-coded.
    probe_info_properties : list of str, optional
        Probe attributes to request. When omitted (None), the notebook-wide
        ``probeInfoProperties`` list is used, as before.

    Returns
    -------
    dict
        ``{"testSettings": {...}}`` ready to be passed to ``json.dumps``.
    """
    if probe_info_properties is None:
        # Resolved at call time so the default follows the configuration cell.
        probe_info_properties = probeInfoProperties

    return {
        "testSettings": {
            "TestCount": test_count,
            "Sources": [
                {"CountryCode": cc}
            ],
            "Destinations": [url],
            "ProbeInfoProperties": probe_info_properties
        }
    }

### startPageLoadTest function

In [4]:
def startPageLoadTest(test_settings, timeout=30):
    """Submit a page-load test and return its test ID.

    Parameters
    ----------
    test_settings : dict
        Payload as produced by ``getTestSettings``; it is serialised with
        ``json.dumps`` and POSTed to ``API_ENDPOINT + "StartPageLoadTest"``.
    timeout : float, optional
        Seconds to wait for the server before giving up. Without a timeout
        ``requests`` waits indefinitely, which can hang the whole notebook.

    Returns
    -------
    str
        The ``TestID`` from the response when its status text is "OK".
        ``"Request FAILED"`` when the HTTP request itself could not be
        completed (connection error, timeout, ...).
        ``"FAILED"`` when a response arrived but it was not valid JSON, did
        not have the expected structure, or reported a status other than "OK".
    """
    test_url = API_ENDPOINT + "StartPageLoadTest"
    try:
        r = requests.post(test_url,
                          data=json.dumps(test_settings),
                          headers=HEADERS,
                          timeout=timeout)
    except requests.exceptions.RequestException:
        return "Request FAILED"

    try:
        result = json.loads(r.text)['StartPageLoadTestResult']
        if result['Status']['StatusText'] == "OK":
            return result['TestID']
    except (ValueError, KeyError, TypeError):
        # ValueError: body is not JSON (e.g. an HTML error page from a gateway).
        # KeyError/TypeError: JSON without the expected nested keys.
        # Either way the test was not started, so report the usual sentinel
        # instead of aborting a long measurement loop.
        pass

    return "FAILED"

### retrievePageLoadTestResults function

In [5]:
def retrievePageLoadTestResults(testID, timeout=30):
    """Fetch the per-probe results of a previously started page-load test.

    Parameters
    ----------
    testID : str
        Identifier returned when the test was started.
    timeout : float, optional
        Seconds to wait for the server before giving up. Without a timeout
        ``requests`` waits indefinitely.

    Returns
    -------
    list
        The ``PageLoadTestResults`` entries of the response. An empty list is
        returned when the request fails, the body is not valid JSON, or the
        response carries no results.
    """
    url = API_ENDPOINT + "GetPageLoadResults"
    # Let requests build the query string so both values are URL-encoded
    # (the previous string concatenation sent them verbatim and raised
    # TypeError for a non-string testID).
    params = {'apikey': APIKEY, 'testID': testID}

    try:
        r = requests.get(url, params=params, headers=HEADERS, timeout=timeout)
    except requests.exceptions.RequestException:
        return []

    try:
        res = json.loads(r.text)
    except ValueError:
        # Non-JSON body, e.g. an HTML error page.
        return []

    if not isinstance(res, dict):
        return []

    # A missing key or an explicit null both mean "no results".
    return res.get('PageLoadTestResults') or []

## Retrieve results

In [6]:
def _parseTestDate(raw):
    """Turn the API's '/Date(<epoch-ms>+0000)/' value into 'YYYY-MM-DD'.

    A plain integer string is interpreted as epoch seconds. Anything that
    cannot be interpreted is returned unchanged rather than raising, so one
    odd record does not abort the whole collection run.
    """
    if not isinstance(raw, str):
        return raw

    m = re.search(r'/Date\((\d+)\+0000\)/', raw)
    if m:
        # The captured value is in milliseconds; drop them to get seconds.
        seconds = int(m.group(1)) // 1000
    else:
        try:
            seconds = int(raw)
        except ValueError:
            return raw

    # NOTE: fromtimestamp() renders the date in the machine's local time zone.
    return datetime.fromtimestamp(seconds).strftime('%Y-%m-%d')


def getResultsFromTestIDs(df_tests_url):
    """Collect the page-load results for every successfully started test.

    Parameters
    ----------
    df_tests_url : pandas.DataFrame
        Must provide the columns 'domain', 'cc' and 'testID'. Rows whose
        testID is one of the failure sentinels ('FAILED', 'Request FAILED')
        are skipped, since they do not identify a test on the server.

    Returns
    -------
    pandas.DataFrame
        One row per probe result with the columns listed in ``columns``
        below. Empty (but with all columns) when nothing could be retrieved.

    Raises
    ------
    KeyError
        If a result record lacks one of the expected fields.
    """
    columns = ['domain',
               'cc',
               'ProbeInfo',
               'TestDateTime',
               'StatusCode',
               'StatusText',
               'DNSLookupTime',
               'Destination',
               'HTTPStatus',
               'InitialConnection',
               'NumberOfRequests',
               'PageLoadTime',
               'SSLNegotiationTime',
               'TTFB',
               'TotalDownloadedBytes']

    # Both strings are used as "no test ID" markers when starting a test.
    failed_markers = ['FAILED', 'Request FAILED']
    started = df_tests_url.loc[~df_tests_url.testID.isin(failed_markers)]

    # Rows are gathered in a list and the frame is built once at the end:
    # DataFrame.append was removed in pandas 2.0 and copied the whole frame
    # on every call.
    records = []
    for _, row in started.iterrows():
        pageLoadTestResults = retrievePageLoadTestResults(row['testID'])

        # None or an empty list both mean there is nothing to record.
        if not pageLoadTestResults:
            continue

        for res in pageLoadTestResults:
            records.append({
                'domain': row['domain'],
                'cc': row['cc'],
                'ProbeInfo': res['ProbeInfo'],
                'TestDateTime': _parseTestDate(res['TestDateTime']),
                'StatusCode': res['TestStatus']['StatusCode'],
                'StatusText': res['TestStatus']['StatusText'],
                'DNSLookupTime': res['DNSLookupTime'],
                'Destination': res['Destination'],
                'HTTPStatus': res['HTTPStatus'],
                'InitialConnection': res['InitialConnection'],
                'NumberOfRequests': res['NumberOfRequests'],
                'PageLoadTime': res['PageLoadTime'],
                'SSLNegotiationTime': res['SSLNegotiationTime'],
                'TTFB': res['TTFB'],
                'TotalDownloadedBytes': res['TotalDownloadedBytes']
            })

    # dtype=object stores the values as received (no numeric coercion) and
    # matches the dtype of the empty result frame.
    return pd.DataFrame(records, columns=columns, dtype=object)

In [7]:
def runMeasurements(df_links, url_type, filename):
    """Start one page-load test per row and save the resulting test IDs.

    Parameters
    ----------
    df_links : pandas.DataFrame
        Must provide the columns 'domain', 'cc' and the column named by
        ``url_type``.
    url_type : str
        Name of the column that holds the URL to test.
    filename : str
        Path of the '|'-separated CSV file to write, with the columns
        'domain', 'cc' and 'testID'. An existing file is overwritten.

    Each test ID (or failure sentinel) is printed as soon as it is known, so
    progress is visible while the loop runs.
    """
    # Rows are gathered in a list and the frame is built once at the end:
    # DataFrame.append was removed in pandas 2.0 and copied the whole frame
    # on every call.
    records = []

    for _, row in df_links.iterrows():
        test_settings = getTestSettings(row['cc'], row[url_type])
        testID = startPageLoadTest(test_settings)
        print(testID)

        records.append({'domain': row['domain'], 'cc': row['cc'], 'testID': testID})

    df_testID = pd.DataFrame(records, columns=['domain', 'cc', 'testID'])
    df_testID.to_csv(filename, sep='|', encoding='utf-8', index=False)
    

In [26]:
# Load the pages to measure from a '|'-separated file. runMeasurements reads the
# 'domain' and 'cc' columns plus the URL column it is asked for.
# keep_default_na=False keeps fields as plain strings, so a value such as 'NA'
# or an empty field is not turned into NaN.
df_links = pd.read_csv('data/links_sample.csv', sep='|', encoding='utf-8', keep_default_na=False)

In [27]:
# Start a page-load test for every row of df_links using the 'url' column and
# write the returned test IDs to data/url_testID.csv (printed below as they
# arrive; 'FAILED' marks a test that could not be started).
# The commented-out calls run the same measurement for the other URL columns;
# enable one at a time as needed. Re-running this cell starts new tests and
# overwrites the output file.
runMeasurements(df_links, 'url', 'data/url_testID.csv')
#runMeasurements(df_links, 'ori_amp_url', 'data/ori_amp_url_testID_4.csv')
#runMeasurements(df_links, 'amp_viewer_url', 'data/amp_viewer_url_testID2.csv')
#runMeasurements(df_links, 'amp_cdn_url', 'data/amp_cdn_url_testID.csv')

1477e6be-0ff5-444e-8945-8188aa5b08a9
b760f3f6-8ad7-4fca-b1b8-ad56ebc157cc
80d87a63-49bd-4958-b0c3-2108cf4ea994
fbace679-d5e0-438d-a2e5-fc51f85519eb
FAILED
53940313-f601-4c4f-952e-b6996f82c9e7
b92b2823-e20f-4939-8c82-c7d9677521cc
8b928515-fe39-47f0-8789-c3bcbcc7008e
c224555a-f211-441e-9885-dd5fed52e215
5e2b360b-6212-4fe4-98f8-d181ed8d160f
42f18b75-4f31-40e5-a4d0-28c7956d3e69
220aa338-7603-4d63-a88e-c87883696867
9867fca9-a2d2-4135-b6cf-c87875790241
b4e88713-8c1d-4aa3-960d-9b40825fd9a7
7d561dc3-a709-44e5-a11d-a11d534367cf
56f908e0-b1fa-4e17-ac83-f68abb8b2a35
b0c81b95-2f54-4ba9-92da-d8ac35586fa4
4f2cc6ad-9fbc-43c9-bee1-ba15858eef9f
8f4b892d-3cf8-4dde-b5d2-bd493059eab1
84e8af62-665b-465b-8462-7b61089048ef
172f525c-b853-464c-a223-b1276b9f6fb4
aa32deb9-dd94-4fa1-8ef0-2772134d7ecc
844ed1a9-df4f-4220-b14e-194437140fec
9d6cf027-0e46-4b25-8fc6-1dfc26fbe93a
1332c630-fffb-4c12-a341-d9319f625153
4a0c63d3-d27a-44f6-ba33-820e8e5a0ad3
8693f554-6a22-4598-829e-c096f4a8c920
a381cf10-de8e-44d5-9e9a-6f2ec51

In [28]:
# Reload the saved test IDs from disk, so results can be fetched later without
# starting the tests again.
# NOTE(review): the commented-out file names here differ from the ones in the
# commented-out runMeasurements calls (e.g. 'ori_amp_url_testID.csv' vs
# 'ori_amp_url_testID_4.csv') -- confirm which files are the intended inputs.
df_testIDs_url = pd.read_csv('data/url_testID.csv', sep='|', encoding='utf-8', keep_default_na=False)
#df_testIDs_ori_amp = pd.read_csv('data/ori_amp_url_testID.csv', sep='|', encoding='utf-8', keep_default_na=False)
#df_testIDs_amp_viewer = pd.read_csv('data/amp_viewer_url_testID.csv', sep='|', encoding='utf-8', keep_default_na=False)
#df_testIDs_amp_cdn = pd.read_csv('data/amp_cdn_url_testID.csv', sep='|', encoding='utf-8', keep_default_na=False)

In [32]:
# Fetch the per-probe results for the stored test IDs (one API call per test).
# Only the plain-URL run is active; the other variants are kept for reference.
#df_results_ori_amp = getResultsFromTestIDs(df_testIDs_ori_amp)
#df_results_amp_viewer = getResultsFromTestIDs(df_testIDs_amp_viewer)
#df_results_amp_cdn = getResultsFromTestIDs(df_testIDs_amp_cdn)
df_results_url = getResultsFromTestIDs(df_testIDs_url)

In [33]:
# Persist the collected results as a '|'-separated file without the index column.
df_results_url.to_csv("data/url_results.csv", sep='|', encoding='utf-8', index=False)
#df_results_ori_amp.to_csv("data/ori_amp_url_results.csv", sep='|', encoding='utf-8', index=False)
#df_results_amp_viewer.to_csv("data/amp_viewer_url_results.csv", sep='|', encoding='utf-8', index=False)
#df_results_amp_cdn.to_csv("data/amp_cdn_url_results.csv", sep='|', encoding='utf-8', index=False)

In [41]:
# Keep only the measurements that completed successfully: HTTP status '200'
# (compared as a string) and status text 'OK'.
# NOTE(review): df_results_amp_cdn is not assigned by any active line in the
# cells above (its assignment is commented out), so this cell raises NameError
# after Restart & Run All. The 'Unnamed: 0' column in the output suggests the
# frame was loaded from a CSV in a cell that is no longer present -- confirm
# and restore the loading step.
df_results_amp_cdn[(df_results_amp_cdn.HTTPStatus=='200') & (df_results_amp_cdn.StatusText=='OK')]

Unnamed: 0,domain,cc,ProbeInfo,TestDateTime,StatusCode,StatusText,DNSLookupTime,Destination,HTTPStatus,InitialConnection,NumberOfRequests,PageLoadTime,SSLNegotiationTime,TTFB,TotalDownloadedBytes
15,randburgsun.co.za,ZA,"{'ASN': 37457, 'CityName': 'Magaliesburg', 'Co...",2018-07-07,200,OK,214,https://randburgsun-co-za.cdn.ampproject.org/c...,200,255,1,1926,239,1914,2162
16,randburgsun.co.za,ZA,"{'ASN': 37358, 'CityName': 'Johannesburg', 'Co...",2018-07-07,200,OK,177,https://randburgsun-co-za.cdn.ampproject.org/c...,200,218,1,3017,210,2997,2162
17,randburgsun.co.za,ZA,"{'ASN': 37105, 'CityName': 'Johannesburg', 'Co...",2018-07-07,200,OK,73,https://randburgsun-co-za.cdn.ampproject.org/c...,200,370,1,3746,271,3731,2162
18,randburgsun.co.za,ZA,"{'ASN': 37457, 'CityName': 'Durban', 'Connecti...",2018-07-07,200,OK,59,https://randburgsun-co-za.cdn.ampproject.org/c...,200,272,1,4119,232,4110,2141
19,randburgsun.co.za,ZA,"{'ASN': 37105, 'CityName': 'Johannesburg', 'Co...",2018-07-07,200,OK,55,https://randburgsun-co-za.cdn.ampproject.org/c...,200,270,50,6332,245,5510,239922
20,randburgsun.co.za,ZA,"{'ASN': 36874, 'CityName': 'Durban', 'Connecti...",2018-07-07,200,OK,202,https://randburgsun-co-za.cdn.ampproject.org/c...,200,281,21,6393,234,4397,110278
21,randburgsun.co.za,ZA,"{'ASN': 37457, 'CityName': 'Paarl', 'Connectio...",2018-07-07,200,OK,1,https://randburgsun-co-za.cdn.ampproject.org/c...,200,297,51,2810,241,2796,242196
22,germistoncitynews.co.za,ZA,"{'ASN': 37457, 'CityName': 'Magaliesburg', 'Co...",2018-07-07,200,OK,51,https://germistoncitynews-co-za.cdn.ampproject...,200,352,48,1258,323,1245,230576
23,germistoncitynews.co.za,ZA,"{'ASN': 37457, 'CityName': 'Paarl', 'Connectio...",2018-07-07,200,OK,201,https://germistoncitynews-co-za.cdn.ampproject...,200,345,48,2675,311,2661,230601
24,germistoncitynews.co.za,ZA,"{'ASN': 37457, 'CityName': 'Durban', 'Connecti...",2018-07-07,200,OK,51,https://germistoncitynews-co-za.cdn.ampproject...,200,300,49,3592,257,3584,298149
