# Packages

In [64]:
# Import essential packages
import urllib
from urllib.parse import urlencode
import requests
import pandas as pd
import numpy as np
import os.path, time
import hashlib

import warnings

# SettingWithCopyWarning lives in pandas.errors on newer pandas releases and in
# pandas.core.common on older ones. Try both locations so this cell keeps
# working across versions, and skip the filter when neither provides the class.
try:
    from pandas.errors import SettingWithCopyWarning
except ImportError:
    try:
        from pandas.core.common import SettingWithCopyWarning
    except ImportError:
        SettingWithCopyWarning = None

if SettingWithCopyWarning is not None:
    warnings.simplefilter(action="ignore", category=SettingWithCopyWarning)


### Temporary annotations
# https://api.semrush.com/?type=phrase_organic&key={{key}}&phrase=bouwmaterialen&export_columns=Dn,Ur,Fk,Fp&database=nl&display_limit=20
# https://api.semrush.com/?type=phrase_organic&key={{key}}&phrase=bouwmaterialen&export_columns=Dn,Ur,Fk,Fp&database=nl&display_limit=5
# https://api.semrush.com/?type=phrase_organic&key={{key}}&phrase=bouwmaterialen&export_columns=Dn,Ur,Ts&database=nl&display_limit=1

# Variables

In [65]:
### SEMrush API credentials
# The key is read from the environment so the secret is never stored in the
# notebook. Set SEMRUSH_API_KEY before starting the kernel.
SEMRUSH_API_KEY = os.environ.get('SEMRUSH_API_KEY', '')
if not SEMRUSH_API_KEY:
    warnings.warn("SEMRUSH_API_KEY is not set; SEMrush API requests will not be authorised.")

### Below code is for backlinks
# NOTE(review): the sample keyword URLs in the annotations above use
# https://api.semrush.com/ without the analytics/v1/ path - confirm that this
# endpoint is also the right one for the phrase_organic request built from `params`.
endpoint_url = 'https://api.semrush.com/analytics/v1/'
export_columns = 'Dn,Ur'

# The first key of every parameter dict carries a leading '?': semrush_call()
# turns that marker into the start of the query string.

## Below code is for keywords
params = {
    "?type": 'phrase_organic',
    'key': SEMRUSH_API_KEY,
    'phrase': 'bouwmaterialen',
    'export_columns': export_columns, # Columns from https://www.semrush.com/api-analytics/#columns
    'database': 'nl', # Country of market
    'display_limit': '5', # Count of retrieving results
    }

# Below code is for referring backlinks.
backlink_params = {
    '?key': SEMRUSH_API_KEY,
    'type': 'backlinks_refdomains',
    'target': '', # Filled in per row by backlink()
    'target_type': 'url',
    'export_columns': 'backlinks_num',
    'display_limit': '1'
    }

# Below code is for outbound links
outbound_params = {
    '?key': SEMRUSH_API_KEY,
    'type': 'backlinks',
    'target': '', # Filled in per row by outbound()
    'target_type': 'url',
    'export_columns': 'external_num',
    'display_limit': '1'
    }

### Do you want to overwrite the data if it already exists?
potential_overwrite = False

# Functions

In [14]:
### Function used to build the request URL and log it, to monitor credit use for SEMrush API
def semrush_call(params, endpoint=None):
    """Build the SEMrush request URL for ``params`` and print it.

    Parameters
    ----------
    params : dict
        Query parameters. The first key may carry a leading ``'?'`` (as the
        parameter dicts in this notebook do); that marker is dropped and the
        query string is always introduced by a single ``'?'``.
    endpoint : str, optional
        Base URL to send the request to. Defaults to the notebook-level
        ``endpoint_url``.

    Returns
    -------
    str
        The complete request URL, including the API key.
    """
    import re  # local import: only needed to mask the key in the log line

    if endpoint is None:
        endpoint = endpoint_url

    query = urllib.parse.urlencode(params, doseq=True)
    # Only the leading, encoded '?' marker is removed. Replacing every '%3F'
    # would also rewrite question marks inside encoded values (for example a
    # target URL that has its own query string) and corrupt the request.
    if query.startswith('%3F'):
        query = query[3:]
    main_call = urllib.parse.urljoin(endpoint, '?' + query)

    # Log the call without the secret so the key does not end up in saved output.
    print(re.sub(r'([?&]key=)[^&]*', r'\1***', main_call))
    return main_call

### Function used to parse data from semrush_call
def parse_response(call_data):
    """Parse a semicolon-separated SEMrush response into a list of row dicts.

    Parameters
    ----------
    call_data : bytes or str
        Raw response body. The first non-empty line is treated as the header
        and skipped; every following non-empty line is one data row.

    Returns
    -------
    list of dict
        One dict per data row, keyed by the 0-based field position, with
        surrounding quotes and whitespace control characters stripped from
        each value. Empty list when there is no data row (empty body or a
        single-line response).
    """
    if isinstance(call_data, (bytes, bytearray)):
        # Decode as UTF-8: 'unicode_escape' would garble non-ASCII characters
        # and can raise on backslashes in the data.
        text = call_data.decode('utf-8', errors='replace')
    else:
        text = str(call_data)

    # Accept '\r\n', '\n' and bare '\r' line endings; drop empty lines.
    normalised = text.replace('\r\n', '\n').replace('\r', '\n')
    lines = [line for line in normalised.split('\n') if line]

    # lines[0] is the header row; slicing keeps this safe for an empty body.
    return [
        {position: field.strip('"\n\r\t') for position, field in enumerate(line.split(';'))}
        for line in lines[1:]
    ]

### Function to check if query has already been executed before, and if so, use that result
def existing_file(params_hashed, overwrite=None):
    """Tell whether a cached CSV for this query should be reused.

    Parameters
    ----------
    params_hashed : str
        Hash identifying the query; the cache file is ``calls\\call-<hash>.csv``.
    overwrite : bool, optional
        When ``True`` an existing cache file is ignored so the query is run
        again. Defaults to the notebook-level ``potential_overwrite``.

    Returns
    -------
    bool
        ``True`` when the cache file can be opened and must be reused,
        ``False`` when it is missing/unreadable or has to be regenerated.
    """
    if overwrite is None:
        overwrite = potential_overwrite

    # The backslash is escaped explicitly: the path is the same as before, but
    # it no longer relies on the invalid '\c' escape sequence.
    cache_path = f"calls\\call-{params_hashed}.csv"

    try: # Opening (not just stat-ing) also proves the file is readable
        with open(cache_path):
            pass
    except OSError:
        return False

    print("Deze call is al een keer uitgevoerd, ", end = '')
    if overwrite != True: # Anything other than True keeps the cached result
        print("we gaan verder met de bestaande resultaten.")
        print("Dit bestand is laatst gewijzigd op: %s" % time.ctime(os.path.getmtime(cache_path)))
        return True

    print("we maken echter nieuwe resultaten aan.")
    return False

### Function used to give the columns readable names
def rename_columns(finalKeywordData):
    """Return a copy of the frame with readable column names.

    The first column becomes 'Number'; each following column takes, in order,
    the next name from the comma-separated notebook-level ``export_columns``.
    """
    readable_names = export_columns.split(',')

    first_column = finalKeywordData.columns[0]
    renamed = finalKeywordData.rename(columns={first_column: 'Number'})

    # Columns 1..n map onto readable_names[0..n-1].
    for position in range(1, len(renamed.columns)):
        current_name = renamed.columns[position]
        renamed = renamed.rename(columns={current_name: readable_names[position - 1]})

    return renamed

### Function used to hash a given variable
def hash_variable(variable):
    """Return the SHA-256 hex digest of ``str(variable)`` encoded as UTF-8."""
    encoded_text = str(variable).encode('utf-8')
    return hashlib.sha256(encoded_text).hexdigest()
        
### Function used to call all other functions to get the data in a formatted way
def execute_call():
    """Return the keyword results for ``params`` as a DataFrame with readable columns.

    The query is identified by the hash of ``params``. When ``existing_file``
    reports a reusable cache file, the result is loaded from it; otherwise the
    SEMrush API is queried and the parsed rows are written to the cache first.
    In both cases the frame is read back from the CSV so the format is uniform.

    Raises
    ------
    requests.HTTPError
        When the API answers with an HTTP error status; nothing is cached then.
    """
    params_hashed = hash_variable(params)
    # Escaped backslash: same path as before, without the invalid '\c' escape.
    cache_path = f"calls\\call-{params_hashed}.csv"

    if not existing_file(params_hashed): # Execute query and save to CSV file
        response = requests.get(semrush_call(params))
        # Fail loudly instead of caching the body of a failed request.
        response.raise_for_status()
        parsed_data = parse_response(response.content)
        # Make sure the cache directory exists before writing into it.
        os.makedirs("calls", exist_ok=True)
        pd.DataFrame(parsed_data).to_csv(cache_path)

    # Always read from CSV so cached and fresh results have the same format.
    return rename_columns(pd.read_csv(cache_path))
    
def _append_url_metric(dataframe, metric_params, row_limit=None):
    """Return a copy of ``dataframe`` with one SEMrush metric column added.

    For each processed row the URL in column 'Ur' is sent as ``target`` with
    the other ``metric_params``; the first field of the first result row is
    stored, as an integer, in the column named by
    ``metric_params['export_columns']``. Rows that are not processed, or for
    which the API returns no data row, keep NaN.

    ``row_limit`` is the number of leading rows to query. ``None`` keeps the
    historical ``len(dataframe) - 4``, which limits the number of API calls.
    """
    metric_column = metric_params['export_columns']
    enriched = dataframe.copy()
    enriched[metric_column] = np.nan

    if row_limit is None:
        row_limit = len(dataframe) - 4
    row_limit = max(0, row_limit) # A negative limit must not slice from the end

    for row_label in enriched.index[:row_limit]:
        target_url = enriched.at[row_label, 'Ur']
        print(target_url)

        # Work on a copy so the shared parameter dict is not mutated.
        request_params = dict(metric_params, target=target_url)
        response = requests.get(semrush_call(request_params))
        response.raise_for_status()
        parsed_data = parse_response(response.content)

        if not parsed_data:
            warnings.warn(f"No {metric_column} returned for {target_url}; value left empty.")
            continue

        metric_value = int(parsed_data[0][0])
        print(metric_value)

        # Single-step assignment; chained indexing may write to a temporary copy.
        enriched.at[row_label, metric_column] = metric_value

    return enriched

def backlink(dataframe, row_limit=None):
    """Return a copy of ``dataframe`` with the ``backlink_params`` metric column added.

    Queries SEMrush for the URL in column 'Ur' of the first ``row_limit`` rows
    (default: ``len(dataframe) - 4``) using ``backlink_params``.
    """
    return _append_url_metric(dataframe, backlink_params, row_limit)

def outbound(dataframe, row_limit=None):
    """Return a copy of ``dataframe`` with the ``outbound_params`` metric column added.

    Queries SEMrush for the URL in column 'Ur' of the first ``row_limit`` rows
    (default: ``len(dataframe) - 4``) using ``outbound_params``.
    """
    return _append_url_metric(dataframe, outbound_params, row_limit)

# Executing the functions

In [15]:
# Run the keyword query (execute_call loads the cached CSV when one exists,
# otherwise it calls the API) and display the resulting frame
finalKeywordData = execute_call()
finalKeywordData

Deze call is al een keer uitgevoerd, we gaan verder met de bestaande resultaten.
Dit bestand is laatst gewijzigd op: Thu Apr 22 16:47:13 2021


Unnamed: 0,Number,Dn,Ur
0,0,bouwmaat.nl,https://www.bouwmaat.nl/bouwmaterialen
1,1,hornbach.nl,https://www.hornbach.nl/shop/Bouwmateriaal/S44...
2,2,bouwbestel.nl,https://www.bouwbestel.nl/bouwmaterialen.html
3,3,online-bouwmaterialen.nl,https://www.online-bouwmaterialen.nl/
4,4,bouwonline.com,https://www.bouwonline.com/


In [16]:
# Add the column named by backlink_params['export_columns'] to a copy of the
# keyword results and display it
finalBacklinkData = backlink(finalKeywordData)
finalBacklinkData

bouwmaat.nl
https://api.semrush.com/analytics/v1/?key=b9fd485dbbcc0d31bc30e828806fd14c&type=backlinks_refdomains&target=bouwmaat.nl&target_type=root_domain&export_columns=backlinks_num&display_limit=1
26975


Unnamed: 0,Number,Dn,Ur,backlinks_num
0,0,bouwmaat.nl,https://www.bouwmaat.nl/bouwmaterialen,26975.0
1,1,hornbach.nl,https://www.hornbach.nl/shop/Bouwmateriaal/S44...,
2,2,bouwbestel.nl,https://www.bouwbestel.nl/bouwmaterialen.html,
3,3,online-bouwmaterialen.nl,https://www.online-bouwmaterialen.nl/,
4,4,bouwonline.com,https://www.bouwonline.com/,


In [17]:
# Add the column named by outbound_params['export_columns'] to a copy of the
# keyword results and display it
finalOutboundData = outbound(finalKeywordData)
finalOutboundData

bouwmaat.nl
https://api.semrush.com/analytics/v1/?key=b9fd485dbbcc0d31bc30e828806fd14c&type=backlinks&target=bouwmaat.nl&target_type=root_domain&export_columns=external_num&display_limit=1
8


Unnamed: 0,Number,Dn,Ur,external_num
0,0,bouwmaat.nl,https://www.bouwmaat.nl/bouwmaterialen,8.0
1,1,hornbach.nl,https://www.hornbach.nl/shop/Bouwmateriaal/S44...,
2,2,bouwbestel.nl,https://www.bouwbestel.nl/bouwmaterialen.html,
3,3,online-bouwmaterialen.nl,https://www.online-bouwmaterialen.nl/,
4,4,bouwonline.com,https://www.bouwonline.com/,


# Unused code

In [143]:
    #params_hashed = hash_variable(params)
    
    #finaltest = finalKeywordData
#     parameters = {
#     '?key': '{{key}}',
#     'type': 'backlinks_refdomains',
#     'target': target,
#     'target_type': 'root_domain',
#     'export_columns': 'backlinks_num',
#     'display_limit': '1'
#     }

#     if 'backlinks_num' in finaltest.columns:
#         print('do nothing')
#     else:
#         finaltest['backlinks_num'] = np.nan
    
    #for i in range(0, (len(finalKeywordData)-4)):
        #print(finalKeywordData.Dn[i])
#     if(np.isnan(finaltest['backlinks_num'][0])):
#         data = requests.get(semrush_call(params))
#         parsed_data = parse_response(data.content)
#         print(parsed_data)
#         print(type(parsed_data))
    
        #finaltest['backlinks_num'][0] = parsed_data[0]
        #finalKeywordData.to_csv(f"calls\call-{params_hashed}.csv")
    
#     else:
#         print('hallo')

In [73]:
# Display the keyword results frame again for reference
finalKeywordData

Unnamed: 0,Number,Dn,Ur
0,0,bouwmaat.nl,https://www.bouwmaat.nl/bouwmaterialen
1,1,hornbach.nl,https://www.hornbach.nl/shop/Bouwmateriaal/S44...
2,2,bouwbestel.nl,https://www.bouwbestel.nl/bouwmaterialen.html
3,3,online-bouwmaterialen.nl,https://www.online-bouwmaterialen.nl/
4,4,bouwonline.com,https://www.bouwonline.com/
