In [None]:
# Get the API, $50 minimum last time I checked, lot of data with that
# https://dataforseo.com/

import os

# Data For SEO credentials
# DO NOT SHARE THIS INFO !!!
# The values are read from the DATAFORSEO_USER / DATAFORSEO_PASS environment
# variables so the secrets are never saved inside the notebook itself.
# When a variable is not set the value falls back to the empty string.
USER = os.environ.get("DATAFORSEO_USER", "")
PASS = os.environ.get("DATAFORSEO_PASS", "")

# The number of positions to keep for each keyword (results whose position
# is greater than this are left out of the CSV)
SHOW_ONLY = 10

# The name of the file to download
FILE_NAME = 'serp.csv'

# Google configuration: search engine domain, language and location
# sent with every API request
GOOGLE_URL = "google.com"
GOOGLE_LANG = "English"
GOOGLE_LOCATION = "United States"

In [None]:
# List of keywords, one per line.
# The text is split on newlines by the cleaning cell, which also trims and
# lower-cases every keyword and skips empty lines.
keywords_list = """
on page seo tool
advanced seo tool
seo measurement tool
"""

In [None]:
# Boilerplate for API calls

import json
from random import Random
from base64 import b64encode
from http.client import HTTPSConnection, HTTPConnection

class Data4SEO:
    """Small JSON-over-HTTPS client for api.dataforseo.com using Basic auth."""

    # Host every request is sent to.
    domain = "api.dataforseo.com"

    def __init__(self, username, password):
        """Store the credentials used to build the Authorization header."""
        self.username = username
        self.password = password

    def request(self, path, method, data=None):
        """Send one HTTPS request and return the decoded JSON response.

        Args:
            path: Request path on ``domain`` (e.g. ``"/v2/..."``).
            method: HTTP method name such as ``"GET"`` or ``"POST"``.
            data: Optional request body. A ``str`` is sent as UTF-8 bytes;
                anything else (``None``, ``bytes``) is passed through as is.

        Returns:
            The response body parsed with ``json.loads``.

        Raises:
            ValueError: If the response body is not valid JSON
                (``json.JSONDecodeError`` is a subclass).
        """
        connection = HTTPSConnection(self.domain)

        try:
            # UTF-8 instead of ASCII so credentials containing non-ASCII
            # characters do not raise UnicodeEncodeError.
            credentials = ("%s:%s" % (self.username, self.password)).encode("utf-8")
            headers = {
                'Authorization': 'Basic %s' % b64encode(credentials).decode("ascii"),
                'Content-type': 'application/json',
            }

            # http.client encodes a str body as ISO-8859-1, which either
            # fails or produces invalid JSON for non-Latin-1 text; encode
            # explicitly as UTF-8 instead.
            body = data.encode("utf-8") if isinstance(data, str) else data

            connection.request(method, path, headers=headers, body=body)
            response = connection.getresponse()

            return json.loads(response.read().decode("utf-8"))

        finally:
            # Always release the socket, even when the request fails.
            connection.close()

    def get(self, path):
        """Issue a GET request to ``path`` and return the parsed JSON."""
        return self.request(path, 'GET')

    def post(self, path, data):
        """Issue a POST request to ``path`` and return the parsed JSON.

        ``data`` may already be a JSON string; any other object is
        serialized with ``json.dumps`` first.
        """
        if isinstance(data, str):
            data_str = data
        else:
            data_str = json.dumps(data)

        return self.request(path, 'POST', data_str)

In [None]:
# Clean the keywords: trim surrounding whitespace, lower-case, and drop
# lines that are empty *after* trimming (whitespace-only lines would
# otherwise become empty keywords and be sent to the API).
keywords = [
    keyword
    for keyword in (line.strip().lower() for line in keywords_list.split("\n"))
    if keyword
]

In [None]:
# Preview the first 10 cleaned keywords (the whole list if it is shorter)
keywords[:10]

['on page seo tool', 'advanced seo tool', 'seo measurement tool']

In [None]:
# Collected API results: the request loop appends one
# dict(keyword=..., results=...) for every keyword that did not return an error
serp_results = []

In [None]:
# For each keyword call the API and get the top 100 results

# Start from an empty list so that re-running this cell does not append a
# second copy of every keyword's results.
serp_results = []

# Neither the API client nor the random generator depends on the keyword,
# so build them once instead of on every iteration.
api = Data4SEO(USER, PASS)
rnd = Random()

for keyword in keywords:
  print("Getting data for keyword: %s" % keyword)

  # Data for the API: a single task stored under a random integer key
  data = {
      rnd.randint(1, 30000000): dict(
          se_name=GOOGLE_URL,
          se_language=GOOGLE_LANG,
          loc_name_canonical=GOOGLE_LOCATION,
          key=keyword,
      )
  }

  response = api.post("/v2/live/srp_tasks_post", dict(data=data))

  if response["status"] == "error":
    print("Error on keyword: %s" % keyword)
    # %s rather than %d: %d raises TypeError when the code is not a number,
    # and prints integers exactly the same way.
    print("Code: %s Message: %s" % (response["error"]["code"], response["error"]["message"]))
  else:
    print("Got Results for keyword: %s" % keyword)
    serp_results.append(dict(
      keyword=keyword,
      results=response["results"]
    ))

Getting data for keyword: on page seo tool
Got Results for keyword: on page seo tool
Getting data for keyword: advanced seo tool
Got Results for keyword: advanced seo tool
Getting data for keyword: seo measurement tool
Got Results for keyword: seo measurement tool


In [None]:
# Our first CSV row: the column headers. Data rows are added after it
# by the cell that walks serp_results.
csv_data = [["keyword","position","url","title"]]

In [None]:
# Create our CSV rows: one row per organic result whose position is within
# SHOW_ONLY. Everything after the header row is replaced (not appended to),
# so re-running this cell does not duplicate rows.
csv_data[1:] = [
  [serp_result["keyword"], result["result_position"], result["result_url"], result["result_title"]]
  for serp_result in serp_results
  for result in serp_result["results"]["organic"]
  if result["result_position"] <= SHOW_ONLY
]

# Just checking our first 2 lines on the CSV are correct
print(csv_data[:2])

[['keyword', 'position', 'url', 'title'], ['on page seo tool', 1, 'https://www.smartinsights.com/search-engine-optimisation-seo/on-page-optimisation/best-tools-page/', 'Best tools for on-page SEO and how to use them | Smart Insights']]


In [None]:
import io
import csv

# Serialize every row into an in-memory text buffer; QUOTE_NONNUMERIC
# quotes all non-numeric fields and leaves numbers bare.
output = io.StringIO()
writer = csv.writer(output, quoting=csv.QUOTE_NONNUMERIC, delimiter=",")
writer.writerows(csv_data)

# Peek at the first two serialized lines
output.getvalue().split("\r\n")[:2]

['"keyword","position","url","title"',
 '"on page seo tool",1,"https://www.smartinsights.com/search-engine-optimisation-seo/on-page-optimisation/best-tools-page/","Best tools for on-page SEO and how to use them | Smart Insights"']

In [None]:
import pandas as pd

# Rewind the in-memory CSV buffer so pandas reads it from the beginning
output.seek(0)
df = pd.read_csv(output)

# Show the first two parsed rows
df.head(2)

Unnamed: 0,keyword,position,url,title
0,on page seo tool,1,https://www.smartinsights.com/search-engine-op...,Best tools for on-page SEO and how to use them...
1,on page seo tool,2,https://www.internetmarketingninjas.com/seo-to...,On-Page Optimization Tool - Internet Marketing...


In [None]:
from google.colab import drive
from google.colab import files

# Mount Google Drive at /content/drive.
# NOTE(review): none of the visible cells read or write under
# /content/drive (the CSV is written to FILE_NAME and fetched with
# files.download) — confirm whether this mount is still needed.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Write the results to disk. index=False keeps the DataFrame's row index
# out of the file, so the CSV holds only the keyword/position/url/title
# columns instead of gaining an extra unnamed leading column.
df.to_csv(FILE_NAME, index=False)

In [None]:
# Download the file named by FILE_NAME using the google.colab files helper
files.download(FILE_NAME)