<a href="https://colab.research.google.com/github/TheMihirNaik/google-search-console-api-using-python/blob/main/6%20-%20GSC%20URL%20Inspection%20API.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Google Search Console API Class Code



In [None]:
# Import required packages
from oauth2client.client import OAuth2WebServerFlow
from googleapiclient.discovery import build
import httplib2
import pandas as pd

class GoogleSearchConsole:
    """Small wrapper around the Google Search Console API client.

    Constructing an instance runs the interactive OAuth flow (see
    ``authenticate``) and stores the resulting service object on
    ``self.webmasters_service``; every other method calls through it.
    """

    # Rows requested per Search Analytics page. A page shorter than this is
    # treated as the last one.
    ROW_LIMIT = 25000

    def __init__(self, client_id, client_secret, oauth_scope, redirect_uri):
        self.webmasters_service = self.authenticate(client_id, client_secret, oauth_scope, redirect_uri)

    def authenticate(self, client_id, client_secret, oauth_scope, redirect_uri):
        """Run the interactive OAuth flow and build the API service object.

        Prints the authorization URL, prompts on stdin for the authorization
        code, exchanges it for credentials and builds the ``searchconsole``
        ``v1`` service.

        Returns:
            The service object, or ``None`` if any step raised (the error is
            printed). Note that the other methods dereference
            ``self.webmasters_service`` and will fail if it is ``None``.
        """
        try:
            # Authenticate with Google Search Console API
            flow = OAuth2WebServerFlow(client_id, client_secret, oauth_scope, redirect_uri)
            authorize_url = flow.step1_get_authorize_url()
            print("Go to the following link in your browser: " + authorize_url)
            auth_code = input("Enter your Authorization Code here:")
            credentials = flow.step2_exchange(auth_code)
            http = httplib2.Http()
            creds = credentials.authorize(http)
            webmasters_service = build('searchconsole', 'v1', http=creds)

            return webmasters_service

        except Exception as e:
            # Best-effort: report the authentication error and signal failure with None
            print("Error during authentication:", str(e))
            return None

    def get_site_list(self):
        """Return the ``siteUrl`` of every property in the ``sites().list()`` response.

        Returns:
            A list of site URL strings; empty when the response carries no
            ``siteEntry`` key.
        """
        site_list = self.webmasters_service.sites().list().execute()

        # A response without 'siteEntry' means there is nothing to list.
        return [entry['siteUrl'] for entry in site_list.get('siteEntry', [])]

    def fetch_search_console_data(self, website_url, start_date, end_date, dimensions, dimensionFilterGroups):
        """Download Search Analytics rows for a property, following pagination.

        Args:
            website_url: Passed as ``siteUrl`` to the query.
            start_date: Passed as ``startDate`` in the request body.
            end_date: Passed as ``endDate`` in the request body.
            dimensions: Sequence of dimension names; they become the leading
                DataFrame columns, in order.
            dimensionFilterGroups: Passed through unchanged in the request body.

        Returns:
            A pandas DataFrame with one column per dimension followed by
            ``clicks``, ``impressions``, ``ctr`` and ``position``. It has zero
            rows (but the same columns) when the API returns no data.
        """
        all_responses = []
        start_row = 0

        # Request pages until one comes back shorter than ROW_LIMIT
        while True:
            request_body = {
                "startDate": start_date,
                "endDate": end_date,
                "dimensions": dimensions,
                "dimensionFilterGroups": dimensionFilterGroups,
                "rowLimit": self.ROW_LIMIT,
                "dataState": "final",
                'startRow': start_row
            }

            response_data = self.webmasters_service.searchanalytics().query(siteUrl=website_url, body=request_body).execute()

            # A response without 'rows' is treated as an empty page instead of
            # raising KeyError.
            rows = response_data.get('rows', [])

            # Flatten each row into: dimension values..., clicks, impressions, ctr, position
            for row in rows:
                all_responses.append(
                    [*row['keys'], row['clicks'], row['impressions'], row['ctr'], row['position']]
                )

            # The next page starts after the rows retrieved so far
            start_row += len(rows)

            print("fetched up to " + str(start_row) + " rows of data")

            if len(rows) < self.ROW_LIMIT:
                break

        # list(...) lets callers pass a tuple of dimensions as well as a list
        df = pd.DataFrame(all_responses, columns=list(dimensions) + ['clicks', 'impressions', 'ctr', 'position'])

        return df

    def url_inspection(self, page_url, website_url, languageCode):
        """Inspect one URL via ``urlInspection().index().inspect``.

        Args:
            page_url: Sent as ``inspectionUrl``.
            website_url: Sent as ``siteUrl``.
            languageCode: Sent as ``languageCode``.

        Returns:
            The executed API response, unmodified.
        """
        request_body = {"inspectionUrl": page_url, "siteUrl": website_url, "languageCode": languageCode}

        # I learned to make this query from Tobias Willmann - Here is the LinkedIn Post he shared. https://www.linkedin.com/feed/update/urn:li:activity:6893990001873670144/
        json_output = self.webmasters_service.urlInspection().index().inspect(body=request_body).execute()

        return json_output


# Declare Credentials

In [None]:
# OAuth client credentials are read from the environment so that the client
# secret is never committed with the notebook. Set GSC_CLIENT_ID and
# GSC_CLIENT_SECRET before running this cell; the fallbacks are placeholders
# only and will not authenticate.
import os

CLIENT_ID = os.environ.get("GSC_CLIENT_ID", "YOUR_CLIENT_ID.apps.googleusercontent.com")
CLIENT_SECRET = os.environ.get("GSC_CLIENT_SECRET", "YOUR_CLIENT_SECRET")
OAUTH_SCOPE = "https://www.googleapis.com/auth/webmasters.readonly"
REDIRECT_URI = 'urn:ietf:wg:oauth:2.0:oob'

# Authenticate & Authorize

In [None]:
# Build the Search Console client. Construction runs the interactive OAuth
# flow: it prints an authorization link and prompts for the resulting code.
gsc = GoogleSearchConsole(
    client_id=CLIENT_ID,
    client_secret=CLIENT_SECRET,
    oauth_scope=OAUTH_SCOPE,
    redirect_uri=REDIRECT_URI,
)



Go to the following link in your browser: https://accounts.google.com/o/oauth2/v2/auth?client_id=1085687348720-cpabg6ttpsqjji0j6pgbkb8sbcdcp4nt.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fwebmasters.readonly&access_type=offline&response_type=code
Enter your Authorization Code here:4/1AcvDMrD3DDXq9ZL-f9AjrYWH07dp2n_DxFFVDeKMXi3d9Vndh22_uKy8HD8


# List of Properties in your GSC Account

In [None]:
# Fetch the site URLs of the properties returned for the authenticated account.
# The bare name on the last line makes the notebook display the list.
sites = gsc.get_site_list()
sites

['https://moneymonkdigital.com/',
 'https://www.monocubed.com/',
 'https://www.tridentpbi.in/']

# URL Inspection API : Single URL

In [None]:
# Property to query, the page within it to inspect, and the language code
# sent with the inspection request.
website_url = 'https://www.tridentpbi.in/'
page_url = website_url + 'products/industries/automobiles/'
languageCode = 'en-US'

In [None]:
# Ask the URL Inspection API for the indexing status of page_url
inspect_result = gsc.url_inspection(
    page_url=page_url,
    website_url=website_url,
    languageCode=languageCode,
)

In [None]:
# Display the raw API response (the bare name renders it as the cell output)
inspect_result

{'inspectionResult': {'inspectionResultLink': 'https://search.google.com/search-console/inspect?resource_id=https://www.tridentpbi.in/&id=CKpaLhMuCQu-FK2TBBP7gg&utm_medium=link&utm_source=api',
  'indexStatusResult': {'verdict': 'PASS',
   'coverageState': 'Submitted and indexed',
   'robotsTxtState': 'ALLOWED',
   'indexingState': 'INDEXING_ALLOWED',
   'lastCrawlTime': '2024-07-28T00:06:38Z',
   'pageFetchState': 'SUCCESSFUL',
   'googleCanonical': 'https://www.tridentpbi.in/products/industries/automobiles/',
   'userCanonical': 'https://www.tridentpbi.in/products/industries/automobiles/',
   'referringUrls': ['https://www.tridentpbi.in/products/corrugated-box-cardboard-cartons/3-ply/',
    'https://www.tridentpbi.in/'],
   'crawledAs': 'MOBILE'},
  'mobileUsabilityResult': {'verdict': 'VERDICT_UNSPECIFIED'}}}

# Option 1 : Scrape URLs from the Sitemap

In [None]:
import requests
from bs4 import BeautifulSoup

def fetch_sitemap_urls(sitemap_url, timeout=30):
    """Download a sitemap and return its page URLs as a DataFrame.

    Args:
        sitemap_url: URL of the XML sitemap to fetch.
        timeout: Seconds to wait for the server before giving up, so a hung
            connection cannot block the notebook indefinitely.

    Returns:
        A pandas DataFrame with a single ``Sitemap_URLs`` column holding the
        text of every ``<loc>`` element except ``<image:loc>`` entries.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status,
            instead of parsing the error page as if it were a sitemap.
    """
    # Send a browser-like User-Agent with the request
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36"
    }

    response = requests.get(sitemap_url, headers=headers, timeout=timeout)
    response.raise_for_status()

    # Parse the raw bytes so the XML parser can honour the document's own
    # encoding declaration rather than the encoding requests guesses for .text
    soup = BeautifulSoup(response.content, 'xml')

    # Extract URLs, excluding image URLs; strip whitespace that pretty-printed
    # sitemaps leave around the value inside <loc>
    urls = [loc.text.strip() for loc in soup.find_all('loc') if 'image:loc' not in str(loc)]

    return pd.DataFrame(urls, columns=['Sitemap_URLs'])

In [None]:
# Sitemap whose URLs will be inspected
sitemap_url = 'https://www.tridentpbi.in/page-sitemap.xml'

# Run the function to download and parse the sitemap
sitemap_urls_df = fetch_sitemap_urls(sitemap_url)

# Display the resulting DataFrame
sitemap_urls_df

Unnamed: 0,Sitemap_URLs
0,https://www.tridentpbi.in/
1,https://www.tridentpbi.in/contact-us/
2,https://www.tridentpbi.in/products/corrugated-...
3,https://www.tridentpbi.in/hot-coffee-cup-sleev...
4,https://www.tridentpbi.in/products/corrugated-...
5,https://www.tridentpbi.in/products/industries/...
6,https://www.tridentpbi.in/products/corrugated-...
7,https://www.tridentpbi.in/products/corrugated-...
8,https://www.tridentpbi.in/products/industries/...
9,https://www.tridentpbi.in/products/industries/...


In [None]:
# Limit to the first 10 URLs for this tutorial - to inspect every URL, assign
# sitemap_urls_df to urls_list instead
urls_list = sitemap_urls_df.head(10)
urls_list

Unnamed: 0,Sitemap_URLs
0,https://www.tridentpbi.in/
1,https://www.tridentpbi.in/contact-us/
2,https://www.tridentpbi.in/products/corrugated-...
3,https://www.tridentpbi.in/hot-coffee-cup-sleev...
4,https://www.tridentpbi.in/products/corrugated-...
5,https://www.tridentpbi.in/products/industries/...
6,https://www.tridentpbi.in/products/corrugated-...
7,https://www.tridentpbi.in/products/corrugated-...
8,https://www.tridentpbi.in/products/industries/...
9,https://www.tridentpbi.in/products/industries/...


# Fetch Indexing Status for all the URLs.

In [None]:
def inspect_urls(urls_list, website_url, language_code='en-US', gsc_client=None):
    """Inspect every URL in ``urls_list`` and attach the index status to each row.

    Args:
        urls_list: DataFrame with a ``Sitemap_URLs`` column of page URLs.
        website_url: Search Console property the pages belong to.
        language_code: Language code forwarded to the inspection request.
        gsc_client: Object exposing ``url_inspection(page_url, website_url,
            language_code)``. Defaults to the module-level ``gsc`` instance.

    Returns:
        A DataFrame containing the columns of ``urls_list`` followed by one
        column per key found in the ``indexStatusResult`` of the responses.
        It keeps the index of ``urls_list``.
    """
    client = gsc if gsc_client is None else gsc_client

    inspection_results = []

    for page_url in urls_list['Sitemap_URLs']:
        print(page_url)

        inspect_result = client.url_inspection(page_url, website_url, language_code)

        # Fall back to an empty record when the response lacks the expected
        # keys, so one incomplete response does not abort the whole run.
        json_output = inspect_result.get('inspectionResult', {}).get('indexStatusResult', {})
        print(json_output)
        print('fetch successful.')

        inspection_results.append(json_output)

    # One result row per input row, numbered by position
    inspection_df = pd.DataFrame(inspection_results, index=pd.RangeIndex(len(inspection_results)))

    # Join by position rather than by label: urls_list may be a filtered or
    # sliced frame whose index is not 0..n-1, and a label-based merge against
    # the positional results would drop or mis-pair rows.
    merged_df = pd.merge(
        urls_list.reset_index(drop=True),
        inspection_df,
        left_index=True,
        right_index=True,
    )
    merged_df.index = urls_list.index

    return merged_df


In [None]:
# Inspect every URL in urls_list (defined in an earlier cell) against the
# website_url property, using the default language code
inspection_results = inspect_urls(urls_list=urls_list, website_url=website_url)

https://www.tridentpbi.in/
{'verdict': 'PASS', 'coverageState': 'Submitted and indexed', 'robotsTxtState': 'ALLOWED', 'indexingState': 'INDEXING_ALLOWED', 'lastCrawlTime': '2024-08-10T12:07:14Z', 'pageFetchState': 'SUCCESSFUL', 'googleCanonical': 'https://www.tridentpbi.in/', 'userCanonical': 'https://www.tridentpbi.in/', 'sitemap': ['https://www.tridentpbi.in/sitemap_index.xml'], 'referringUrls': ['https://tridentpbi.home.blog/', 'https://www.tridentpbi.in/products/corrugated-box-cardboard-cartons/duplex-white/', 'http://www.tridentpbi.in/', 'http://tridentpbi.in/'], 'crawledAs': 'MOBILE'}
fetch successful.
https://www.tridentpbi.in/contact-us/
{'verdict': 'PASS', 'coverageState': 'Submitted and indexed', 'robotsTxtState': 'ALLOWED', 'indexingState': 'INDEXING_ALLOWED', 'lastCrawlTime': '2024-07-21T01:02:10Z', 'pageFetchState': 'SUCCESSFUL', 'googleCanonical': 'https://www.tridentpbi.in/contact-us/', 'userCanonical': 'https://www.tridentpbi.in/contact-us/', 'sitemap': ['https://www.tr

In [None]:
# Display the merged DataFrame of URLs and their inspection results
inspection_results

Unnamed: 0,Sitemap_URLs,verdict,coverageState,robotsTxtState,indexingState,lastCrawlTime,pageFetchState,googleCanonical,userCanonical,sitemap,referringUrls,crawledAs
0,https://www.tridentpbi.in/,PASS,Submitted and indexed,ALLOWED,INDEXING_ALLOWED,2024-08-10T12:07:14Z,SUCCESSFUL,https://www.tridentpbi.in/,https://www.tridentpbi.in/,[https://www.tridentpbi.in/sitemap_index.xml],"[https://tridentpbi.home.blog/, https://www.tr...",MOBILE
1,https://www.tridentpbi.in/contact-us/,PASS,Submitted and indexed,ALLOWED,INDEXING_ALLOWED,2024-07-21T01:02:10Z,SUCCESSFUL,https://www.tridentpbi.in/contact-us/,https://www.tridentpbi.in/contact-us/,[https://www.tridentpbi.in/sitemap_index.xml],[https://www.tridentpbi.in/],MOBILE
2,https://www.tridentpbi.in/products/corrugated-...,PASS,Submitted and indexed,ALLOWED,INDEXING_ALLOWED,2024-08-08T12:11:24Z,SUCCESSFUL,https://www.tridentpbi.in/products/corrugated-...,https://www.tridentpbi.in/products/corrugated-...,,"[https://www.tridentpbi.in/, https://www.tride...",MOBILE
3,https://www.tridentpbi.in/hot-coffee-cup-sleev...,PASS,Submitted and indexed,ALLOWED,INDEXING_ALLOWED,2024-07-27T23:07:13Z,SUCCESSFUL,https://www.tridentpbi.in/hot-coffee-cup-sleev...,https://www.tridentpbi.in/hot-coffee-cup-sleev...,,[https://www.tridentpbi.in/],MOBILE
4,https://www.tridentpbi.in/products/corrugated-...,PASS,Submitted and indexed,ALLOWED,INDEXING_ALLOWED,2024-08-04T00:19:52Z,SUCCESSFUL,https://www.tridentpbi.in/products/corrugated-...,https://www.tridentpbi.in/products/corrugated-...,,"[https://www.tridentpbi.in/, https://www.tride...",MOBILE
5,https://www.tridentpbi.in/products/industries/...,PASS,Submitted and indexed,ALLOWED,INDEXING_ALLOWED,2024-08-06T14:13:38Z,SUCCESSFUL,https://www.tridentpbi.in/products/industries/...,https://www.tridentpbi.in/products/industries/...,,[https://www.tridentpbi.in/],MOBILE
6,https://www.tridentpbi.in/products/corrugated-...,PASS,Submitted and indexed,ALLOWED,INDEXING_ALLOWED,2024-08-05T04:24:50Z,SUCCESSFUL,https://www.tridentpbi.in/products/corrugated-...,https://www.tridentpbi.in/products/corrugated-...,,"[https://www.tridentpbi.in/, https://www.tride...",MOBILE
7,https://www.tridentpbi.in/products/corrugated-...,PASS,Submitted and indexed,ALLOWED,INDEXING_ALLOWED,2024-08-08T00:34:35Z,SUCCESSFUL,https://www.tridentpbi.in/products/corrugated-...,https://www.tridentpbi.in/products/corrugated-...,,[https://www.tridentpbi.in/],MOBILE
8,https://www.tridentpbi.in/products/industries/...,PASS,Submitted and indexed,ALLOWED,INDEXING_ALLOWED,2024-08-03T04:39:42Z,SUCCESSFUL,https://www.tridentpbi.in/products/industries/...,https://www.tridentpbi.in/products/industries/...,,[https://www.tridentpbi.in/products/corrugated...,MOBILE
9,https://www.tridentpbi.in/products/industries/...,PASS,Submitted and indexed,ALLOWED,INDEXING_ALLOWED,2024-06-26T16:32:34Z,SUCCESSFUL,https://www.tridentpbi.in/products/industries/...,https://www.tridentpbi.in/products/industries/...,,[https://www.tridentpbi.in/products/industries...,MOBILE


# Export to Excel

In [None]:
# Write the merged results to an Excel workbook, leaving out the index column
output_file = 'inspection_results.xlsx'
inspection_results.to_excel(output_file, index=False)