From 77b94edeef863721c07bd3e12d6d38052723f422 Mon Sep 17 00:00:00 2001 From: Antoine Kingue Date: Mon, 18 Mar 2024 16:42:26 +0100 Subject: [PATCH 1/3] add site url checker (#37) --- .changeset/orange-badgers-accept.md | 5 ++ src/index.ts | 5 +- src/shared/gsc.ts | 121 ++++++++++++++++++++++++++++ 3 files changed, 130 insertions(+), 1 deletion(-) create mode 100644 .changeset/orange-badgers-accept.md diff --git a/.changeset/orange-badgers-accept.md b/.changeset/orange-badgers-accept.md new file mode 100644 index 0000000..15fdc9f --- /dev/null +++ b/.changeset/orange-badgers-accept.md @@ -0,0 +1,5 @@ +--- +"google-indexing-script": patch +--- + +Add site url checker diff --git a/src/index.ts b/src/index.ts index 740c2bb..97c8b85 100644 --- a/src/index.ts +++ b/src/index.ts @@ -6,6 +6,7 @@ import { getEmojiForStatus, getPageIndexingStatus, convertToFilePath, + checkSiteUrl, } from "./shared/gsc"; import { getSitemapPages } from "./shared/sitemap"; import { Status } from "./shared/types"; @@ -48,7 +49,7 @@ export const index = async ( } const accessToken = await getAccessToken(options.client_email, options.private_key, options.path); - const siteUrl = convertToSiteUrl(input); + let siteUrl = convertToSiteUrl(input); console.log(`🔎 Processing site: ${siteUrl}`); const cachePath = path.join(".cache", `${convertToFilePath(siteUrl)}.json`); @@ -58,6 +59,8 @@ export const index = async ( process.exit(1); } + siteUrl = await checkSiteUrl(accessToken, siteUrl); + const [sitemaps, pages] = await getSitemapPages(accessToken, siteUrl); if (sitemaps.length === 0) { diff --git a/src/shared/gsc.ts b/src/shared/gsc.ts index a5cb5fb..c212781 100644 --- a/src/shared/gsc.ts +++ b/src/shared/gsc.ts @@ -1,3 +1,4 @@ +import { webmasters_v3 } from "googleapis"; import { Status } from "./types"; import { fetchRetry } from "./utils"; @@ -22,6 +23,126 @@ export function convertToFilePath(path: string) { return path.replace("http://", "http_").replace("https://", "https_").replace("/", "_"); } +/** + * Converts an HTTP URL to a sc-domain URL format. + * @param httpUrl The HTTP URL to be converted. + * @returns The sc-domain formatted URL. + */ +export function convertToSCDomain(httpUrl: string) { + return `sc-domain:${httpUrl.replace("http://", "").replace("https://", "").replace("/", "")}`; +} + +/** + * Converts a domain to an HTTP URL. + * @param domain The domain to be converted. + * @returns The HTTP URL. + */ +export function convertToHTTP(domain: string) { + return `http://${domain}/`; +} + +/** + * Converts a domain to an HTTPS URL. + * @param domain The domain to be converted. + * @returns The HTTPS URL. + */ +export function convertToHTTPS(domain: string) { + return `https://${domain}/`; +} + +/** + * Retrieves a list of sites associated with the specified service account from the Google Webmasters API. + * @param accessToken - The access token for authentication. + * @returns An array containing the site URLs associated with the service account. + */ +export async function getSites(accessToken: string) { + const sitesResponse = await fetchRetry('https://www.googleapis.com/webmasters/v3/sites', { + headers: { + 'Content-Type': 'application/json', + Authorization: `Bearer ${accessToken}`, + }, + }); + + if (sitesResponse.status === 403) { + console.error('🔐 This service account doesn\'t have access to any sites.'); + return []; + } + + const sitesBody: webmasters_v3.Schema$SitesListResponse = await sitesResponse.json(); + + if (!sitesBody.siteEntry) { + console.error('❌ No sites found, add them to Google Search Console and try again.'); + return []; + } + + return sitesBody.siteEntry.map((x) => x.siteUrl); +} + +/** + * Checks if the site URL is valid and accessible by the service account. + * @param accessToken - The access token for authentication. + * @param siteUrl - The URL of the site to check. + * @returns The corrected URL if found, otherwise the original site URL. + */ +export async function checkSiteUrl( + accessToken: string, + siteUrl: string +) { + const sites = await getSites(accessToken); + + if (!sites.includes(siteUrl)) { + if (siteUrl.startsWith("sc-domain:")) { + if (sites.includes(convertToHTTP(siteUrl.replace("sc-domain:", "")))) { + const correctUrl = convertToHTTP(siteUrl.replace("sc-domain:", "")); + console.warn(`🚨 Found HTTP version of the site, please next time use this format instead: ${correctUrl}`); + return correctUrl; + } else if (sites.includes(convertToHTTPS(siteUrl.replace("sc-domain:", "")))) { + const correctUrl = convertToHTTPS(siteUrl.replace("sc-domain:", "")); + console.warn(`🚨 Found HTTPS version of the site, please next time use this format instead: ${correctUrl}`); + return correctUrl; + } else { + console.error("❌ This service account doesn't have access to this site."); + console.error(""); + process.exit(1); + } + } else if (siteUrl.startsWith("https://")) { + if (sites.includes(convertToHTTP(siteUrl))) { + const correctUrl = convertToHTTP(siteUrl); + console.warn(`🚨 Found HTTP version of the site, please next time use this format instead: ${correctUrl}`); + return correctUrl; + } else if (sites.includes(convertToSCDomain(siteUrl))) { + const correctUrl = convertToSCDomain(siteUrl); + console.warn(`🚨 Found sc-domain version of the site, please next time use this format instead: ${correctUrl.replace("sc-domain:", "")}`); + return correctUrl; + } else { + console.error("❌ This service account doesn't have access to this site."); + console.error(""); + process.exit(1); + } + } else if (siteUrl.startsWith("http://")) { + if (sites.includes(convertToHTTPS(siteUrl))) { + const correctUrl = convertToHTTPS(siteUrl); + console.warn(`🚨 Found HTTPS version of the site, please next time use this format instead: ${correctUrl}`); + return correctUrl; + } else if (sites.includes(convertToSCDomain(siteUrl))) { + const correctUrl = convertToSCDomain(siteUrl); + console.warn(`🚨 Found sc-domain version of the site, please next time use this format instead: ${correctUrl.replace("sc-domain:", "")}`); + return correctUrl; + } else { + console.error("❌ This service account doesn't have access to this site."); + console.error(""); + process.exit(1); + } + } else { + console.error("❌ Unknown site URL format."); + console.error(""); + process.exit(1); + } + } else { + return siteUrl; + } +} + /** * Retrieves the indexing status of a page. * @param accessToken - The access token for authentication. From 1c330a7c2fc1ededd349c4293c6fe8054350ccf1 Mon Sep 17 00:00:00 2001 From: Antoine Kingue Date: Tue, 19 Mar 2024 10:14:52 +0100 Subject: [PATCH 2/3] refactor site url checking logic --- src/shared/gsc.ts | 93 ++++++++++++++++++----------------------------- 1 file changed, 35 insertions(+), 58 deletions(-) diff --git a/src/shared/gsc.ts b/src/shared/gsc.ts index c212781..8a3091c 100644 --- a/src/shared/gsc.ts +++ b/src/shared/gsc.ts @@ -56,22 +56,22 @@ export function convertToHTTPS(domain: string) { * @returns An array containing the site URLs associated with the service account. */ export async function getSites(accessToken: string) { - const sitesResponse = await fetchRetry('https://www.googleapis.com/webmasters/v3/sites', { + const sitesResponse = await fetchRetry("https://www.googleapis.com/webmasters/v3/sites", { headers: { - 'Content-Type': 'application/json', + "Content-Type": "application/json", Authorization: `Bearer ${accessToken}`, }, }); if (sitesResponse.status === 403) { - console.error('🔐 This service account doesn\'t have access to any sites.'); + console.error("🔐 This service account doesn't have access to any sites."); return []; } const sitesBody: webmasters_v3.Schema$SitesListResponse = await sitesResponse.json(); if (!sitesBody.siteEntry) { - console.error('❌ No sites found, add them to Google Search Console and try again.'); + console.error("❌ No sites found, add them to Google Search Console and try again."); return []; } @@ -84,63 +84,40 @@ export async function getSites(accessToken: string) { * @param siteUrl - The URL of the site to check. * @returns The corrected URL if found, otherwise the original site URL. */ -export async function checkSiteUrl( - accessToken: string, - siteUrl: string -) { +export async function checkSiteUrl(accessToken: string, siteUrl: string) { const sites = await getSites(accessToken); - - if (!sites.includes(siteUrl)) { - if (siteUrl.startsWith("sc-domain:")) { - if (sites.includes(convertToHTTP(siteUrl.replace("sc-domain:", "")))) { - const correctUrl = convertToHTTP(siteUrl.replace("sc-domain:", "")); - console.warn(`🚨 Found HTTP version of the site, please next time use this format instead: ${correctUrl}`); - return correctUrl; - } else if (sites.includes(convertToHTTPS(siteUrl.replace("sc-domain:", "")))) { - const correctUrl = convertToHTTPS(siteUrl.replace("sc-domain:", "")); - console.warn(`🚨 Found HTTPS version of the site, please next time use this format instead: ${correctUrl}`); - return correctUrl; - } else { - console.error("❌ This service account doesn't have access to this site."); - console.error(""); - process.exit(1); - } - } else if (siteUrl.startsWith("https://")) { - if (sites.includes(convertToHTTP(siteUrl))) { - const correctUrl = convertToHTTP(siteUrl); - console.warn(`🚨 Found HTTP version of the site, please next time use this format instead: ${correctUrl}`); - return correctUrl; - } else if (sites.includes(convertToSCDomain(siteUrl))) { - const correctUrl = convertToSCDomain(siteUrl); - console.warn(`🚨 Found sc-domain version of the site, please next time use this format instead: ${correctUrl.replace("sc-domain:", "")}`); - return correctUrl; - } else { - console.error("❌ This service account doesn't have access to this site."); - console.error(""); - process.exit(1); - } - } else if (siteUrl.startsWith("http://")) { - if (sites.includes(convertToHTTPS(siteUrl))) { - const correctUrl = convertToHTTPS(siteUrl); - console.warn(`🚨 Found HTTPS version of the site, please next time use this format instead: ${correctUrl}`); - return correctUrl; - } else if (sites.includes(convertToSCDomain(siteUrl))) { - const correctUrl = convertToSCDomain(siteUrl); - console.warn(`🚨 Found sc-domain version of the site, please next time use this format instead: ${correctUrl.replace("sc-domain:", "")}`); - return correctUrl; - } else { - console.error("❌ This service account doesn't have access to this site."); - console.error(""); - process.exit(1); - } - } else { - console.error("❌ Unknown site URL format."); - console.error(""); - process.exit(1); - } + let formattedUrls: string[] = []; + + // Convert the site URL into all possible formats + if (siteUrl.startsWith("https://")) { + formattedUrls.push(siteUrl); + formattedUrls.push(convertToHTTP(siteUrl)); + formattedUrls.push(convertToSCDomain(siteUrl)); + } else if (siteUrl.startsWith("http://")) { + formattedUrls.push(siteUrl); + formattedUrls.push(convertToHTTPS(siteUrl)); + formattedUrls.push(convertToSCDomain(siteUrl)); + } else if (siteUrl.startsWith("sc-domain:")) { + formattedUrls.push(siteUrl); + formattedUrls.push(convertToHTTP(siteUrl.replace("sc-domain:", ""))); + formattedUrls.push(convertToHTTPS(siteUrl.replace("sc-domain:", ""))); } else { - return siteUrl; + console.error("❌ Unknown site URL format."); + console.error(""); + process.exit(1); } + + // Check if any of the formatted URLs are accessible + for (const formattedUrl of formattedUrls) { + if (sites.includes(formattedUrl)) { + return formattedUrl; + } + } + + // If none of the formatted URLs are accessible + console.error("❌ This service account doesn't have access to this site."); + console.error(""); + process.exit(1); } /** From ecc9db54d83b11407525ec93ce8b6efb9542d477 Mon Sep 17 00:00:00 2001 From: Antoine Kingue Date: Tue, 19 Mar 2024 10:15:08 +0100 Subject: [PATCH 3/3] update gis command examples in readme --- README.md | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/README.md b/README.md index 76e5fcd..15fda72 100644 --- a/README.md +++ b/README.md @@ -66,10 +66,8 @@ Run the script with the domain or url you want to index. ```bash gis -# `domain` property on gsc +# example gis seogets.com -# `url prefix` property on gsc -gis https://seogets.com ``` When in doubt try both 😀