diff --git a/.changeset/orange-badgers-accept.md b/.changeset/orange-badgers-accept.md new file mode 100644 index 0000000..15fdc9f --- /dev/null +++ b/.changeset/orange-badgers-accept.md @@ -0,0 +1,5 @@ +--- +"google-indexing-script": patch +--- + +Add site url checker diff --git a/README.md b/README.md index 76e5fcd..15fda72 100644 --- a/README.md +++ b/README.md @@ -66,10 +66,8 @@ Run the script with the domain or url you want to index. ```bash gis -# `domain` property on gsc +# example gis seogets.com -# `url prefix` property on gsc -gis https://seogets.com ``` When in doubt try both 😀 diff --git a/src/index.ts b/src/index.ts index 740c2bb..97c8b85 100644 --- a/src/index.ts +++ b/src/index.ts @@ -6,6 +6,7 @@ import { getEmojiForStatus, getPageIndexingStatus, convertToFilePath, + checkSiteUrl, } from "./shared/gsc"; import { getSitemapPages } from "./shared/sitemap"; import { Status } from "./shared/types"; @@ -48,7 +49,7 @@ export const index = async ( } const accessToken = await getAccessToken(options.client_email, options.private_key, options.path); - const siteUrl = convertToSiteUrl(input); + let siteUrl = convertToSiteUrl(input); console.log(`🔎 Processing site: ${siteUrl}`); const cachePath = path.join(".cache", `${convertToFilePath(siteUrl)}.json`); @@ -58,6 +59,8 @@ export const index = async ( process.exit(1); } + siteUrl = await checkSiteUrl(accessToken, siteUrl); + const [sitemaps, pages] = await getSitemapPages(accessToken, siteUrl); if (sitemaps.length === 0) { diff --git a/src/shared/gsc.ts b/src/shared/gsc.ts index a5cb5fb..8a3091c 100644 --- a/src/shared/gsc.ts +++ b/src/shared/gsc.ts @@ -1,3 +1,4 @@ +import { webmasters_v3 } from "googleapis"; import { Status } from "./types"; import { fetchRetry } from "./utils"; @@ -22,6 +23,103 @@ export function convertToFilePath(path: string) { return path.replace("http://", "http_").replace("https://", "https_").replace("/", "_"); } +/** + * Converts an HTTP URL to a sc-domain URL format. 
/**
 * Reduces a site identifier to its bare `host[/path]` form.
 * Removes a leading `http://`, `https://` or `sc-domain:` prefix and any trailing slashes.
 * @param site A bare domain, an HTTP(S) URL, or a `sc-domain:` property string.
 * @returns The identifier without its prefix and without trailing slashes.
 */
function stripSitePrefix(site: string): string {
  return site.replace(/^(?:https?:\/\/|sc-domain:)/i, "").replace(/\/+$/, "");
}

/**
 * Converts an HTTP(S) URL to the sc-domain URL format.
 * Only the host is kept: any path after the host is discarded, and an input that is
 * already in sc-domain format is returned without a second prefix.
 * @param httpUrl The HTTP URL to be converted.
 * @returns The sc-domain formatted URL.
 */
export function convertToSCDomain(httpUrl: string): string {
  // Cut everything from the first "/" onwards so that only the host remains.
  const host = stripSitePrefix(httpUrl).replace(/\/.*$/, "");
  return `sc-domain:${host}`;
}

/**
 * Converts a domain to an HTTP URL.
 * An existing protocol or `sc-domain:` prefix and trailing slashes are removed first, so
 * a full URL can be passed without producing a doubled protocol or a doubled slash.
 * @param domain The domain to be converted.
 * @returns The HTTP URL, ending with a single "/".
 */
export function convertToHTTP(domain: string): string {
  return `http://${stripSitePrefix(domain)}/`;
}

/**
 * Converts a domain to an HTTPS URL.
 * An existing protocol or `sc-domain:` prefix and trailing slashes are removed first, so
 * a full URL can be passed without producing a doubled protocol or a doubled slash.
 * @param domain The domain to be converted.
 * @returns The HTTPS URL, ending with a single "/".
 */
export function convertToHTTPS(domain: string): string {
  return `https://${stripSitePrefix(domain)}/`;
}

/**
 * Retrieves a list of sites associated with the specified service account from the Google Webmasters API.
 * @param accessToken - The access token for authentication.
 * @returns An array containing the site URLs associated with the service account. The array is
 * empty when the request is rejected (an error is logged) or when the response lists no sites.
 */
export async function getSites(accessToken: string): Promise<string[]> {
  const sitesResponse = await fetchRetry("https://www.googleapis.com/webmasters/v3/sites", {
    headers: {
      "Content-Type": "application/json",
      Authorization: `Bearer ${accessToken}`,
    },
  });

  if (sitesResponse.status === 403) {
    console.error("🔐 This service account doesn't have access to any sites.");
    return [];
  }

  // Any other error status is a failed request, not an empty account; report it as such
  // instead of falling through to the "No sites found" message below.
  if (sitesResponse.status >= 400) {
    console.error(`❌ Failed to list sites. Response status: ${sitesResponse.status}`);
    return [];
  }

  const sitesBody: webmasters_v3.Schema$SitesListResponse = await sitesResponse.json();

  if (!sitesBody.siteEntry) {
    console.error("❌ No sites found, add them to Google Search Console and try again.");
    return [];
  }

  // `siteUrl` is optional/nullable in the API schema; keep only real strings.
  return sitesBody.siteEntry
    .map((site) => site.siteUrl)
    .filter((url): url is string => typeof url === "string");
}
/**
 * Resolves the Search Console property that corresponds to `siteUrl` for this service account.
 *
 * The given URL is tried first, followed by its equivalents in the other formats
 * (`https://`, `http://`, `sc-domain:`). The first one present in the list returned by
 * `getSites` is used.
 *
 * Terminates the process with exit code 1 when `siteUrl` starts with none of the three
 * known prefixes, or when none of the variants is accessible to the service account.
 *
 * @param accessToken - The access token for authentication.
 * @param siteUrl - The URL of the site to check.
 * @returns The first variant of `siteUrl` found among the accessible sites.
 */
export async function checkSiteUrl(accessToken: string, siteUrl: string): Promise<string> {
  const sites = await getSites(accessToken);

  const prefix = ["https://", "http://", "sc-domain:"].find((known) => siteUrl.startsWith(known));
  if (prefix === undefined) {
    console.error("❌ Unknown site URL format.");
    console.error("");
    // `return` marks this branch as terminal for the type checker; process.exit does not return.
    return process.exit(1);
  }

  // The converters prepend a protocol and append "/", so they need the bare `host[/path]`.
  // Passing the full URL would produce e.g. "http://https://example.com//", which never matches.
  const bare = siteUrl.slice(prefix.length).replace(/\/+$/, "");
  // A domain property is identified by the host alone, without any path.
  const host = bare.replace(/\/.*$/, "");

  // Candidates in priority order: the URL as given, then its equivalents in the other formats.
  const candidates =
    prefix === "sc-domain:"
      ? [siteUrl, convertToHTTP(bare), convertToHTTPS(bare)]
      : [
          siteUrl,
          prefix === "https://" ? convertToHTTP(bare) : convertToHTTPS(bare),
          convertToSCDomain(host),
        ];

  const accessible = candidates.find((candidate) => sites.includes(candidate));
  if (accessible !== undefined) {
    return accessible;
  }

  // None of the variants is registered for this service account.
  console.error("❌ This service account doesn't have access to this site.");
  console.error("");
  return process.exit(1);
}