Skip to content

Commit

Permalink
Merge pull request #44 from ArkeeAgency/antoinekm/recheck-opposite-pr…
Browse files Browse the repository at this point in the history
…operty

add site url checker
  • Loading branch information
goenning committed Mar 19, 2024
2 parents 2524b62 + ecc9db5 commit 8598813
Show file tree
Hide file tree
Showing 4 changed files with 108 additions and 4 deletions.
5 changes: 5 additions & 0 deletions .changeset/orange-badgers-accept.md
@@ -0,0 +1,5 @@
---
"google-indexing-script": patch
---

Add site url checker
4 changes: 1 addition & 3 deletions README.md
Expand Up @@ -66,10 +66,8 @@ Run the script with the domain or url you want to index.

```bash
gis <domain or url>
# `domain` property on gsc
# example
gis seogets.com
# `url prefix` property on gsc
gis https://seogets.com
```

When in doubt try both 😀
Expand Down
5 changes: 4 additions & 1 deletion src/index.ts
Expand Up @@ -6,6 +6,7 @@ import {
getEmojiForStatus,
getPageIndexingStatus,
convertToFilePath,
checkSiteUrl,
} from "./shared/gsc";
import { getSitemapPages } from "./shared/sitemap";
import { Status } from "./shared/types";
Expand Down Expand Up @@ -48,7 +49,7 @@ export const index = async (
}

const accessToken = await getAccessToken(options.client_email, options.private_key, options.path);
const siteUrl = convertToSiteUrl(input);
let siteUrl = convertToSiteUrl(input);
console.log(`🔎 Processing site: ${siteUrl}`);
const cachePath = path.join(".cache", `${convertToFilePath(siteUrl)}.json`);

Expand All @@ -58,6 +59,8 @@ export const index = async (
process.exit(1);
}

siteUrl = await checkSiteUrl(accessToken, siteUrl);

const [sitemaps, pages] = await getSitemapPages(accessToken, siteUrl);

if (sitemaps.length === 0) {
Expand Down
98 changes: 98 additions & 0 deletions src/shared/gsc.ts
@@ -1,3 +1,4 @@
import { webmasters_v3 } from "googleapis";
import { Status } from "./types";
import { fetchRetry } from "./utils";

Expand All @@ -22,6 +23,103 @@ export function convertToFilePath(path: string) {
return path.replace("http://", "http_").replace("https://", "https_").replace("/", "_");
}

/**
 * Converts an HTTP(S) URL to the Search Console domain-property format (`sc-domain:<host>`).
 *
 * Only the host part of the URL is kept: the scheme is stripped and everything
 * from the first `/` onwards (trailing slash or path) is discarded, so
 * `https://example.com/blog/` yields `sc-domain:example.com`.
 *
 * @param httpUrl The HTTP or HTTPS URL to be converted.
 * @returns The sc-domain formatted URL.
 */
export function convertToSCDomain(httpUrl: string) {
  const host = httpUrl
    // Anchor the scheme match so only a leading scheme is removed.
    .replace(/^https?:\/\//i, "")
    // Drop the trailing slash and any path; a domain property is identified by host only.
    .replace(/\/.*$/, "");
  return `sc-domain:${host}`;
}

/**
 * Converts a domain to an HTTP URL-prefix (`http://<domain>/`).
 *
 * The input is normalised first so that callers may pass a bare domain, a full
 * `http://` / `https://` URL, or an `sc-domain:` identifier: any such leading
 * prefix and any trailing slashes are removed before the URL is rebuilt. This
 * prevents results such as `http://https://example.com//`.
 *
 * @param domain The domain (or URL / sc-domain identifier) to be converted.
 * @returns The HTTP URL, always ending in exactly one trailing slash.
 */
export function convertToHTTP(domain: string) {
  const bareDomain = domain
    .replace(/^(?:https?:\/\/|sc-domain:)/i, "")
    .replace(/\/+$/, "");
  return `http://${bareDomain}/`;
}

/**
 * Converts a domain to an HTTPS URL-prefix (`https://<domain>/`).
 *
 * The input is normalised first so that callers may pass a bare domain, a full
 * `http://` / `https://` URL, or an `sc-domain:` identifier: any such leading
 * prefix and any trailing slashes are removed before the URL is rebuilt. This
 * prevents results such as `https://http://example.com//`.
 *
 * @param domain The domain (or URL / sc-domain identifier) to be converted.
 * @returns The HTTPS URL, always ending in exactly one trailing slash.
 */
export function convertToHTTPS(domain: string) {
  const bareDomain = domain
    .replace(/^(?:https?:\/\/|sc-domain:)/i, "")
    .replace(/\/+$/, "");
  return `https://${bareDomain}/`;
}

/**
 * Retrieves a list of sites associated with the specified service account from the Google Webmasters API.
 *
 * Failures are reported on stderr and surfaced to the caller as an empty list:
 * - HTTP 403: the service account has no access to any site.
 * - any other non-2xx status: the request itself failed (the status is logged).
 * - a successful response without `siteEntry`: no sites are registered.
 *
 * @param accessToken - The access token for authentication.
 * @returns An array containing the site URLs associated with the service account,
 *          or an empty array when the list could not be obtained.
 */
export async function getSites(accessToken: string) {
  const sitesResponse = await fetchRetry("https://www.googleapis.com/webmasters/v3/sites", {
    headers: {
      "Content-Type": "application/json",
      Authorization: `Bearer ${accessToken}`,
    },
  });

  if (sitesResponse.status === 403) {
    console.error("🔐 This service account doesn't have access to any sites.");
    return [];
  }

  // Any other unsuccessful status carries an error payload rather than a site
  // list; report it as a request failure instead of claiming no sites exist.
  if (sitesResponse.status < 200 || sitesResponse.status >= 300) {
    console.error(`❌ Failed to list sites. Response was: ${sitesResponse.status}`);
    return [];
  }

  const sitesBody: webmasters_v3.Schema$SitesListResponse = await sitesResponse.json();

  if (!sitesBody.siteEntry) {
    console.error("❌ No sites found, add them to Google Search Console and try again.");
    return [];
  }

  return sitesBody.siteEntry.map((x) => x.siteUrl);
}

/**
 * Checks if the site URL is valid and accessible by the service account.
 *
 * The given site URL is expanded into its equivalent property identifiers
 * (the URL as given, the same site under the other scheme or as URL-prefixes,
 * and the `sc-domain:` form) and the first one that appears in the service
 * account's site list is returned. The URL as given is always tried first.
 *
 * Terminates the process with exit code 1 when the URL has an unknown format
 * or when none of the candidates is accessible.
 *
 * @param accessToken - The access token for authentication.
 * @param siteUrl - The URL of the site to check (`https://…`, `http://…` or `sc-domain:…`).
 * @returns The first candidate URL found in the account's site list.
 */
export async function checkSiteUrl(accessToken: string, siteUrl: string) {
  const sites = await getSites(accessToken);
  const formattedUrls: string[] = [];

  // convertToHTTP / convertToHTTPS prepend a scheme and append "/", so they must
  // be given the bare domain; passing the full URL would yield e.g.
  // "http://https://example.com//", which can never match a property.
  const bareDomain = siteUrl.replace(/^(?:https?:\/\/|sc-domain:)/, "").replace(/\/+$/, "");

  // Convert the site URL into all possible formats
  if (siteUrl.startsWith("https://")) {
    formattedUrls.push(siteUrl);
    formattedUrls.push(convertToHTTP(bareDomain));
    formattedUrls.push(convertToSCDomain(siteUrl));
  } else if (siteUrl.startsWith("http://")) {
    formattedUrls.push(siteUrl);
    formattedUrls.push(convertToHTTPS(bareDomain));
    formattedUrls.push(convertToSCDomain(siteUrl));
  } else if (siteUrl.startsWith("sc-domain:")) {
    formattedUrls.push(siteUrl);
    formattedUrls.push(convertToHTTP(bareDomain));
    formattedUrls.push(convertToHTTPS(bareDomain));
  } else {
    console.error("❌ Unknown site URL format.");
    console.error("");
    process.exit(1);
  }

  // Check if any of the formatted URLs are accessible
  for (const formattedUrl of formattedUrls) {
    if (sites.includes(formattedUrl)) {
      return formattedUrl;
    }
  }

  // If none of the formatted URLs are accessible
  console.error("❌ This service account doesn't have access to this site.");
  console.error("");
  process.exit(1);
}

/**
* Retrieves the indexing status of a page.
* @param accessToken - The access token for authentication.
Expand Down

0 comments on commit 8598813

Please sign in to comment.