Skip to content

Commit

Permalink
feat: change the way we pass the token (#419)
Browse files Browse the repository at this point in the history
  • Loading branch information
m-maillot committed Apr 5, 2024
1 parent bb2b1eb commit 7dfb6c2
Show file tree
Hide file tree
Showing 6 changed files with 42 additions and 30 deletions.
16 changes: 16 additions & 0 deletions src/fetch-data/__tests__/injectUrl.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
import { injectToken } from "../injectToken";

// Unit tests for injectToken: the TOKEN_MT environment variable must end up
// in the URL as a `cgtoken` query parameter.
describe("injectToken", () => {
  // Remember the pre-existing value so this suite does not leak its fake
  // token into other test files sharing the same process.
  const originalToken = process.env.TOKEN_MT;

  beforeEach(() => {
    process.env.TOKEN_MT = "token_mt";
  });

  afterAll(() => {
    if (originalToken === undefined) {
      delete process.env.TOKEN_MT;
    } else {
      process.env.TOKEN_MT = originalToken;
    }
  });

  test("injectToken should inject the TOKEN_MT in the URL", () => {
    // No existing query string: the token is introduced with "?".
    expect(injectToken("http://monurl.test")).toEqual(
      "http://monurl.test?cgtoken=token_mt"
    );
    // Existing query string: the token is appended with "&".
    expect(injectToken("http://monurl.test/test?withParam=true")).toEqual(
      "http://monurl.test/test?withParam=true&cgtoken=token_mt"
    );
  });

  test("injectToken should throw when TOKEN_MT is missing", () => {
    delete process.env.TOKEN_MT;
    expect(() => injectToken("http://monurl.test")).toThrow();
  });
});
8 changes: 4 additions & 4 deletions src/fetch-data/__tests__/scrapUrl.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,17 +11,17 @@ beforeEach(() => {
});

got.mockImplementation((url) => {
if (url === "http://url.ok") {
if (url.startsWith("http://url.ok")) {
return Promise.resolve({
body: `<html><body><h1>hello</h1></body></html>`,
});
}
if (url === "url.wrong-redirect") {
if (url.startsWith("url.wrong-redirect")) {
return Promise.resolve({
body: `HTTP 301 <a href="url.http.fail">url fail</a>`,
});
}
if (url === "url.http.fail") {
if (url.startsWith("url.http.fail")) {
const error = new HTTPError();
error.response = {
statusCode: 500,
Expand All @@ -31,7 +31,7 @@ got.mockImplementation((url) => {
error.name = "HTTPError";
return Promise.reject(error);
}
if (url === "url.parse.fail") {
if (url.startsWith("url.parse.fail")) {
const error = new ParseError();
error.message = "parse fail";
error.name = "ParseError";
Expand Down
14 changes: 0 additions & 14 deletions src/fetch-data/generateHeaders.js

This file was deleted.

8 changes: 3 additions & 5 deletions src/fetch-data/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,15 @@ import got from "got";
import pLimit from "p-limit";
import path from "path";

import { generateHeaders } from "./generateHeaders";
import { injectToken } from "./injectToken";
import { scrapUrl } from "./scrapUrl";

const FEED_URL = "https://travail-emploi.gouv.fr/?page=oseo_json";

const limit = pLimit(10);

export async function fetchFeed(url) {
const response = await got.post(url, {
headers: generateHeaders({
"Content-Type": "application/json",
}),
const response = await got.post(injectToken(url), {
http2: true,
retry: 3,
});
Expand All @@ -26,6 +23,7 @@ export async function fetchFeed(url) {
const { fiches: localFeed } = JSON.parse(localJson);
return [...feed, ...localFeed];
}

export async function scrap(urls) {
const inputs = urls.map(({ id, url }) => limit(() => scrapUrl(id, url)));
const results = await Promise.allSettled(inputs);
Expand Down
15 changes: 15 additions & 0 deletions src/fetch-data/injectToken.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
/**
 * Inject a specific token to bypass bot protection.
 *
 * Reads the token from the TOKEN_MT environment variable and appends it to
 * `url` as a `cgtoken` query parameter.
 *
 * @param {string} url - URL to decorate. It is handled as a plain string, so
 *   it is neither normalized nor required to be absolute.
 * @returns {string} The URL with `cgtoken=<token>` added to its query string.
 * @throws {Error} If TOKEN_MT is unset or empty.
 */
export function injectToken(url) {
  const token = process.env.TOKEN_MT;
  if (!token) {
    throw new Error(
      "Token (cgtoken) is required to fetch the data. This token is provided by the travail-emploi.gouv.fr team."
    );
  }

  // A "#fragment" must stay at the very end of the URL: anything appended
  // after it is part of the fragment and is never sent to the server.
  const hashIndex = url.indexOf("#");
  const base = hashIndex === -1 ? url : url.slice(0, hashIndex);
  const fragment = hashIndex === -1 ? "" : url.slice(hashIndex);

  // Only a "?" located before the fragment means a query string already exists.
  const separator = base.includes("?") ? "&" : "?";

  // Percent-encode the token so characters such as "&", "#" or "=" cannot
  // alter the structure of the query string.
  return `${base}${separator}cgtoken=${encodeURIComponent(token)}${fragment}`;
}
11 changes: 4 additions & 7 deletions src/fetch-data/scrapUrl.js
Original file line number Diff line number Diff line change
@@ -1,24 +1,21 @@
import got from "got";
import { JSDOM } from "jsdom";

import { generateHeaders } from "./generateHeaders";
import { injectToken } from "./injectToken";
import { parseDom } from "./parseDom";

export async function scrapUrl(id, url) {
const headers = generateHeaders();
try {
let response = await got(url, {
let response = await got(injectToken(url), {
followRedirect: true,
headers,
http2: true,
retry: 3,
});
if (/HTTP 30\d/.test(response.body)) {
const [, redirectUrl] = response.body.match(/href="(.*)"/);
try {
response = await got(redirectUrl, {
response = await got(injectToken(redirectUrl), {
followRedirect: true,
headers,
http2: true,
retry: 3,
});
Expand All @@ -35,7 +32,7 @@ export async function scrapUrl(id, url) {
err = new Error(`Parsing Error: ${error.message}`);
} else if (error instanceof got.HTTPError) {
err = new Error(
`HTTP Error: ${error.response.statusCode} - ${error.options.url.href} - ${error.message}`
`HTTP Error: ${error.response.statusCode} - ${url} - ${error.message}`
);
} else {
err = new Error(error.message);
Expand Down

0 comments on commit 7dfb6c2

Please sign in to comment.