Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 15 additions & 1 deletion app.js
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@ var cache = apicache.options({
respectCacheControl: false,
// Add origin to cache key to avoid CORS issues with cached responses
appendKey: (req, res) => req.headers.origin || "",
// Only cache successful responses. Excludes error statuses (a transient
// upstream failure would otherwise poison the 1-month TTL) and redirects
// from /api/download (their tokenized target URL expires in minutes).
statusCodes: { include: [200] },
}).middleware;

// Define routes
Expand All @@ -25,9 +29,16 @@ var comPapersRouter = require("./routes/papers_gceguide_com");
var ppcoPapersRouter = require("./routes/pastpapers_co");
var ppcaPapersRouter = require("./routes/papacambridge_com");
var yearsRouter = require("./routes/years");
var downloadRouter = require("./routes/download");

var app = express();

// Trust Fly.io's edge proxy so req.protocol reflects X-Forwarded-Proto.
// Without this the paper-list endpoint builds http:// download URLs even
// when the original request was https, triggering mixed-content blocks
// when the frontend (https) renders the link.
app.set("trust proxy", true);

// Setup views
app.set("views", path.join(__dirname, "views"));
app.set("view engine", "jade");
Expand Down Expand Up @@ -61,7 +72,9 @@ app.use(
})
);
app.use(cookieParser());
app.use(cache("1 month"));
// Skip cache for /api/download — it streams multi-MB binaries and uses a
// per-request tokenized upstream URL, neither of which belongs in apicache.
app.use(cache("1 month", (req) => !req.originalUrl.startsWith("/api/download")));

// Define routes
app.use("/api", indexRouter);
Expand All @@ -71,6 +84,7 @@ app.use("/api/papers/com", comPapersRouter);
app.use("/api/papers/ppco", ppcoPapersRouter);
app.use("/api/papers/ppca", ppcaPapersRouter);
app.use("/api/years", yearsRouter);
app.use("/api/download", downloadRouter);

app.get("/api/cache/clear", (_req, res) => {
res.json(apicache.clear());
Expand Down
7 changes: 6 additions & 1 deletion config/cors.config.js
Original file line number Diff line number Diff line change
Expand Up @@ -83,11 +83,16 @@ const corsOptions = {
"X-Request-Id",
],

// Headers exposed to the client
// Headers exposed to the client. Content-Disposition and Content-Length
// matter for /api/download — frontends use them to read the filename
// (for `<a download>` or File System Access) and show progress.
exposedHeaders: [
"X-Total-Count",
"X-Page-Count",
"Content-Disposition",
"Content-Length",
"Content-Range",
"Accept-Ranges",
"X-Request-Id",
"X-Response-Time",
"X-Rate-Limit-Remaining",
Expand Down
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
"http-errors": "~2.0.0",
"jade": "~1.11.0",
"morgan": "~1.10.0",
"pdf-lib": "^1.17.1",
"redis": "^4.6.13",
"socks5-https-client": "^1.2.1"
}
Expand Down
37 changes: 37 additions & 0 deletions pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

47 changes: 9 additions & 38 deletions routes/cates.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
var Crawler = require("crawler");
var express = require("express");
var router = express.Router();
var { fetchCates } = require("../utils/papersdaddy_wrapper");
/*
const Agent = require('socks5-https-client/lib/Agent');
*/
Expand Down Expand Up @@ -61,44 +62,14 @@ router.get("/ppco/:cate", function (req, res, next) {
]);
});

// PapaCambridge
// as-and-a-level
// igcse
router.get("/ppca/:cate", function (req, res, next) {
const server = "https://pastpapers.papacambridge.com/papers/caie/";
c.queue([
{
uri: `${server}${req.params.cate}`.toLowerCase(),
callback: function (error, resC, done) {
if (error) {
console.log(error);
} else {
let $ = resC.$;
let returnArray = {
cates: new Array(),
count: 0,
};
$("#datafile > div.files-list-main > div").each(function () {
const subject = $(this).text().trim();

if (subject.includes("-") && !subject.includes("No Content Available")) {
const subjectName = subject
.substring(0, subject.lastIndexOf("-"))
.replaceAll("-", " ");
const subjectCode = subject.substring(subject.lastIndexOf("-") + 2);
returnArray.cates.push({
name: `${subjectName} (${subjectCode})`,
});
}
});
returnArray.count = returnArray.cates.length;
console.log(server + req.params.cate);
res.send(JSON.stringify(returnArray));
}
done();
},
},
]);
// PapaCambridge category listing: hands the `:cate` route parameter to
// fetchCates() and relays its result as JSON. A fetch error is logged and
// answered with 502 plus an empty list, so the response shape stays the same.
router.get("/ppca/:cate", (req, res, _next) => {
  const { cate } = req.params;
  fetchCates(cate, (err, cates) => {
    if (!err) {
      res.json({ cates, count: cates.length });
      return;
    }
    console.log(err);
    return res.status(502).json({ cates: [], count: 0, error: err.message });
  });
});

// GCE Guide
Expand Down
136 changes: 136 additions & 0 deletions routes/download.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
var express = require("express");
var https = require("https");
var router = express.Router();
var { resolveDownload } = require("../utils/papersdaddy_wrapper");
var { stripWatermark } = require("../utils/watermark_stripper");

// Allow-list for request paths; handle() rejects anything that does not
// match before resolving or fetching. Accepted shape:
//   /cambridge/<segment>/<segment>/<4 digits>-<letters or hyphens>/<file>.<ext>
// where each <segment> is lowercase letters, digits or "-", <file> is
// letters, digits, "_", "." or "-", and <ext> is pdf, mp3, doc or docx.
const SAFE_PATH = /^\/cambridge\/[a-z0-9-]+\/[a-z0-9-]+\/[0-9]{4}-[a-z-]+\/[A-Za-z0-9_.-]+\.(pdf|mp3|docx?)$/;

// Request headers passed to the upstream https.get() call in handle().
// NOTE(review): presumably the browser-like User-Agent is needed for the
// upstream host to serve the file — confirm.
const STREAM_HEADERS = {
"User-Agent":
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36",
Accept: "*/*",
};

// Response Content-Type keyed by lowercase file extension. handle() falls
// back to "application/octet-stream" for an extension that is not listed.
const CONTENT_TYPE_BY_EXT = {
pdf: "application/pdf",
mp3: "audio/mpeg",
doc: "application/msword",
docx: "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
};

/**
 * Parse a single-range `Range` header of the form "bytes=first-last".
 *
 * Supports an open-ended range ("bytes=500-") and a suffix range
 * ("bytes=-500", meaning the last 500 bytes). Multi-range headers, other
 * units and malformed values are rejected.
 *
 * @param {string|undefined} header - Raw `Range` header value.
 * @param {number} total - Size in bytes of the full payload.
 * @returns {{start: number, end: number}|null} Inclusive byte offsets with
 *   `end` clamped to `total - 1`, or null when the header is absent,
 *   malformed, or cannot be satisfied for a payload of `total` bytes.
 */
function parseRange(header, total) {
  const match = /^bytes=(\d*)-(\d*)$/.exec(header || "");
  if (match === null) return null;

  const [, firstText, lastText] = match;
  // "bytes=-" names neither end of the range.
  if (firstText === "" && lastText === "") return null;

  const lastIndex = total - 1;
  let start;
  let end;
  if (firstText === "") {
    // Suffix form: only a trailing length was given.
    const tailLength = parseInt(lastText, 10);
    if (tailLength <= 0) return null;
    start = Math.max(0, total - tailLength);
    end = lastIndex;
  } else {
    start = parseInt(firstText, 10);
    end = lastText === "" ? lastIndex : parseInt(lastText, 10);
  }

  const unsatisfiable = start < 0 || start >= total || end < start;
  if (unsatisfiable) return null;

  return { start, end: Math.min(end, lastIndex) };
}

/**
 * Write an in-memory payload to the response, honouring a single-range
 * `Range` request header when parseRange() accepts it.
 *
 * Always sets Content-Type, Content-Disposition, Accept-Ranges and
 * Cache-Control. A usable range yields 206 with Content-Range and the
 * requested slice; otherwise the whole payload is sent with 200. For HEAD
 * requests the headers are identical but no body is written.
 *
 * @param {object} req - Request; reads `method` and `headers.range`.
 * @param {object} res - Response; uses `setHeader`, `status` and `end`.
 * @param {Buffer} body - Complete payload.
 * @param {string} contentType - Value for the Content-Type header.
 * @param {string} filename - File name placed in Content-Disposition.
 * @param {boolean} asAttachment - True for "attachment", false for "inline".
 * @returns {*} Whatever `res.end` returns.
 */
function sendBuffer(req, res, body, contentType, filename, asAttachment) {
  const total = body.length;
  const disposition = asAttachment ? "attachment" : "inline";

  res.setHeader("Content-Type", contentType);
  res.setHeader("Content-Disposition", `${disposition}; filename="${filename}"`);
  res.setHeader("Accept-Ranges", "bytes");
  res.setHeader("Cache-Control", "public, max-age=86400");

  const range = parseRange(req.headers.range, total);

  // No usable range: answer with the complete payload.
  if (!range) {
    res.status(200);
    res.setHeader("Content-Length", total);
    return req.method === "HEAD" ? res.end() : res.end(body);
  }

  // Partial content: `end` is inclusive, subarray's upper bound is not.
  const { start, end } = range;
  const slice = body.subarray(start, end + 1);
  res.status(206);
  res.setHeader("Content-Range", `bytes ${start}-${end}/${total}`);
  res.setHeader("Content-Length", slice.length);
  return req.method === "HEAD" ? res.end() : res.end(slice);
}

/**
 * Express handler for GET and HEAD requests on the download router.
 *
 * Flow: validate `req.path` against SAFE_PATH, ask resolveDownload() for the
 * upstream URL, buffer the upstream body in memory, pass PDFs through
 * stripWatermark() (serving the untouched bytes if that throws), then reply
 * through sendBuffer().
 *
 * Responses written here:
 *   - 400 JSON when the path does not match SAFE_PATH;
 *   - 502 JSON when the resolver fails, the upstream request errors, or the
 *     upstream answers with a status other than 200;
 *   - 500 JSON when writing the final response throws.
 *
 * @param {object} req - Express request; reads `path` and `query.download`
 *   ("1" or "true" selects an attachment disposition).
 * @param {object} res - Express response.
 */
function handle(req, res) {
  const subpath = req.path;
  if (!SAFE_PATH.test(subpath)) {
    return res.status(400).json({ error: "invalid path", path: subpath });
  }
  const asAttachment = req.query.download === "1" || req.query.download === "true";

  resolveDownload(subpath, function (err, url) {
    if (err) {
      console.log(err);
      return res.status(502).json({ error: err.message });
    }

    const filename = subpath.split("/").pop();
    const ext = filename.split(".").pop().toLowerCase();
    const contentType = CONTENT_TYPE_BY_EXT[ext] || "application/octet-stream";

    const upstream = https.get(url, { headers: STREAM_HEADERS }, function (up) {
      if (up.statusCode !== 200) {
        res.status(502).json({ error: `upstream ${up.statusCode}` });
        // Drain the unwanted body so the socket can be released.
        up.resume();
        return;
      }

      const chunks = [];
      up.on("data", (c) => chunks.push(c));
      up.on("end", async () => {
        const raw = Buffer.concat(chunks);
        let body = raw;
        if (ext === "pdf") {
          try {
            body = await stripWatermark(raw);
          } catch (e) {
            // Best effort: a failed strip must not block the download.
            console.log("watermark strip failed, serving original:", e.message);
          }
        }
        // The client may have gone away while the body was being processed.
        if (res.writableEnded || res.destroyed) return;
        try {
          sendBuffer(req, res, body, contentType, filename, asAttachment);
        } catch (e) {
          // This listener is async, so an uncaught throw here would surface
          // as an unhandled promise rejection instead of an HTTP error.
          console.log(e);
          if (!res.headersSent) {
            res.status(500).json({ error: e.message });
          } else {
            res.destroy();
          }
        }
      });
      up.on("error", function (e) {
        console.log(e);
        if (!res.headersSent) res.status(502).json({ error: e.message });
      });
    });

    upstream.on("error", function (e) {
      console.log(e);
      if (!res.headersSent) {
        res.status(502).json({ error: e.message });
      } else {
        res.destroy();
      }
    });

    // Abort the upstream fetch once the response is closed. The response is
    // used rather than the request because, since Node 16, the request's
    // 'close' event fires when the request has been completed, not only when
    // the client disconnects. The response's 'close' also fires after a
    // normal finish, by which point the upstream request is already done.
    res.on("close", function () {
      upstream.destroy();
    });
  });
}

// Send every GET and HEAD request that reaches this router to handle().
// The /.*/ pattern matches any sub-path; handle() validates the path
// itself against SAFE_PATH.
router.get(/.*/, handle);
router.head(/.*/, handle);

module.exports = router;
Loading
Loading