diff --git a/README.md b/README.md
index 2724719..6b241ce 100644
--- a/README.md
+++ b/README.md
@@ -6,7 +6,7 @@ This is an HTTP Archive Reporting API that provides reporting data via various e
 
 ### Prerequisites
 
-- Node.js 18+
+- Node.js 22+
 - npm
 - Google Cloud account with necessary permissions
 - Set environment variables:
@@ -30,7 +30,6 @@ The API will be available at
 - **Cache Headers**: 6-hour cache control for static data
 - **Health Check**: GET `/` returns health status
 - **RESTful API**: All endpoints follow REST conventions
-- **Backend caching**: Some responses are cached on the backend for 1 hours to improve latency
 
 ### `GET /`
 
@@ -501,51 +500,6 @@ Returns a JSON object with the following schema:
 ]
 ```
 
-### `GET /cache-stats`
-
-Provides statistics about the API's cache.
-
-```bash
-curl --request GET \
-  --url 'https://{{HOST}}/v1/cache-stats'
-```
-
-Returns a JSON object with the following schema:
-
-```json
-{
-  "cache_hits": 12345,
-  "cache_misses": 6789,
-  "last_cleared": "2023-10-01T12:00:00Z"
-}
-```
-
-### `POST /v1/cache-reset`
-
-Resets all caches in the API. This endpoint requires a POST request.
-
-```bash
-curl --request POST \
-  --url 'https://{{HOST}}/v1/cache-reset'
-```
-
-Returns a JSON object with the following schema:
-
-```json
-{
-  "success": true,
-  "message": "All caches have been reset",
-  "before": {
-    "queryCache": 150,
-    "dateCache": 12
-  },
-  "after": {
-    "queryCache": 0,
-    "dateCache": 0
-  }
-}
-```
-
 ## Testing
 
 ```bash
@@ -667,34 +621,4 @@ Response:
 }
 ```
 
-## Cache Stats Private Endpoint
-
-The Cache Stats private endpoint provides information about the API's cache performance, including cache hits, misses, and the last time the cache was cleared. This endpoint is useful for monitoring and debugging cache behavior.
-
-```bash
-curl "https://tech-report-api-dev-226352634162.us-central1.run.app/v1/cache-stats" \
-  -H "Authorization: bearer $(gcloud auth print-identity-token)"
-```
-
-Returns a JSON object with the following schema:
-```json
-{
-  "queryCache": {
-    "total": 3220,
-    "valid": 2437,
-    "expired": 783,
-    "ttl": 3600000
-  },
-  "dateCache": {
-    "total": 4,
-    "valid": 4,
-    "expired": 0,
-    "ttl": 3600000
-  },
-  "config": {
-    "maxCacheSize": 5000,
-    "cleanupStrategy": "size-based-lru"
-  }
-}
-```
 
diff --git a/src/__tests__/routes.test.js b/src/__tests__/routes.test.js
index d9a0c57..1e53238 100644
--- a/src/__tests__/routes.test.js
+++ b/src/__tests__/routes.test.js
@@ -383,34 +383,291 @@ describe('API Routes', () => {
     });
   });
 
-  describe('Cache Management', () => {
-    it('should provide cache stats', async () => {
-      const res = await request(app)
-        .get('/v1/cache-stats')
-        .expect(200);
+  describe('GET /v1/static/*', () => {
+    beforeEach(() => {
+      // Reset all mocks before each test
+      mockFileExists.mockReset();
+      mockGetMetadata.mockReset();
+      mockCreateReadStream.mockReset();
+      mockFile.mockClear();
+      mockBucket.mockClear();
+    });
+
+    describe('Valid file requests', () => {
+      it('should return file content for valid path', async () => {
+        const fileContent = JSON.stringify({ data: 'test' });
+        const readable = Readable.from([fileContent]);
+
+        mockFileExists.mockResolvedValue([true]);
+        mockGetMetadata.mockResolvedValue([{
+          contentType: 'application/json',
+          etag: '"abc123"',
+          size: fileContent.length
+        }]);
+        mockCreateReadStream.mockReturnValue(readable);
+
+        const res = await request(app)
+          .get('/v1/static/reports/2024/data.json')
+          .expect(200);
+
+        expect(res.headers['content-type']).toContain('application/json');
+        expect(res.headers['cache-control']).toContain('public');
+        expect(res.headers['access-control-allow-origin']).toEqual('*');
+      });
+
+      it('should infer MIME type from file extension when not in metadata', async () => {
+        const fileContent = '{"test": true}';
+        const readable = Readable.from([fileContent]);
 
-      expect(res.body).toHaveProperty('queryCache');
-      expect(res.body).toHaveProperty('dateCache');
-      expect(res.body).toHaveProperty('config');
+        mockFileExists.mockResolvedValue([true]);
+        mockGetMetadata.mockResolvedValue([{
+          etag: '"abc123"',
+          size: fileContent.length
+        }]);
+        mockCreateReadStream.mockReturnValue(readable);
+
+        const res = await request(app)
+          .get('/v1/static/reports/data.json')
+          .expect(200);
+
+        expect(res.headers['content-type']).toContain('application/json');
+      });
+
+      it('should handle CORS preflight requests', async () => {
+        const res = await request(app)
+          .options('/v1/static/reports/data.json')
+          .set('Origin', 'http://example.com')
+          .set('Access-Control-Request-Method', 'GET')
+          .set('Access-Control-Request-Headers', 'Content-Type');
+
+        expect(res.statusCode).toEqual(204);
+        expect(res.headers['access-control-allow-origin']).toEqual('*');
+      });
     });
 
-    it('should reset cache on POST request', async () => {
-      const res = await request(app)
-        .post('/v1/cache-reset')
-        .expect(200);
+    describe('Invalid file paths (directory traversal attempts)', () => {
+      it('should reject paths containing double dot sequences', async () => {
+        // Test with '..' embedded in the path that won't be normalized away
+        const res = await request(app)
+          .get('/v1/static/reports/..hidden/passwd')
+          .expect(400);
+        expect(res.body).toHaveProperty('error', 'Invalid file path');
+      });
 
-      expect(res.body).toHaveProperty('success', true);
-      expect(res.body).toHaveProperty('message');
-      expect(res.body).toHaveProperty('before');
-      expect(res.body).toHaveProperty('after');
+      it('should reject paths with double slashes', async () => {
+        const res = await request(app)
+          .get('/v1/static/reports//data.json')
+          .expect(400);
+
+        expect(res.body).toHaveProperty('error', 'Invalid file path');
+      });
+
+      it('should reject paths with encoded double dots', async () => {
+        // URL-encoded '..' = %2e%2e
+        mockFileExists.mockResolvedValue([false]); // Will be checked after validation
+
+        const res = await request(app)
+          .get('/v1/static/reports/%2e%2e/secret/passwd');
+
+        // Should either be rejected as invalid or not found
+        expect([400, 404]).toContain(res.statusCode);
+      });
     });
 
-    it('should handle cache reset OPTIONS request', async () => {
-      const res = await request(app)
-        .options('/v1/cache-reset')
-        .expect(204);
+    describe('Non-existent files (404 handling)', () => {
+      it('should return 404 for non-existent files', async () => {
+        mockFileExists.mockResolvedValue([false]);
+
+        const res = await request(app)
+          .get('/v1/static/reports/nonexistent.json')
+          .expect(404);
+
+        expect(res.body).toHaveProperty('error', 'File not found');
+      });
+
+      it('should return 400 for empty file path', async () => {
+        const res = await request(app)
+          .get('/v1/static/')
+          .expect(400);
+
+        expect(res.body).toHaveProperty('error', 'File path required');
+      });
+    });
+
+    describe('Conditional requests (ETag/If-None-Match)', () => {
+      it('should return 304 when ETag matches If-None-Match header', async () => {
+        const etag = '"abc123"';
+
+        mockFileExists.mockResolvedValue([true]);
+        mockGetMetadata.mockResolvedValue([{
+          contentType: 'application/json',
+          etag: etag,
+          size: 100
+        }]);
 
-      expect(res.headers['access-control-allow-methods']).toContain('POST');
+        const res = await request(app)
+          .get('/v1/static/reports/data.json')
+          .set('If-None-Match', etag)
+          .expect(304);
+
+        // 304 responses have no body
+        expect(res.text).toEqual('');
+      });
+
+      it('should return 200 with content when ETag does not match', async () => {
+        const fileContent = JSON.stringify({ data: 'test' });
+        const readable = Readable.from([fileContent]);
+
+        mockFileExists.mockResolvedValue([true]);
+        mockGetMetadata.mockResolvedValue([{
+          contentType: 'application/json',
+          etag: '"abc123"',
+          size: fileContent.length
+        }]);
+        mockCreateReadStream.mockReturnValue(readable);
+
+        const res = await request(app)
+          .get('/v1/static/reports/data.json')
+          .set('If-None-Match', '"different-etag"')
+          .expect(200);
+
+        expect(res.headers['etag']).toEqual('"abc123"');
+      });
+
+      it('should include ETag in response headers', async () => {
+        const fileContent = JSON.stringify({ data: 'test' });
+        const readable = Readable.from([fileContent]);
+
+        mockFileExists.mockResolvedValue([true]);
+        mockGetMetadata.mockResolvedValue([{
+          contentType: 'application/json',
+          etag: '"abc123"',
+          size: fileContent.length
+        }]);
+        mockCreateReadStream.mockReturnValue(readable);
+
+        const res = await request(app)
+          .get('/v1/static/reports/data.json')
+          .expect(200);
+
+        expect(res.headers).toHaveProperty('etag', '"abc123"');
+      });
+    });
+
+    describe('Error scenarios (GCS failures)', () => {
+      it('should handle GCS exists() failure', async () => {
+        mockFileExists.mockRejectedValue(new Error('GCS connection failed'));
+
+        const res = await request(app)
+          .get('/v1/static/reports/data.json')
+          .expect(500);
+
+        expect(res.body).toHaveProperty('error', 'Failed to retrieve file');
+        expect(res.body).toHaveProperty('details');
+      });
+
+      it('should handle GCS getMetadata() failure', async () => {
+        mockFileExists.mockResolvedValue([true]);
+        mockGetMetadata.mockRejectedValue(new Error('Metadata retrieval failed'));
+
+        const res = await request(app)
+          .get('/v1/static/reports/data.json')
+          .expect(500);
+
+        expect(res.body).toHaveProperty('error', 'Failed to retrieve file');
+      });
+
+      it('should handle stream errors during file read', async () => {
+        mockFileExists.mockResolvedValue([true]);
+        mockGetMetadata.mockResolvedValue([{
+          contentType: 'application/json',
+          etag: '"abc123"',
+          size: 100
+        }]);
+
+        // Create a stream that emits an error after a delay
+        const errorStream = new Readable({
+          read() {
+            // Emit error asynchronously
+            process.nextTick(() => {
+              this.destroy(new Error('Stream read error'));
+            });
+          }
+        });
+        mockCreateReadStream.mockReturnValue(errorStream);
+
+        // Use try-catch since stream errors may cause connection issues
+        try {
+          const res = await request(app)
+            .get('/v1/static/reports/data.json')
+            .timeout(1000);
+
+          // If we get a response, verify error handling
+          expect([200, 500]).toContain(res.statusCode);
+        } catch (err) {
+          // Connection aborted due to stream error is expected behavior
+          expect(err.message).toMatch(/aborted|ECONNRESET|socket hang up/i);
+        }
+      });
+    });
+
+    describe('MIME type detection', () => {
+      it('should detect application/json for .json files', async () => {
+        const content = '{"test":true}';
+        const readable = Readable.from([content]);
+
+        mockFileExists.mockResolvedValue([true]);
+        mockGetMetadata.mockResolvedValue([{ size: content.length }]);
+        mockCreateReadStream.mockReturnValue(readable);
+
+        const res = await request(app)
+          .get('/v1/static/reports/data.json')
+          .expect(200);
+
+        expect(res.headers['content-type']).toContain('application/json');
+      });
+
+      it('should detect image/png for .png files', async () => {
+        const content = Buffer.from([0x89, 0x50, 0x4E, 0x47]); // PNG magic bytes
+        const readable = Readable.from([content]);
+
+        mockFileExists.mockResolvedValue([true]);
+        mockGetMetadata.mockResolvedValue([{ size: content.length }]);
+        mockCreateReadStream.mockReturnValue(readable);
+
+        const res = await request(app)
+          .get('/v1/static/reports/chart.png')
+          .buffer(true)
+          .parse((res, callback) => {
+            const chunks = [];
+            res.on('data', chunk => chunks.push(chunk));
+            res.on('end', () => callback(null, Buffer.concat(chunks)));
+          });
+
+        expect(res.statusCode).toEqual(200);
+        expect(res.headers['content-type']).toContain('image/png');
+      });
+
+      it('should use application/octet-stream for unknown extensions', async () => {
+        const content = Buffer.from([0x00, 0x01, 0x02]);
+        const readable = Readable.from([content]);
+
+        mockFileExists.mockResolvedValue([true]);
+        mockGetMetadata.mockResolvedValue([{ size: content.length }]);
+        mockCreateReadStream.mockReturnValue(readable);
+
+        const res = await request(app)
+          .get('/v1/static/reports/file.xyz')
+          .buffer(true)
+          .parse((res, callback) => {
+            const chunks = [];
+            res.on('data', chunk => chunks.push(chunk));
+            res.on('end', () => callback(null, Buffer.concat(chunks)));
+          });
+
+        expect(res.statusCode).toEqual(200);
+        expect(res.headers['content-type']).toContain('application/octet-stream');
+      });
     });
   });
 });
diff --git a/src/controllers/categoriesController.js b/src/controllers/categoriesController.js
index 6a76918..1e20645 100644
--- a/src/controllers/categoriesController.js
+++ b/src/controllers/categoriesController.js
@@ -53,13 +53,7 @@ const listCategories = async (req, res) => {
     return data;
   };
 
-  // Include onlyname and fields in cache key calculation
-  const customCacheKeyData = {
-    onlyname: req.query.onlyname || false,
-    fields: req.query.fields
-  };
-
-  await executeQuery(req, res, 'categories', queryBuilder, dataProcessor, customCacheKeyData);
+  await executeQuery(req, res, 'categories', queryBuilder, dataProcessor);
 };
 
 export { listCategories };
diff --git a/src/controllers/cdnController.js b/src/controllers/cdnController.js
index 9f5f138..de61dfc 100644
--- a/src/controllers/cdnController.js
+++ b/src/controllers/cdnController.js
@@ -62,10 +62,6 @@ export const proxyReportsFile = async (req, res, filePath) => {
 
     // Set response headers
     res.setHeader('Content-Type', contentType);
-    res.setHeader('Cache-Control', 'public, max-age=86400'); // 24 hours
-    res.setHeader('Access-Control-Allow-Origin', '*');
-    res.setHeader('Access-Control-Allow-Methods', 'GET, OPTIONS');
-    res.setHeader('Access-Control-Allow-Headers', 'Content-Type');
 
     if (metadata.etag) {
       res.setHeader('ETag', metadata.etag);
diff --git a/src/controllers/reportController.js b/src/controllers/reportController.js
index 9f24123..d7371a0 100644
--- a/src/controllers/reportController.js
+++ b/src/controllers/reportController.js
@@ -6,9 +6,6 @@ import {
   validateRequiredParams,
   sendValidationError,
   getLatestDate,
-  generateQueryCacheKey,
-  getCachedQueryResult,
-  setCachedQueryResult,
   handleControllerError,
   validateArrayParameter
 } from '../utils/controllerHelpers.js';
@@ -87,24 +84,6 @@ const createReportController = (reportType) => {
       startDate = await getLatestDate(firestore, config.table);
     }
 
-    // Create cache key for this specific query
-    const queryFilters = {
-      geo: params.geo,
-      rank: params.rank,
-      technology: techArray,
-      startDate: startDate,
-      endDate: params.end
-    };
-    const cacheKey = generateQueryCacheKey(config.table, queryFilters);
-
-    // Check cache first
-    const cachedResult = getCachedQueryResult(cacheKey);
-    if (cachedResult) {
-      res.statusCode = 200;
-      res.end(JSON.stringify(cachedResult));
-      return;
-    }
-
     // Build Firestore query
     let query = firestore.collection(config.table);
 
@@ -136,9 +115,6 @@ const createReportController = (reportType) => {
       data.push(doc.data());
     });
 
-    // Cache the result
-    setCachedQueryResult(cacheKey, data);
-
     // Send response
     res.statusCode = 200;
     res.end(JSON.stringify(data));
diff --git a/src/controllers/technologiesController.js b/src/controllers/technologiesController.js
index 5d75aba..a6114ce 100644
--- a/src/controllers/technologiesController.js
+++ b/src/controllers/technologiesController.js
@@ -66,13 +66,7 @@ const listTechnologies = async (req, res) => {
     return data;
   };
 
-  // Include onlyname and fields in cache key calculation
-  const customCacheKeyData = {
-    onlyname: req.query.onlyname || false,
-    fields: req.query.fields
-  };
-
-  await executeQuery(req, res, 'technologies', queryBuilder, dataProcessor, customCacheKeyData);
+  await executeQuery(req, res, 'technologies', queryBuilder, dataProcessor);
 };
 
 export {
diff --git a/src/index.js b/src/index.js
index c34f66a..3c16187 100644
--- a/src/index.js
+++ b/src/index.js
@@ -162,16 +162,6 @@ const handleRequest = async (req, res) => {
   } else if (pathname === '/v1/versions' && req.method === 'GET') {
     const { listVersions } = await getController('versions');
     await listVersions(req, res);
-  } else if (pathname === '/v1/cache-stats' && req.method === 'GET') {
-    // Cache monitoring endpoint
-    const { getCacheStats } = await import('./utils/controllerHelpers.js');
-    const stats = getCacheStats();
-    sendJSONResponse(res, stats);
-  } else if (pathname === '/v1/cache-reset' && req.method === 'POST') {
-    // Cache reset endpoint
-    const { resetCache } = await import('./utils/controllerHelpers.js');
-    const result = resetCache();
-    sendJSONResponse(res, result);
   } else if (pathname.startsWith('/v1/static/') && req.method === 'GET') {
     // GCS proxy endpoint for reports files
     const filePath = pathname.replace('/v1/static/', '');
diff --git a/src/utils/controllerHelpers.js b/src/utils/controllerHelpers.js
index b22313f..c1bc2f2 100644
--- a/src/utils/controllerHelpers.js
+++ b/src/utils/controllerHelpers.js
@@ -41,118 +41,22 @@ const sendValidationError = (res, errors) => {
   }));
 };
 
-// Cache for latest dates to avoid repeated queries
-const latestDateCache = new Map();
-const CACHE_TTL = 60 * 60 * 1000; // 1 hour in milliseconds
-
-const queryResultCache = new Map();
-const QUERY_CACHE_TTL = 60 * 60 * 1000; // 1 hour in milliseconds
-
-// Cache size limit
-const MAX_CACHE_SIZE = 5000; // Maximum number of cache entries
-
-/**
- * Clean up cache when it exceeds size limit (LRU-style cleanup)
- * Removes oldest entries first, including expired ones
- */
-const cleanupCacheToSize = () => {
-  const targetSize = Math.floor(MAX_CACHE_SIZE * 0.5); // Clean to 50% of max size
-  if (queryResultCache.size <= targetSize) return 0;
-
-  const now = Date.now();
-  const entries = Array.from(queryResultCache.entries());
-
-  // Sort by timestamp (oldest first), prioritizing expired entries
-  entries.sort((a, b) => {
-    const aExpired = (now - a[1].timestamp) > QUERY_CACHE_TTL;
-    const bExpired = (now - b[1].timestamp) > QUERY_CACHE_TTL;
-
-    // If one is expired and the other isn't, prioritize expired for deletion
-    if (aExpired && !bExpired) return -1;
-    if (!aExpired && bExpired) return 1;
-
-    // If both have same expiry status, sort by timestamp (oldest first)
-    return a[1].timestamp - b[1].timestamp;
-  });
-
-  const deleteCount = queryResultCache.size - targetSize;
-  for (let i = 0; i < deleteCount && i < entries.length; i++) {
-    queryResultCache.delete(entries[i][0]);
-  }
-};
-
-/**
- * Generate a cache key for a query
- * @param {string} collection - Collection name
- * @param {Object} filters - Query filters
- * @returns {string} - Cache key
- */
-const generateQueryCacheKey = (collection, filters) => {
-  return `${collection}:${JSON.stringify(filters)}`;
-};
-
-/**
- * Get cached query result if available and not expired
- * @param {string} cacheKey - Cache key
- * @returns {Array|null} - Cached result or null
- */
-const getCachedQueryResult = (cacheKey) => {
-  const cached = queryResultCache.get(cacheKey);
-  if (cached && (Date.now() - cached.timestamp) < QUERY_CACHE_TTL) {
-    return cached.data;
-  }
-  return null;
-};
-
-/**
- * Cache a query result
- * @param {string} cacheKey - Cache key
- * @param {Array} data - Query result data
- */
-const setCachedQueryResult = (cacheKey, data) => {
-  // Clean up if cache is getting too large before adding new entry
-  if (queryResultCache.size >= MAX_CACHE_SIZE) {
-    cleanupCacheToSize();
-  }
-
-  queryResultCache.set(cacheKey, {
-    data: data,
-    timestamp: Date.now()
-  });
-};
-
 /**
- * Get the latest date from a collection with caching
+ * Get the latest date from a collection
  * @param {Object} firestore - Firestore instance
  * @param {string} collection - Collection name
  * @returns {string|null} - Latest date or null
  */
 const getLatestDate = async (firestore, collection) => {
-  const now = Date.now();
-  const cacheKey = collection;
-  const cached = latestDateCache.get(cacheKey);
-
-  // Check if we have a valid cached result
-  if (cached && (now - cached.timestamp) < CACHE_TTL) {
-    return cached.date;
-  }
-
   // Query for latest date
   const query = firestore.collection(collection).orderBy('date', 'desc').limit(1);
   const snapshot = await query.get();
 
-  let latestDate = null;
   if (!snapshot.empty) {
-    latestDate = snapshot.docs[0].data().date;
+    return snapshot.docs[0].data().date;
   }
 
-  // Cache the result
-  latestDateCache.set(cacheKey, {
-    date: latestDate,
-    timestamp: now
-  });
-
-  return latestDate;
+  return null;
 };
 
 /**
@@ -174,54 +78,6 @@ const validateArrayParameter = (value, fieldName = 'parameter') => {
   return valueArray;
 };
 
-/**
- * Get cache statistics for monitoring
- * @returns {Object} Cache statistics
- */
-const getCacheStats = () => {
-  const now = Date.now();
-
-  // Count valid vs expired entries
-  let queryValidCount = 0;
-  let queryExpiredCount = 0;
-  for (const [key, value] of queryResultCache) {
-    if (now - value.timestamp < QUERY_CACHE_TTL) {
-      queryValidCount++;
-    } else {
-      queryExpiredCount++;
-    }
-  }
-
-  let dateValidCount = 0;
-  let dateExpiredCount = 0;
-  for (const [key, value] of latestDateCache) {
-    if (now - value.timestamp < CACHE_TTL) {
-      dateValidCount++;
-    } else {
-      dateExpiredCount++;
-    }
-  }
-
-  return {
-    queryCache: {
-      total: queryResultCache.size,
-      valid: queryValidCount,
-      expired: queryExpiredCount,
-      ttl: QUERY_CACHE_TTL
-    },
-    dateCache: {
-      total: latestDateCache.size,
-      valid: dateValidCount,
-      expired: dateExpiredCount,
-      ttl: CACHE_TTL
-    },
-    config: {
-      maxQueryCacheSize: MAX_CACHE_SIZE,
-      cleanupStrategy: 'size-based-lru'
-    }
-  };
-};
-
 /**
  * Handle controller errors with consistent error response format
  * @param {Object} res - Response object
@@ -242,30 +98,18 @@ const handleControllerError = (res, error, operation) => {
 };
 
 /**
- * Generic cache-enabled query executor
- * Handles caching, query execution, and response for simple queries
+ * Generic query executor
+ * Handles query execution and response for simple queries
  * @param {Object} req - Request object
 * @param {Object} res - Response object
 * @param {string} collection - Firestore collection name
 * @param {Function} queryBuilder - Function to build the query
 * @param {Function} dataProcessor - Optional function to process results
 */
-const executeQuery = async (req, res, collection, queryBuilder, dataProcessor = null, customCacheKeyData = null) => {
+const executeQuery = async (req, res, collection, queryBuilder, dataProcessor = null) => {
   try {
     const params = req.query;
 
-    // Generate cache key with custom data if provided
-    const cacheKeyData = customCacheKeyData ? { ...params, ...customCacheKeyData } : params;
-    const cacheKey = generateQueryCacheKey(collection, cacheKeyData);
-
-    // Check cache first
-    const cachedResult = getCachedQueryResult(cacheKey);
-    if (cachedResult) {
-      res.statusCode = 200;
-      res.end(JSON.stringify(cachedResult));
-      return;
-    }
-
     // Build and execute query
     const query = await queryBuilder(params);
     const snapshot = await query.get();
@@ -280,9 +124,6 @@ const executeQuery = async (req, res, collection, queryBuilder, dataProcessor =
       data = dataProcessor(data, params);
     }
 
-    // Cache the result
-    setCachedQueryResult(cacheKey, data);
-
     // Send response
     res.statusCode = 200;
     res.end(JSON.stringify(data));
@@ -318,31 +159,6 @@ const validateTechnologyArray = (technologyParam) => {
   }
 };
 
-/**
- * Reset all caches
- * @returns {Object} Reset operation result
- */
-const resetCache = () => {
-  const beforeStats = {
-    queryCache: queryResultCache.size,
-    dateCache: latestDateCache.size
-  };
-
-  // Clear both caches
-  queryResultCache.clear();
-  latestDateCache.clear();
-
-  return {
-    success: true,
-    message: 'All caches have been reset',
-    before: beforeStats,
-    after: {
-      queryCache: queryResultCache.size,
-      dateCache: latestDateCache.size
-    }
-  };
-};
-
 export {
   REQUIRED_PARAMS,
   FIRESTORE_IN_LIMIT,
@@ -351,11 +167,6 @@ export {
   getLatestDate,
   validateArrayParameter,
   handleControllerError,
-  generateQueryCacheKey,
-  getCachedQueryResult,
-  setCachedQueryResult,
-  getCacheStats,
   executeQuery,
-  validateTechnologyArray,
-  resetCache
+  validateTechnologyArray
 };
diff --git a/test-api.sh b/test-api.sh
index e73e836..d97d9ff 100755
--- a/test-api.sh
+++ b/test-api.sh
@@ -116,27 +116,4 @@ test_endpoint "/v1/page-weight" "?technology=WordPress&geo=ALL&rank=ALL&start=la
 # Test audits endpoint
 test_endpoint "/v1/audits" "?technology=WordPress&geo=ALL&rank=ALL&start=latest"
 
-# Test cache stats endpoint
-echo "Testing cache stats endpoint..."
-test_endpoint "/v1/cache-stats" ""
-
-# Test cache reset endpoint
-echo "Testing cache reset endpoint..."
-echo "Checking cache reset: http://localhost:3000/v1/cache-reset"
-response=$(curl -s -w "\n%{http_code}" -X POST "http://localhost:3000/v1/cache-reset")
-http_code=$(echo "$response" | tail -n1)
-body=$(echo "$response" | sed '$d')
-
-echo "$body" | jq .
-echo "Status code: $http_code"
-
-if [[ $http_code -ne 200 ]]; then
-  echo "Error: Cache reset endpoint returned non-200 status code"
-  exit 1
-fi
-
-echo ""
-echo "----------------------"
-echo ""
-
 echo "API tests complete! All endpoints returned 200 status code and CORS is properly configured."