Skip to content

Commit

Permalink
Hash-based caching
Browse files Browse the repository at this point in the history
  • Loading branch information
Alkarex committed Jun 23, 2024
1 parent a676d40 commit ee843b3
Show file tree
Hide file tree
Showing 3 changed files with 94 additions and 5 deletions.
20 changes: 20 additions & 0 deletions src/Cache/BaseDataCache.php
Original file line number Diff line number Diff line change
Expand Up @@ -116,4 +116,24 @@ public function delete_data(string $key): bool
{
return $this->cache->unlink();
}

/**
 * Report when the wrapped cache was last modified.
 *
 * The value is obtained by delegating to the underlying cache object.
 *
 * @return int Timestamp
 */
public function mtime(): int
{
    $last_modified = $this->cache->mtime();

    return $last_modified;
}

/**
 * Mark the wrapped cache as modified right now.
 *
 * The call is forwarded to the underlying cache object and its result is
 * handed back unchanged.
 *
 * @return bool Success status
 */
public function touch(): bool
{
    $touched = $this->cache->touch();

    return $touched;
}
}
14 changes: 14 additions & 0 deletions src/Cache/DataCache.php
Original file line number Diff line number Diff line change
Expand Up @@ -77,4 +77,18 @@ public function set_data(string $key, array $value, ?int $ttl = null): bool;
* MUST be thrown if the $key string is not a legal value.
*/
public function delete_data(string $key): bool;

/**
 * Retrieve the last modified time for the cache
 *
 * NOTE(review): presumably a Unix timestamp in seconds, since the feed
 * fetching code adds the cache duration to it and compares the sum with
 * time() — confirm against the implementations.
 *
 * @return int Timestamp
 */
public function mtime(): int;

/**
 * Set the last modified time to the current time
 *
 * The feed fetching code calls this when freshly downloaded content hashes
 * to the same value as the cached copy, before reporting the cache as usable.
 * NOTE(review): presumably this restarts the freshness window measured from
 * mtime() — confirm against the implementations.
 *
 * @return bool Success status
 */
public function touch(): bool;
}
65 changes: 60 additions & 5 deletions src/SimplePie.php
Original file line number Diff line number Diff line change
Expand Up @@ -1634,14 +1634,48 @@ public function enable_exceptions(bool $enable = true)
$this->enable_exceptions = $enable;
}

/**
 * Computes a hash of the raw feed content,
 * after having cleaned it from noisy elements such as statistics or comments.
 * FreshRSS
 *
 * The content is cleaned and hashed in consecutive slices of 1 MiB, so the
 * regular expressions only ever copy one slice at a time. A noisy element
 * that straddles a slice boundary is therefore not stripped; the slice size
 * is deliberately left at 1 MiB so that hashes stay comparable with
 * previously stored ones.
 *
 * @param string $rss Raw feed content.
 * @return string SHA-1 hex digest of the cleaned content, or an empty string when $rss is empty.
 */
function clean_hash(string $rss): string
{
    if ($rss === '') {
        return '';
    }

    // Elements whose value changes between fetches without the feed really changing.
    $noise_patterns = [
        '#<(lastBuildDate|pubDate|updated|feedDate|dc:date|slash:comments)>[^<]+</\\1>#',
        '#<(media:starRating|media:statistics) [^/<>]+/>#',
        '#<!--.+?-->#s',
    ];
    $chunk_size = 1048576;
    $length = strlen($rss);
    $ctx = hash_init('sha1');

    // Slice the string directly instead of copying it into a temporary stream:
    // no stream handle to leak, and a slice equal to "0" (falsy) no longer
    // ends the loop prematurely.
    for ($offset = 0; $offset < $length; $offset += $chunk_size) {
        $chunk = substr($rss, $offset, $chunk_size);
        $cleaned = preg_replace($noise_patterns, '', $chunk);
        // preg_replace() returns null on a PCRE error (e.g. backtrack limit);
        // hash the untouched slice rather than passing null to hash_update().
        hash_update($ctx, $cleaned ?? $chunk);
    }

    return hash_final($ctx);
}

/**
* Initialize the feed object
*
* This is what makes everything happen. Period. This is where all of the
* configuration options get processed, feeds are fetched, cached, and
* parsed, and all of that other good stuff.
*
* @return bool True if successful, false otherwise
* @return bool|int positive integer with modification time if using cache, boolean true if otherwise successful, false otherwise // FreshRSS
*/
public function init()
{
Expand Down Expand Up @@ -1718,6 +1752,7 @@ public function init()
$this->check_modified = false;
$this->multifeed_objects = [];
$cache = false;
$hash = '';

if ($this->feed_url !== null) {
$parsed_feed_url = $this->registry->call(Misc::class, 'parse_url', [$this->feed_url]);
Expand All @@ -1729,12 +1764,15 @@ public function init()

// Fetch the data into $this->raw_data
if (($fetched = $this->fetch_data($cache)) === true) {
return true;
return empty($this->data['mtime']) ? false : $this->data['mtime']; // FreshRSS
} elseif ($fetched === false) {
return false;
}

[$headers, $sniffed] = $fetched;
if (isset($this->data['hash']) && is_string($this->data['hash'])) { // FreshRSS
$hash = $this->data['hash'];
}
}

// Empty response check
Expand Down Expand Up @@ -1803,6 +1841,8 @@ public function init()
$this->data['headers'] = $headers;
}
$this->data['build'] = Misc::get_build();
$this->data['hash'] = $hash === '' ? $this->clean_hash($this->raw_data) : $hash; // FreshRSS
$this->data['mtime'] = time(); // FreshRSS

// Cache the file if caching is enabled
$this->data['cache_expiration_time'] = $this->cache_duration + time();
Expand Down Expand Up @@ -1874,7 +1914,10 @@ protected function fetch_data(&$cache)
// Load the Cache
$this->data = $cache->get_data($cacheKey, []);

if (!empty($this->data)) {
if ($cache->mtime() + $this->cache_duration > time()) { // FreshRSS
$this->raw_data = false;
return true; // If the cache is still valid, just return true
} elseif (!empty($this->data)) {
// If the cache is for an outdated build of SimplePie
if (!isset($this->data['build']) || $this->data['build'] !== Misc::get_build()) {
$cache->delete_data($cacheKey);
Expand Down Expand Up @@ -1906,7 +1949,7 @@ protected function fetch_data(&$cache)
// when requesting this file. (Note that it's up to the file to
// support this, but we don't always send the headers either.)
$this->check_modified = true;
if (isset($this->data['headers']['last-modified']) || isset($this->data['headers']['etag'])) {
{ // if (isset($this->data['headers']['last-modified']) || isset($this->data['headers']['etag'])) { // FreshRSS removed
$headers = [
'Accept' => SimplePie::DEFAULT_HTTP_ACCEPT_HEADER,
];
Expand Down Expand Up @@ -1942,6 +1985,17 @@ protected function fetch_data(&$cache)
return true;
}
}
{ // FreshRSS
$hash = $this->clean_hash($file->get_body_content());
if ($this->data['hash'] === $hash) {
syslog(LOG_DEBUG, 'SimplePie hash cache match for ' . $this->feed_url);
$cache->touch();
return true; // Content unchanged even though server did not send a 304
} else {
syslog(LOG_DEBUG, 'SimplePie hash cache no match for ' . $this->feed_url);
$this->data['hash'] = $hash;
}
}
}
// If the cache is still valid, just return true
else {
Expand Down Expand Up @@ -2069,6 +2123,8 @@ protected function fetch_data(&$cache)
'feed_url' => $file->get_final_requested_uri(),
'build' => Misc::get_build(),
'cache_expiration_time' => $this->cache_duration + time(),
'hash' => $hash === '' ? $this->clean_hash($file->get_body_content()) : $hash, // FreshRSS
'mtime' => time(), // FreshRSS
];

if (!$cache->set_data($cacheKey, $this->data, $this->cache_duration)) {
Expand All @@ -2081,7 +2137,6 @@ protected function fetch_data(&$cache)
}

$this->raw_data = $file->get_body_content();
$this->raw_data = trim($this->raw_data);
$this->permanent_url = $file->get_permanent_uri();

$headers = [];
Expand Down

0 comments on commit ee843b3

Please sign in to comment.