diff --git a/README.md b/README.md
index b18f834..e3af1f8 100644
--- a/README.md
+++ b/README.md
@@ -37,6 +37,26 @@ php artisan vendor:publish --provider="Kalimeromk\Rssfeed\RssFeedServiceProvider
This will publish a rssfeed.php config file to your config directory. Here you can set the XPaths for content elements.
+```php
+return [
+ 'domain_xpaths' => [
+ [
+ 'domain' => 'mistagogia.mk',
+ 'content_element_xpaths' => [
+ '//div[@class="single_post"]',
+ ],
+ ],
+ ],
+ 'min_image_width' => 300,
+ 'image_storage_path' => 'images',
+];
+
+```
+### In this configuration file:
+
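+* `domain_xpaths`: A list of per-domain entries. Each entry pairs a `domain` (matched against the host of the feed item's link) with the `content_element_xpaths` used to locate the article content on pages from that domain, allowing precise targeting of content for each site.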
+* `min_image_width`: The minimum width, in pixels, an image must have to be considered for storage; narrower images are skipped, so only images of adequate size are saved.
+* `image_storage_path`: The path on the `public` storage disk where images extracted from RSS feed items are stored.
## Credits
This package was created by KalimeroMK.
@@ -78,3 +98,4 @@ use Kalimeromk\Rssfeed\Jobs\RssFeedJob;
$feedUrls = ['https://example.com/rss'];
RssFeedJob::dispatch($feedUrls);
+```
\ No newline at end of file
diff --git a/config/rssfeed.php b/config/rssfeed.php
index 0879a2a..5e12040 100644
--- a/config/rssfeed.php
+++ b/config/rssfeed.php
@@ -1,9 +1,15 @@
[
- '//div[@class="post-content"]',
- '//div[@class="article-body"]',
- '//div[@class="td-post-content"]',
- '//div[contains(concat(" ", normalize-space(@class), " "), " post-single-content ") and contains(concat(" ", normalize-space(@class), " "), " box ") and contains(concat(" ", normalize-space(@class), " "), " mark-links ")]',
+ 'domain_xpaths' => [
+ [
+ 'domain' => 'mistagogia.mk',
+ 'content_element_xpaths' => [
+ '//div[@class="single_post"]',
+ ],
+ ],
],
-];
\ No newline at end of file
+ 'min_image_width' => 300,
+ 'image_storage_path' => 'images',
+];
+
diff --git a/src/RssFeed.php b/src/RssFeed.php
index bf151c2..60f5c6a 100644
--- a/src/RssFeed.php
+++ b/src/RssFeed.php
@@ -46,7 +46,7 @@ public function parseRssFeeds(array $feedUrls, $jobId = null): array
$fullContent = $this->retrieveFullContent($itemLink);
// Save the image to storage
- $images = $this->saveImageToStorage($fullContent);
+ $images = $this->saveImagesToStorage($fullContent['images']);
// Add the extracted item data to the parsedItems array
$parsedItems[] = [
@@ -71,15 +71,16 @@ public function parseRssFeeds(array $feedUrls, $jobId = null): array
* @return array|bool
* @throws CantOpenFileFromUrlException
*/
- public function saveImageToStorage(array $images): array
+ public function saveImagesToStorage(array $images): array
{
$savedImageNames = [];
+ $imageStoragePath = config('rssfeed.image_storage_path', 'images');
foreach ($images as $image) {
- $file = UrlUploadedFile::createFromUrl($image);
- $imageName = Str::random(15) . '.' . $file->extension();
- $file->storeAs('images', $imageName, 'public');
- $savedImageNames[] = $imageName;
+ $file = UrlUploadedFile::createFromUrl($image);
+ $imageName = Str::random(15) . '.' . $file->extension();
+ $file->storeAs($imageStoragePath, $imageName, 'public');
+ $savedImageNames[] = $imageName;
}
@@ -92,54 +93,54 @@ public function saveImageToStorage(array $images): array
*/
public function retrieveFullContent(string $postLink): bool|array
{
- // Fetch the HTML content using cURL
- $html = $this->fetchContentUsingCurl($postLink); // Use the previously defined cURL fetching function
-
- if ($html === false) {
- return false; // Handle the error as appropriate
- }
-
- // Load the HTML content into DOMDocument
+ $html = $this->fetchContentUsingCurl($postLink);
+ $parsedUrl = parse_url($postLink);
+ $host = $parsedUrl['host'] ?? '';
+ $domains = config('rssfeed.domain_xpaths');
+ $contentElementXPaths = collect($domains)->firstWhere('domain', $host)['content_element_xpaths'];
$dom = new DOMDocument();
@$dom->loadHTML($html);
-
- // Use DOMXPath to work with the DOM
$xpath = new DOMXPath($dom);
- // Initialize an array to hold the image URLs
$imageUrls = [];
$selectedContent = '';
+ $minImageWidth = config('rssfeed.min_image_width', 600); // Retrieve the minimum image width from config
- // Process each XPath query in the configuration
- foreach ($config['content_element_xpaths'] as $xpathQuery) {
+ foreach ($contentElementXPaths as $xpathQuery) {
$elements = $xpath->query($xpathQuery);
-
- // Check if elements were found for the current XPath query
if ($elements->length > 0) {
foreach ($elements as $element) {
- // Extract and concatenate the HTML of each matching element
$selectedContent .= $dom->saveHTML($element);
-
- // Find and store all tags within the current element
$images = $xpath->query('.//img', $element);
foreach ($images as $img) {
$src = $img->getAttribute('src');
- $imageUrls[] = $src;
+ // Get image dimensions
+ list($width, $height) = getimagesize($src);
+ if ($width >= $minImageWidth) {
+ // Add to array if not already added
+ if (!in_array($src, $imageUrls)) {
+ $imageUrls[] = $src;
+ }
+ }
}
}
}
}
- // Optionally, you might want to remove