
Commit

Fixed paid_content scraping
UltimaHoarder committed May 28, 2021
1 parent d379758 commit d273285
Showing 2 changed files with 43 additions and 33 deletions.
27 changes: 17 additions & 10 deletions apis/onlyfans/classes/create_auth.py
@@ -1,10 +1,12 @@
+from apis.onlyfans.classes.create_post import create_post
+from apis.onlyfans.classes.create_message import create_message
 from itertools import chain, product
 from apis import api_helper
 from apis.onlyfans.classes.extras import auth_details, content_types, create_headers, endpoint_links, error_details, handle_refresh
 from apis.onlyfans.classes.create_user import create_user
 
 import requests
-from typing import Optional, Union
+from typing import Any, Optional, Union
 from apis.api_helper import session_manager
 import copy
 from user_agent import generate_user_agent
@@ -408,19 +410,24 @@ def get_paid_content(
         refresh: bool = True,
         limit: int = 99,
         offset: int = 0,
-    ):
+        inside_loop: bool = False,
+    ) -> list[Union[create_message, create_post]]:
         api_type = "paid_content"
         if not self.active:
-            return
+            return []
         if not refresh:
             result = handle_refresh(self, api_type)
             if result:
                 return result
         link = endpoint_links(global_limit=limit, global_offset=offset).paid_api
-        session = self.session_manager.sessions[0]
-        results = self.session_manager.json_request(link)
-        if len(results) >= limit and not check:
-            results2 = self.get_paid_content(limit=limit, offset=limit + offset)
-            results.extend(results2)
-        self.paid_content = results
-        return results
+        final_results = self.session_manager.json_request(link)
+        if len(final_results) >= limit and not check:
+            results2 = self.get_paid_content(limit=limit, offset=limit + offset, inside_loop=True)
+            final_results.extend(results2)
+        if not inside_loop:
+            temp = []
+            temp += [create_message(x) for x in final_results if x["responseType"] == "message"]
+            temp += [create_post(x) for x in final_results if x["responseType"] == "post"]
+            final_results = temp
+        self.paid_content = final_results
+        return final_results
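
The reworked get_paid_content pages recursively, with inside_loop marking the inner calls, and only the outermost call wraps each raw dict in create_message or create_post, so callers now receive typed objects instead of raw API dicts. Below is a minimal sketch of consuming that typed result, assuming an authenticated auth object exposing this method; the helper name collect_paid_authors is hypothetical, not from this commit.

from apis.onlyfans.classes.create_message import create_message
from apis.onlyfans.classes.create_post import create_post

def collect_paid_authors(authed) -> list:
    # Branch on the wrapper class rather than a raw dict's "responseType" key,
    # mirroring the change to paid_content_scraper in modules/onlyfans.py below.
    authors = []
    for paid_content in authed.get_paid_content():
        if isinstance(paid_content, create_message):
            authors.append(paid_content.fromUser)
        elif isinstance(paid_content, create_post):
            authors.append(paid_content.author)
    return authors
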
49 changes: 26 additions & 23 deletions modules/onlyfans.py
@@ -314,20 +314,18 @@ def paid_content_scraper(api: start, identifiers=[]):
         return
     authed.subscriptions = authed.subscriptions
     for paid_content in paid_contents:
-        author = paid_content.get("author")
-        author = paid_content.get("fromUser", author)
+        author = None
+        if isinstance(paid_content, create_message):
+            author = paid_content.fromUser
+        elif isinstance(paid_content, create_post):
+            author = paid_content.author
         if not author:
             continue
         subscription = authed.get_subscription(check=True, identifier=author["id"])
         if not subscription:
             subscription = create_user(author)
             authed.subscriptions.append(subscription)
-        if paid_content["responseType"] == "post":
-            if paid_content["isArchived"]:
-                print(f"Model: {author['username']}")
-                # print(
-                #     "ERROR, PLEASE REPORT THIS AS AN ISSUE AND TELL ME WHICH MODEL YOU'RE SCRAPIMG, THANKS")
-                # input()
-                # exit()
-        api_type = paid_content["responseType"].capitalize() + "s"
+        api_type = paid_content.responseType.capitalize() + "s"
         api_media = getattr(subscription.temp_scraped, api_type)
         api_media.append(paid_content)
         count = 0
@@ -342,14 +340,16 @@ def paid_content_scraper(api: start, identifiers=[]):
site_name = "OnlyFans"
media_type = format_media_types()
count += 1
for api_type, paid_content in subscription.temp_scraped:
for api_type, paid_contents in subscription.temp_scraped:
if api_type == "Archived":
if any(x for k, x in paid_content if not x):
if any(x for k, x in paid_contents if not x):
input(
"OPEN A ISSUE GITHUB ON GITHUB WITH THE MODEL'S USERNAME AND THIS ERROR, THANKS"
)
exit(0)
continue
if not paid_contents:
continue
mandatory_directories = {}
mandatory_directories["profile_directory"] = profile_directory
mandatory_directories["download_directory"] = download_directory
@@ -368,16 +368,19 @@ def paid_content_scraper(api: start, identifiers=[]):
             metadata_path = os.path.join(
                 formatted_metadata_directory, api_type + ".db"
             )
-            unrefined_set = media_scraper(
-                paid_content,
-                authed,
-                subscription,
-                formatted_directories,
-                username,
-                api_type,
+            pool = subscription.session_manager.pool
+            unrefined_result = pool.starmap(
+                media_scraper,
+                product(
+                    paid_contents,
+                    [authed],
+                    [subscription],
+                    [formatted_directories],
+                    [username],
+                    [api_type],
+                ),
             )
-            unrefined_set = [x for x in [unrefined_set]]
-            new_metadata = main_helper.format_media_set(unrefined_set)
+            new_metadata = main_helper.format_media_set(unrefined_result)
             new_metadata = new_metadata["content"]
             if new_metadata:
                 api_path = os.path.join(api_type, "")
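
With this hunk, media_scraper is fanned out across the session manager's worker pool instead of being called once with the whole list. itertools.product pairs every element of paid_contents with the single value inside each one-element list, so pool.starmap invokes media_scraper once per paid item with the fixed arguments repeated. A self-contained sketch of that broadcast pattern, using stand-in names (media_scraper_stub, the sample data, the pool size) that are not from the repository:

from itertools import product
from multiprocessing.dummy import Pool  # thread-backed pool with the same starmap API

def media_scraper_stub(paid_content, authed, subscription, directories, username, api_type):
    # Stand-in worker: shows the argument tuple each parallel call receives.
    return f"{username}/{api_type}/{paid_content}"

pool = Pool(4)
paid_contents = ["post_1", "post_2", "message_3"]
# product() yields ("post_1", "auth", ...), ("post_2", "auth", ...), and so on,
# broadcasting the fixed arguments across every paid content item.
results = pool.starmap(
    media_scraper_stub,
    product(paid_contents, ["auth"], ["sub"], ["dirs"], ["user"], ["Posts"]),
)
print(results)  # ['user/Posts/post_1', 'user/Posts/post_2', 'user/Posts/message_3']
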
@@ -1065,7 +1068,7 @@ def media_scraper(
         if media_username != username:
             continue
         final_text = rawText if rawText else text
 
         if date == "-001-11-30T00:00:00+00:00":
             date_string = master_date
             date_object = datetime.strptime(master_date, "%d-%m-%Y %H:%M:%S")
@@ -1075,7 +1078,7 @@ def media_scraper(
             date_object = datetime.fromisoformat(date)
             date_string = date_object.replace(tzinfo=None).strftime("%d-%m-%Y %H:%M:%S")
             master_date = date_string
-        new_post["post_id"] = post_result.id
+        new_post["post_id"] = post_id
         new_post["text"] = final_text
         new_post["postedAt"] = date_string
         new_post["paid"] = False
