Skip to content

Commit

Permalink
b6e1f72a5e
Browse files Browse the repository at this point in the history
  • Loading branch information
Liftingthedata committed Jun 28, 2023
1 parent fb98e73 commit ca9031e
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 5 deletions.
4 changes: 2 additions & 2 deletions airflow/dags/tweet_scrape_dag.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,9 @@
"owner": "airflow",
"start_date": datetime(2022, 12, 1),
"depends_on_past": False,
"retries": 1,
"retries": 3,
"retry_delay": timedelta(seconds=60),
"concurrency": 0,
"concurrency": 3,
# "max_active_runs": 1,
"in_cluster": True,
"random_name_postfix_length": 3,
Expand Down
31 changes: 28 additions & 3 deletions scrapers/metacritic/scrape_games_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,30 @@ def extract_user_rating_count(soup: BeautifulSoup) -> int:
return 0


def calculate_weighted_score(row: pd.Series) -> float:
    """
    Calculates the weighted score of a game from its critic and user ratings.

    Critic scores contribute with a factor of 0.25 and user scores with a
    factor of 0.75; each is scaled by its number of reviews and the sum is
    divided by the total number of reviews.

    Args:
        row: A pandas Series (or any mapping) providing the keys
            "Critic Reviews Count", "Meta Score", "User Rating Count" and
            "User Score". Values that are None, NaN, or otherwise falsy are
            treated as 0.
    Returns:
        The calculated weighted score, or 0.0 when the row has no critic
        reviews and no user ratings.
    """

    def _number_or_zero(value):
        # NaN is truthy, so a plain ``value or 0`` would let it through and
        # turn the whole score into NaN; check for missing values explicitly.
        if pd.isna(value):
            return 0
        return value or 0

    critic_reviews_count = _number_or_zero(row["Critic Reviews Count"])
    meta_score = _number_or_zero(row["Meta Score"])
    user_rating_count = _number_or_zero(row["User Rating Count"])
    user_score = _number_or_zero(row["User Score"])

    total_reviews_count = critic_reviews_count + user_rating_count
    if not total_reviews_count:
        # No reviews at all: avoid dividing by zero.
        return 0.0

    critic_weight = critic_reviews_count * 0.25 * meta_score
    user_weight = user_rating_count * 0.75 * user_score

    return (critic_weight + user_weight) / total_reviews_count


if __name__ == "__main__":
console = os.getenv("console")
local_path = os.getenv("local_path")
Expand All @@ -307,7 +331,8 @@ def extract_user_rating_count(soup: BeautifulSoup) -> int:
data = scrape_game_data(game_url)
if data is not None:
game_data.append(data)
df1 = pd.DataFrame.from_records(game_data)
df1 = add_gamepass_status(df1)
df1.to_parquet(f"{local_path}{console}-games.parquet")
df = pd.DataFrame.from_records(game_data)
df["Weighted Score"] = df.apply(calculate_weighted_score, axis=1)
df = add_gamepass_status(df)
df.to_parquet(f"{local_path}{console}-games.parquet")
print("done")

0 comments on commit ca9031e

Please sign in to comment.