Skip to content

Commit

Permalink
944b2f9306
Browse files Browse the repository at this point in the history
  • Loading branch information
Liftingthedata committed Jun 28, 2023
1 parent fc6b7ce commit 0dfc77e
Showing 1 changed file with 5 additions and 4 deletions.
9 changes: 5 additions & 4 deletions scrapers/vgchartz/scrape_game_sales.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import os
import time
import urllib
from datetime import datetime
from typing import Optional

import pandas as pd
Expand Down Expand Up @@ -82,8 +83,8 @@ def build_url(genre: str, console_type: str, page_num: int) -> str:
return url


import dateparser
import pandas as pd
def parse_date(date_str):
    """Parse a date such as ``"18th Nov 15"`` into a ``datetime``.

    The day may carry any English ordinal suffix (``1st``, ``2nd``,
    ``3rd``, ``18th``); the original format string only accepted ``th``,
    so every 1st/2nd/3rd/21st/22nd/23rd/31st date failed to parse.

    Args:
        date_str: Text in the form ``<day><suffix> <abbreviated month>
            <two-digit year>``, e.g. ``"1st Jan 20"``.

    Returns:
        A naive ``datetime`` at midnight of the parsed day.

    Raises:
        ValueError: If ``date_str`` does not match the expected layout.
    """
    import re  # local import: the file's top-level import block is not changed here

    # Drop the ordinal suffix so a single "%d %b %y" format covers all days.
    # The optional leading space and IGNORECASE mirror what strptime's "%d"
    # and literal matching already tolerated in the previous "%dth" format.
    without_suffix = re.sub(
        r"^( ?\d{1,2})(?:st|nd|rd|th)\b",
        r"\1",
        date_str,
        count=1,
        flags=re.IGNORECASE,
    )
    return datetime.strptime(without_suffix, "%d %b %y")


def clean_data(df: pd.DataFrame) -> pd.DataFrame:
Expand All @@ -108,8 +109,8 @@ def clean_data(df: pd.DataFrame) -> pd.DataFrame:
{"XS": "Xbox Series X", "XOne": "Xbox One", "X360": "Xbox 360", "XB": "Xbox"}
)

df["Release Date"] = df["Release Date"].apply(dateparser.parse)
df["Last Update"] = df["Last Update"].apply(dateparser.parse)
df["Release Date"] = df["Release Date"].apply(parse_date)
df["Last Update"] = df["Last Update"].apply(parse_date)
df["Release Year"] = df["Release Date"].dt.year

df = df.dropna(subset=["Release Year"])
Expand Down

0 comments on commit 0dfc77e

Please sign in to comment.