In [2]:
from functions import data_funcs

game_data, details_failed, game_failed, tags_failed  = data_funcs.get_game_data()


[INFO] Scraping Steam Charts...
[INFO] Getting each game's details via Steam API...
[INFO] Getting tags and reviews...
[INFO] Combining dataframes...
[SUCCESS] Game data successfully compiled.


In [None]:
import pandas as pd 

game_data["Rank"] = pd.to_numeric(game_data["Rank"].str.replace(r"\.", "", regex=True), errors="coerce").astype("Int64")
game_data["Current"] = pd.to_numeric(game_data["Current"].str.replace(r"\.", "", regex=True), errors="coerce").astype("Int64")
game_data["Peak"] = pd.to_numeric(game_data["Peak"].str.replace(r"\.", "", regex=True), errors="coerce").astype("Int64")
game_data["Player Hours	"] = pd.to_numeric(game_data["Player Hours	"].str.replace(r"\.", "", regex=True), errors="coerce").astype("Int64")


game_data

Unnamed: 0,Rank,Game,Current,Peak,App ID,Player Hours,Genres,Release Date,Days Since Release,Tags,Recent Review Score,Recent Review Count,All Review Score,All Review Count
0,1,Counter-Strike 2,728070,1603583,730,695134509,"[Action, Free To Play]","Aug 21, 2012",4872,"[FPS, Shooter, Multiplayer, Competitive, Actio...",8,77658,8,2453959
1,2,Dota 2,397752,903542,570,434670697,"[Action, Strategy, Free To Play]","Jul 9, 2013",4550,"[Free to Play, MOBA, Multiplayer, Strategy, eS...",8,45058,8,818217
2,3,ARC Raiders,295319,415319,1808500,164373045,[Action],"Oct 30, 2025",54,"[Extraction Shooter, Multiplayer, PvP, PvE, Th...",8,73278,8,122062
3,4,Bongo Cat,130779,178015,3419430,71302464,"[Casual, Indie, Massively Multiplayer, Simulat...","Mar 5, 2025",293,"[Cats, Free to Play, Clicker, Cute, Casual, Id...",9,31861,9,19025
4,5,PUBG: BATTLEGROUNDS,100744,751295,578080,198631711,"[Action, Adventure, Massively Multiplayer, Fre...","Dec 21, 2017",2924,"[Survival, Shooter, Battle Royale, Multiplayer...",6,16454,5,438993
5,6,Marvel Rivals,99995,152267,2767030,55836396,"[Action, Free To Play]","Dec 5, 2024",383,"[Free to Play, Multiplayer, Hero Shooter, Thir...",8,22212,6,254291
6,7,Path of Exile 2,91969,290305,2694490,42489349,"[Action, Adventure, Massively Multiplayer, RPG...","Dec 6, 2024",382,"[Action RPG, Hack and Slash, RPG, Isometric, M...",8,7454,6,86583
7,8,Warframe,86278,173377,230410,56762813,"[Action, RPG, Free To Play]","Mar 25, 2013",4656,"[Free to Play, Looter Shooter, Action RPG, Thi...",8,6088,8,282421
8,9,Rust,84591,174558,252490,72364372,"[Action, Adventure, Indie, Massively Multiplay...","Feb 8, 2018",2875,"[Survival, Crafting, Multiplayer, Open World, ...",8,18572,8,507833
9,10,Battlefield™ 6,80404,244715,2807960,77941516,[Action],"Oct 10, 2025",74,"[FPS, Multiplayer, Action, Military, Singlepla...",6,43181,6,123610


In [5]:
game_data.dtypes

Rank                    Int64
Game                   object
Current                 Int64
Peak                    Int64
App ID                 object
Player Hours           object
Genres                 object
Release Date           object
Days Since Release      int64
Tags                   object
Recent Review Score     int64
Recent Review Count     int64
All Review Score        int64
All Review Count        int64
dtype: object

In [3]:
import os
import pandas as pd
from datetime import datetime
 
dateCollected = datetime.now().strftime("%Y-%m-%d")

# Directory for CSVs
data_dir = os.path.join(os.getcwd(), "Steam Combined Data")
os.makedirs(data_dir, exist_ok=True)

# Save Overall Data
overall_file = os.path.join(data_dir, "Steam_Overall_Data.csv")

if os.path.exists(overall_file):
    overall_df = pd.read_csv(overall_file,index_col=False)
    print(overall_df)
    # # Standardize existing dates to ISO format
    # if "Date Collected" in overall_df.columns:
    #     overall_df["Date Collected"] = pd.to_datetime(overall_df["Date Collected"], errors="coerce").dt.strftime("%Y-%m-%d")

    # # Check for today's entry
    # if dateCollected in overall_df["Date Collected"].values:
    #     print("Data for today already exists in Steam_Overall_Data.csv, skipping save.")
    # else:
    #     overall_df = pd.concat([overall_df, game_data], ignore_index=True)



       Rank                          Game  Current     Peak   App ID  \
0       1.0              Counter-Strike 2   589411  1599759      730   
1       2.0                 Marvel Rivals   296620   496193  2767030   
2       3.0                        Dota 2   271976   714888      570   
3       4.0           PUBG: BATTLEGROUNDS   109129   786821   578080   
4       5.0  Kingdom Come: Deliverance II   106607   255607  1771300   
...     ...                           ...      ...      ...      ...   
4070  171.0                    Diablo® IV     5317    30532  2344520   
4071  172.0                           DSX     5285     6771  1812620   
4072  173.0   Call of Duty: Black Ops III     5250     7096   311210   
4073  174.0                        Kenshi     5249     8861   233860   
4074  175.0                 Summoners War     5191    10507  2426960   

      Player Hours                                             Genres  \
0        671649980                         ['Action', 'Free To

In [6]:
import streamlit as st
import pandas as pd
from functions import filter_funcs as ff
from pymongo import MongoClient
import streamlit as st
import ast

try:
    from creds import CONNECTION_STRING
except ImportError:
    CONNECTION_STRING = st.secrets["CONNECTION_STRING"]


@st.cache_data(ttl=600)
def load_all_steam_data():
    client = MongoClient(CONNECTION_STRING)
    db = client["SteamCollectedData"]
    collection = db["Steam Data"]
    data = list(collection.find({}))
    df = pd.DataFrame(data)
    df.drop(columns=["_id"], inplace=True)

    list_columns = ["Genres", "Tags"]

    for col in list_columns:
        if col in df.columns:
            df[col] = df[col].apply(lambda x: 
                x if isinstance(x, list) 
                else ast.literal_eval(x) if isinstance(x, str) and x.startswith("[") 
                else []
            )

    return df

df = load_all_steam_data()

2025-12-23 22:13:58.187 
  command:

    streamlit run C:\Users\junio\AppData\Roaming\Python\Python313\site-packages\ipykernel_launcher.py [ARGUMENTS]


In [3]:
print(df)

         Rank                               Game Current     Peak   App ID  \
0         1.0                   Counter-Strike 2  589411  1599759      730   
1         2.0                      Marvel Rivals  296620   496193  2767030   
2         3.0                             Dota 2  271976   714888      570   
3         4.0                PUBG: BATTLEGROUNDS  109129   786821   578080   
4         5.0       Kingdom Come: Deliverance II  106607   255607  1771300   
...       ...                                ...     ...      ...      ...   
170920  171.0              Monster Hunter: World    6585    27496   582010   
170921  172.0  Halo: The Master Chief Collection    6503    12154   976730   
170922  173.0                       Conan Exiles    6463     9557   440900   
170923  174.0                      Borderlands 4    6441    15993  1285190   
170924  175.0                              Hades    6412     9291  1145360   

       Player Hours                                            

In [7]:
df.dtypes

Rank                   float64
Game                    object
Current                  int64
Peak                     int64
App ID                  object
Player Hours             int64
Genres                  object
Release Date            object
Days Since Release     float64
Date Collected          object
Tags                    object
Recent Review Score    float64
Recent Review Count    float64
All Review Score       float64
All Review Count       float64
dtype: object

In [4]:
from functions import ml_funcs

sucess_df = ml_funcs.forest_ml(df)
print(sucess_df)

TypeError: unsupported operand type(s) for /: 'str' and 'str'