In [1]:
import pandas as pd
import seaborn as sns
import os

In [2]:
pd.set_option("display.max_columns", None)

In [3]:
print(os.getcwd())

/Users/matheus/Projects/steam_analysis/steam-analysis/notebooks/analysis


In [4]:
os.chdir("../..")

In [5]:
from src.features.build_features import (
    CreateTargetColumns,
    CreateBinaryColumns,
    DateColumnsTransformer,
    DeleteNaNRows,
    CounterTransformer,
    YearThresholdTransformer,
    IndieTransformer,
)

In [6]:
from sklearn.pipeline import Pipeline

pipeline = Pipeline(
    steps=[
        ("drop_NaN_columns", DeleteNaNRows()),
        ("create_binary_columns", CreateBinaryColumns()),
        ("create_counter_columns", CounterTransformer()),
        ("create_target_columns", CreateTargetColumns(threshold=500)),
        ("create_date_columns", DateColumnsTransformer()),
        ("filter_dataset_year", YearThresholdTransformer()),
        ("filter_dataset_indie_only", IndieTransformer()),
    ]
)

In [7]:
steam_df = pd.read_csv("data/raw/games.csv")

In [8]:
steam_df.columns = [col.lower().replace(" ", "_") for col in steam_df.columns]

In [9]:
pipeline.fit_transform(steam_df)

Unnamed: 0,appid,name,release_date,estimated_owners,peak_ccu,required_age,price,dlc_count,about_the_game,supported_languages,full_audio_languages,reviews,header_image,website,support_url,support_email,windows,mac,linux,metacritic_score,metacritic_url,user_score,positive,negative,score_rank,achievements,recommendations,notes,average_playtime_forever,average_playtime_two_weeks,median_playtime_forever,median_playtime_two_weeks,developers,publishers,categories,genres,tags,screenshots,movies,has_publisher,total_reviews,perceived_quality,has_support_email,has_support_url,has_website,n_screens,n_movies,n_tags,n_languages,target_success,month,day,year
68042,1364760,COTTOn Rock'n'Roll -SUPERLATIVE NIGHT DREAMS-,2023-01-05,0 - 20000,12,0,22.49,0,The original beautiful girl side-scrolling sho...,"{ 'Traditional Chinese', 'Japanese', 'English...","['Japanese', 'Simplified Chinese']",,https://cdn.akamai.steamstatic.com/steam/apps/...,https://cotton-rock-n-roll.success-corp.co.jp/,https://www.success-corp.co.jp/inquiry/consume...,,True,False,False,0,,0,10,0,,23,0,,0,0,0,0,Studio Saizensen,SUCCESS Corp.,"Single-player,Steam Achievements,Full controll...",Action,"Action,Arcade,Shooter,Shoot 'Em Up,Female Prot...",https://cdn.akamai.steamstatic.com/steam/apps/...,http://cdn.akamai.steamstatic.com/steam/apps/2...,True,10,10.000000,False,True,True,10,2,6,9,False,1,5,2023
68049,2185760,Under the Sky World,2023-01-04,0 - 20000,14,0,14.39,0,'Under the Sky World' is an orthodox casual RP...,"{ 'English', 'Traditional Chinese', 'Japanese...",[],,https://cdn.akamai.steamstatic.com/steam/apps/...,,,bokibokiofficial@gmail.com,True,False,False,0,,0,10,0,,0,0,,0,0,0,0,サークル☆フェアリーフラワー,BokiBoki Games,Single-player,"Adventure,Casual,Indie,RPG","JRPG,Sexual Content,Casual,Hentai,Female Prota...",https://cdn.akamai.steamstatic.com/steam/apps/...,http://cdn.akamai.steamstatic.com/steam/apps/2...,True,10,10.000000,True,False,False,9,2,17,4,False,1,4,2023
68066,2088160,天外武林 (Traveler of Wuxia),2023-01-05,20000 - 50000,5707,0,22.49,0,Traveler of Wuxia is a deck-buiding roguelite ...,"{ 'Traditional Chinese', 'Simplified Chinese'}",[],,https://cdn.akamai.steamstatic.com/steam/apps/...,,https://www.facebook.com/heluogame/,,True,False,False,0,,0,702,165,,37,878,,427,427,427,427,香港商河洛互動娛樂股份有限公司,香港商河洛互動娛樂股份有限公司,"Single-player,Steam Achievements,Steam Cloud","RPG,Strategy","Rogue-like,Deckbuilding,Card Game,RPG,Card Bat...",https://cdn.akamai.steamstatic.com/steam/apps/...,http://cdn.akamai.steamstatic.com/steam/apps/2...,False,867,4.228916,False,True,False,11,1,19,2,True,1,5,2023
68069,2209560,NoEL - Nothing on Elysion Line,2023-01-05,0 - 20000,6,0,17.49,0,"게임 기본 정보 =============== ◎ 전투를 통해 덱을 수정하고, 장비를...",{'Korean'},['Korean'],,https://cdn.akamai.steamstatic.com/steam/apps/...,,,xenogames00@gmail.com,True,False,False,0,,0,7,3,,0,0,,0,0,0,0,Xenogames,Xenogames,Single-player,"Adventure,Casual,Indie,RPG,Strategy","Card Game,Rogue-like,Anime,Visual Novel,Roguel...",https://cdn.akamai.steamstatic.com/steam/apps/...,http://cdn.akamai.steamstatic.com/steam/apps/2...,False,10,1.750000,True,False,False,8,1,20,1,False,1,5,2023
68074,1816070,ILLUMINATI,2023-01-06,0 - 20000,7,0,21.24,0,Battle for world domination in this darkly sat...,{'English'},['English'],,https://cdn.akamai.steamstatic.com/steam/apps/...,https://illuminati.game,https://illuminati.game,support@radiofree.net,True,True,False,0,,0,11,5,,0,0,"Some depictions of bloody violence, mortal per...",0,0,0,0,"Derek Pearcy,Matt Seifert",RadioFree.Net,"Single-player,Multi-player,PvP,Online PvP,Cros...","Casual,Indie,Strategy","Strategy,Card Battler,PvP,Casual,Card Game,2D,...",https://cdn.akamai.steamstatic.com/steam/apps/...,http://cdn.akamai.steamstatic.com/steam/apps/2...,True,16,1.833333,True,True,True,7,1,17,1,False,1,6,2023
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
85028,1281490,The Night is Grey,2024-01-05,0 - 20000,0,0,14.39,2,Graham is alone in a forest filled with strang...,{'English'},[],“Everything about The Night is Grey oozes qual...,https://cdn.akamai.steamstatic.com/steam/apps/...,https://whalestork.com/,https://whalestork.com/,info@whalestork.com,True,True,False,0,,0,14,3,,16,0,Although there are no graphic depictions of vi...,0,0,0,0,Whalestork Interactive,Whalestork Interactive,"Single-player,Steam Achievements","Adventure,Casual,Indie","Point & Click,Adventure,Indie,Psychological Ho...",https://cdn.akamai.steamstatic.com/steam/apps/...,http://cdn.akamai.steamstatic.com/steam/apps/2...,False,17,3.500000,True,True,True,7,1,20,1,False,1,5,2024
85034,2234690,The Day Before You Gone,2024-01-05,20000 - 50000,3,0,7.64,2,"Set in a medieval fantasy world, an era filled...","{'English', 'Russian'}",['Russian'],,https://cdn.akamai.steamstatic.com/steam/apps/...,https://vk.com/daybeforeyougone,https://vk.com/daybeforeyougone,studio@aequalis.ru,True,False,False,0,,0,11,0,,30,0,,0,0,0,0,Aequalis Studio,Aequalis Studio,"Single-player,Steam Achievements","Adventure,Indie","Adventure,Visual Novel,Fantasy,Medieval,Psycho...",https://cdn.akamai.steamstatic.com/steam/apps/...,http://cdn.akamai.steamstatic.com/steam/apps/2...,False,11,11.000000,True,True,True,8,1,20,2,False,1,5,2024
85037,2674190,Akai Onna,2024-01-04,0 - 20000,0,0,2.54,0,Akai Onna | 赤い女 is a psychological J-horror ga...,"{ 'Japanese', 'English'}",[],,https://cdn.akamai.steamstatic.com/steam/apps/...,,,yamotoinfo01@gmail.com,True,False,False,0,,0,19,2,,2,0,This game may contain content inappropriate fo...,0,0,0,0,Yamoto,Yamoto,Single-player,"Casual,Indie","Walking Simulator,Casual,First-Person,3D,Horro...",https://cdn.akamai.steamstatic.com/steam/apps/...,http://cdn.akamai.steamstatic.com/steam/apps/2...,False,21,6.333333,True,False,False,7,1,12,2,False,1,4,2024
85065,2459870,Drop Doll,2024-01-05,0 - 20000,0,0,1.79,1,Game Introduction: 'Drop Doll' is a casual puz...,"{ 'English', 'Traditional Chinese', 'Japanes...",[],,https://cdn.akamai.steamstatic.com/steam/apps/...,,,liub3562@gmail.com,True,False,False,0,,0,24,4,,0,0,This Game may contain content not appropriate ...,0,0,0,0,No.25,No.25,Single-player,"Casual,Early Access","Mature,Sexual Content,Casual,Relaxing,NSFW,2D,...",https://cdn.akamai.steamstatic.com/steam/apps/...,http://cdn.akamai.steamstatic.com/steam/apps/2...,False,28,4.800000,True,False,False,6,1,14,4,False,1,5,2024


In [8]:
teste = DeleteNaNRows()

In [9]:
steam_df = teste.fit_transform(steam_df)

In [10]:
teste = CounterTransformer()

In [11]:
steam_df = teste.fit_transform(steam_df)

In [12]:
steam_df

Unnamed: 0,appid,name,release_date,estimated_owners,peak_ccu,required_age,price,dlc_count,about_the_game,supported_languages,full_audio_languages,reviews,header_image,website,support_url,support_email,windows,mac,linux,metacritic_score,metacritic_url,user_score,positive,negative,score_rank,achievements,recommendations,notes,average_playtime_forever,average_playtime_two_weeks,median_playtime_forever,median_playtime_two_weeks,developers,publishers,categories,genres,tags,screenshots,movies,n_screens,n_movies,n_tags,n_languages
0,20200,Galactic Bowling,"Oct 21, 2008",0 - 20000,0,0,19.99,0,Galactic Bowling is an exaggerated and stylize...,{'English'},[],,https://cdn.akamai.steamstatic.com/steam/apps/...,http://www.galacticbowling.net,,,True,False,False,0,,0,6,11,,30,0,,0,0,0,0,Perpetual FX Creative,Perpetual FX Creative,"Single-player,Multi-player,Steam Achievements,...","Casual,Indie,Sports","Indie,Casual,Sports,Bowling",https://cdn.akamai.steamstatic.com/steam/apps/...,http://cdn.akamai.steamstatic.com/steam/apps/2...,10,1,4,1
1,655370,Train Bandit,"Oct 12, 2017",0 - 20000,0,0,0.99,0,THE LAW!! Looks to be a showdown atop a train....,"{ 'Japanese', 'Simplified Chinese', 'Traditi...",[],,https://cdn.akamai.steamstatic.com/steam/apps/...,http://trainbandit.com,,support@rustymoyher.com,True,True,False,0,,0,53,5,,12,0,,0,0,0,0,Rusty Moyher,Wild Rooster,"Single-player,Steam Achievements,Full controll...","Action,Indie","Indie,Action,Pixel Graphics,2D,Retro,Arcade,Sc...",https://cdn.akamai.steamstatic.com/steam/apps/...,http://cdn.akamai.steamstatic.com/steam/apps/2...,5,1,20,10
3,1355720,Henosis™,"Jul 23, 2020",0 - 20000,0,0,5.99,0,HENOSIS™ is a mysterious 2D Platform Puzzler w...,"{ 'Japanese', 'Simplified Chinese', 'Traditi...",[],,https://cdn.akamai.steamstatic.com/steam/apps/...,https://henosisgame.com/,https://henosisgame.com/,info@henosisgame.com,True,True,True,0,,0,3,0,,0,0,,0,0,0,0,Odd Critter Games,Odd Critter Games,"Single-player,Full controller support","Adventure,Casual,Indie","2D Platformer,Atmospheric,Surreal,Mystery,Puzz...",https://cdn.akamai.steamstatic.com/steam/apps/...,http://cdn.akamai.steamstatic.com/steam/apps/2...,7,1,19,11
4,1139950,Two Weeks in Painland,"Feb 3, 2020",0 - 20000,0,0,0.00,0,ABOUT THE GAME Play as a hacker who has arrang...,"{'English', 'Spanish - Spain'}",[],,https://cdn.akamai.steamstatic.com/steam/apps/...,https://www.unusual-games.com/home/,https://www.unusual-games.com/contact/,welistentoyou@unusual-games.com,True,True,False,0,,0,50,8,,17,0,This Game may contain content not appropriate ...,0,0,0,0,Unusual Games,Unusual Games,"Single-player,Steam Achievements","Adventure,Indie","Indie,Adventure,Nudity,Violent,Sexual Content,...",https://cdn.akamai.steamstatic.com/steam/apps/...,http://cdn.akamai.steamstatic.com/steam/apps/2...,24,1,6,2
5,1469160,Wartune Reborn,"Feb 26, 2021",50000 - 100000,68,0,0.00,0,Feel tired of auto-fight? Feel tired of boring...,{'English'},[],,https://cdn.akamai.steamstatic.com/steam/apps/...,,https://7.wan.com,https://wartune@7road.com,True,False,False,0,,0,87,49,,0,0,,0,0,0,0,7Road,7Road,"Single-player,Multi-player,MMO,PvP,Online PvP,...","Adventure,Casual,Free to Play,Massively Multip...","Turn-Based Combat,Massively Multiplayer,Multip...",https://cdn.akamai.steamstatic.com/steam/apps/...,http://cdn.akamai.steamstatic.com/steam/apps/2...,12,1,20,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
85077,2704060,Ant Farm Simulator,"Jan 5, 2024",0 - 20000,3,0,0.99,0,Ant Farm (formicarium) With A Colony Of Ants. ...,"{ 'Japanese', 'Vietnamese', 'Ukrainian', 'P...",[],,https://cdn.akamai.steamstatic.com/steam/apps/...,,,smirna.simulator.games@gmail.com,True,False,False,0,,0,1,1,,0,0,,0,0,0,0,Smirna Simulator Games,Smirna Simulator Games,Single-player,"Casual,Indie,Simulation,Early Access","Simulation,Casual,Sandbox,Farming Sim,Life Sim...",https://cdn.akamai.steamstatic.com/steam/apps/...,http://cdn.akamai.steamstatic.com/steam/apps/2...,5,2,18,11
85079,2645600,The Holyburn Witches,"Jan 5, 2024",0 - 20000,0,0,2.99,0,"In the once tranquil village of Holyburn, Main...","{ 'Japanese', 'Simplified Chinese', 'Korean'...","['English', 'Simplified Chinese']",,https://cdn.akamai.steamstatic.com/steam/apps/...,,www.mooliongames.com,mooliongames@gmail.com,True,True,False,0,,0,1,3,,0,0,,0,0,0,0,Moolion,Moolion,Single-player,"Casual,Indie,Early Access","Casual,Adventure,Point & Click,Exploration,3D,...",https://cdn.akamai.steamstatic.com/steam/apps/...,http://cdn.akamai.steamstatic.com/steam/apps/2...,5,2,16,9
85083,2464700,Digital Girlfriend,"Jan 5, 2024",0 - 20000,0,0,3.74,1,《Digital Girlfriend》 is a nurturing game of su...,"{ 'Traditional Chinese', 'Simplified Chinese'...",[],,https://cdn.akamai.steamstatic.com/steam/apps/...,,,1364276391@qq.com,True,False,False,0,,0,8,7,,0,0,This game may contain content not suitable for...,0,0,0,0,4XPet,4XPet,Single-player,"Adventure,Casual,Indie","Casual,Sexual Content,Nudity,Adventure,Mature,...",https://cdn.akamai.steamstatic.com/steam/apps/...,http://cdn.akamai.steamstatic.com/steam/apps/2...,10,1,19,3
85085,2602790,Above the Hill,"Jan 5, 2024",0 - 20000,0,0,8.49,0,A horror game about a hicker who found himself...,"{ 'Arabic', 'English', 'French'}",[],,https://cdn.akamai.steamstatic.com/steam/apps/...,,,kashwahed1999@gmail.com,True,False,False,0,,0,2,1,,11,0,,0,0,0,0,M.Mustapha Hadi,M.Mustapha Hadi,"Single-player,Steam Achievements","Adventure,Indie","Adventure,Action-Adventure,Exploration,FPS,3D,...",https://cdn.akamai.steamstatic.com/steam/apps/...,http://cdn.akamai.steamstatic.com/steam/apps/2...,7,3,15,3


In [35]:
X_transformed = steam_df
X_transformed["supported_languages"] = X_transformed["supported_languages"].apply(
    lambda x: x.replace("[", "").replace("]", "")
)  # Change string format
X_transformed["supported_languages"] = X_transformed["supported_languages"].apply(
    lambda x: set(x.split(","))
)  # Transform into a set
X_transformed["n_languages"] = X_transformed["supported_languages"].apply(
    lambda n: len(n)
)

AttributeError: 'set' object has no attribute 'replace'

In [36]:
steam_df["screenshots"].apply(lambda x: len(set(x.split(","))))

0        10
1         5
2         6
3         7
4        24
         ..
85098     5
85099     5
85100     5
85101    15
85102    10
Name: screenshots, Length: 78590, dtype: int64

In [41]:
X_transformed = steam_df


X_transformed["n_screens"] = X_transformed["screenshots"].apply(
    lambda x: len(set(x.split(",")))
)
X_transformed["n_movies"] = X_transformed["movies"].apply(
    lambda x: len(set(x.split(",")))
)
X_transformed["n_tags"] = X_transformed["tags"].apply(lambda x: len(set(x.split(","))))

AttributeError: 'float' object has no attribute 'split'

In [12]:
X_transformed

Unnamed: 0,appid,name,release_date,estimated_owners,peak_ccu,required_age,price,dlc_count,about_the_game,supported_languages,full_audio_languages,reviews,header_image,website,support_url,support_email,windows,mac,linux,metacritic_score,metacritic_url,user_score,positive,negative,score_rank,achievements,recommendations,notes,average_playtime_forever,average_playtime_two_weeks,median_playtime_forever,median_playtime_two_weeks,developers,publishers,categories,genres,tags,screenshots,movies,n_languages
0,20200,Galactic Bowling,"Oct 21, 2008",0 - 20000,0,0,19.99,0,Galactic Bowling is an exaggerated and stylize...,{'English'},[],,https://cdn.akamai.steamstatic.com/steam/apps/...,http://www.galacticbowling.net,,,True,False,False,0,,0,6,11,,30,0,,0,0,0,0,Perpetual FX Creative,Perpetual FX Creative,"Single-player,Multi-player,Steam Achievements,...","Casual,Indie,Sports","Indie,Casual,Sports,Bowling",https://cdn.akamai.steamstatic.com/steam/apps/...,http://cdn.akamai.steamstatic.com/steam/apps/2...,1
1,655370,Train Bandit,"Oct 12, 2017",0 - 20000,0,0,0.99,0,THE LAW!! Looks to be a showdown atop a train....,"{ 'Italian', 'French', 'German', 'Tradition...",[],,https://cdn.akamai.steamstatic.com/steam/apps/...,http://trainbandit.com,,support@rustymoyher.com,True,True,False,0,,0,53,5,,12,0,,0,0,0,0,Rusty Moyher,Wild Rooster,"Single-player,Steam Achievements,Full controll...","Action,Indie","Indie,Action,Pixel Graphics,2D,Retro,Arcade,Sc...",https://cdn.akamai.steamstatic.com/steam/apps/...,http://cdn.akamai.steamstatic.com/steam/apps/2...,10
2,1732930,Jolt Project,"Nov 17, 2021",0 - 20000,0,0,4.99,0,Jolt Project: The army now has a new robotics ...,"{ 'Portuguese - Brazil', 'English'}",[],,https://cdn.akamai.steamstatic.com/steam/apps/...,,,ramoncampiaof31@gmail.com,True,False,False,0,,0,0,0,,0,0,,0,0,0,0,Campião Games,Campião Games,Single-player,"Action,Adventure,Indie,Strategy",,https://cdn.akamai.steamstatic.com/steam/apps/...,http://cdn.akamai.steamstatic.com/steam/apps/2...,2
3,1355720,Henosis™,"Jul 23, 2020",0 - 20000,0,0,5.99,0,HENOSIS™ is a mysterious 2D Platform Puzzler w...,"{ 'Italian', 'French', 'German', 'Tradition...",[],,https://cdn.akamai.steamstatic.com/steam/apps/...,https://henosisgame.com/,https://henosisgame.com/,info@henosisgame.com,True,True,True,0,,0,3,0,,0,0,,0,0,0,0,Odd Critter Games,Odd Critter Games,"Single-player,Full controller support","Adventure,Casual,Indie","2D Platformer,Atmospheric,Surreal,Mystery,Puzz...",https://cdn.akamai.steamstatic.com/steam/apps/...,http://cdn.akamai.steamstatic.com/steam/apps/2...,11
4,1139950,Two Weeks in Painland,"Feb 3, 2020",0 - 20000,0,0,0.00,0,ABOUT THE GAME Play as a hacker who has arrang...,"{ 'Spanish - Spain', 'English'}",[],,https://cdn.akamai.steamstatic.com/steam/apps/...,https://www.unusual-games.com/home/,https://www.unusual-games.com/contact/,welistentoyou@unusual-games.com,True,True,False,0,,0,50,8,,17,0,This Game may contain content not appropriate ...,0,0,0,0,Unusual Games,Unusual Games,"Single-player,Steam Achievements","Adventure,Indie","Indie,Adventure,Nudity,Violent,Sexual Content,...",https://cdn.akamai.steamstatic.com/steam/apps/...,http://cdn.akamai.steamstatic.com/steam/apps/2...,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
85098,2669080,Mannerheim's Saloon Car,"Jan 2, 2024",0 - 0,0,0,0.00,0,Marshal Mannerheim’s Saloon Car is the train c...,"{'English', 'Finnish'}",['Finnish'],,https://cdn.akamai.steamstatic.com/steam/apps/...,,,xgsmikkeli@gmail.com,True,False,False,0,,0,0,0,,0,0,,0,0,0,0,Xamk Game Studios,"Sodan ja rauhan keskus Muisti, Päämajamuseo","Single-player,Tracked Controller Support,VR Only","Adventure,Simulation",,https://cdn.akamai.steamstatic.com/steam/apps/...,http://cdn.akamai.steamstatic.com/steam/apps/2...,2
85099,2736910,Beer Run,"Jan 3, 2024",0 - 0,0,0,0.00,0,Beer Run is an Indie game created to steal bee...,{'English'},[],,https://cdn.akamai.steamstatic.com/steam/apps/...,,,cperez955c@gmail.com,True,False,False,0,,0,0,0,,0,0,,0,0,0,0,955 Games,955 Games,Single-player,"Casual,Indie",,https://cdn.akamai.steamstatic.com/steam/apps/...,http://cdn.akamai.steamstatic.com/steam/apps/2...,1
85100,2743220,My Friend The Spider,"Jan 4, 2024",0 - 0,0,0,0.00,0,A small 'horror' narrative game about isolatio...,{'English'},['English'],,https://cdn.akamai.steamstatic.com/steam/apps/...,,https://markleash.itch.io/my-friend-the-spider,,True,False,False,0,,0,0,0,,0,0,,0,0,0,0,MCA,MCA,Single-player,"Adventure,Simulation",,https://cdn.akamai.steamstatic.com/steam/apps/...,http://cdn.akamai.steamstatic.com/steam/apps/2...,1
85101,2293130,Path of Survivors,"Jan 8, 2024",0 - 0,0,0,3.99,0,Path of Survivors is a multi-class auto-battle...,{'English'},[],,https://cdn.akamai.steamstatic.com/steam/apps/...,https://www.limitedinput.com/,https://www.limitedinput.com/contact,support@limitedinput.com,True,False,False,0,,0,0,0,,34,0,,0,0,0,0,Limited Input,Limited Input,"Single-player,Steam Achievements,Partial Contr...","Action,Casual,Indie,RPG,Simulation",,https://cdn.akamai.steamstatic.com/steam/apps/...,http://cdn.akamai.steamstatic.com/steam/apps/2...,1


In [27]:
numeric_columns = steam_df.select_dtypes(include=[float, int, bool]).columns

In [43]:
steam_df_2023 = steam_df[steam_df["year"] >= 2023]

In [44]:
steam_df_2023[numeric_columns].groupby(by="success_1000_threshold").median()

Unnamed: 0_level_0,Peak CCU,Required age,Price,DLC count,Windows,Mac,Linux,Metacritic score,User score,Positive,Negative,Score rank,Achievements,Recommendations,Average playtime forever,Average playtime two weeks,Median playtime forever,Median playtime two weeks,about_length,n_screens,n_movies,n_tags,n_languages,has_publisher,perceived_quality,no_user_reactions,total_reviews,has_support_email,has_support_url,has_website,month,day,year,success_500_threshold
success_1000_threshold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1
0,0.0,0.0,4.79,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,1045.0,7.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,7.0,16.0,2023.0,0.0
1,2278.0,0.0,24.99,1.0,1.0,0.0,0.0,0.0,0.0,1859.0,360.0,,38.0,2068.5,354.5,134.5,356.5,157.5,1716.0,10.0,2.0,20.0,11.0,1.0,5.646919,0.0,2421.0,1.0,1.0,1.0,9.0,16.0,2023.0,1.0


## Limiting the dataset

In [56]:
steam_df_indie = steam_df_2023[
    steam_df_2023["Genres"].str.contains("Indie").fillna(False)
].sort_values(by="total_reviews", ascending=False)
steam_df_indie = steam_df_indie[steam_df_indie["Metacritic score"] == 0]
steam_df_indie = steam_df_indie[steam_df_indie["total_reviews"] > 10]

In [57]:
steam_df_indie = steam_df_indie[steam_df_indie["total_reviews"] > 10]

In [59]:
steam_df_indie[numeric_columns].groupby(by="success_500_threshold").median()

Unnamed: 0_level_0,Peak CCU,Required age,Price,DLC count,Windows,Mac,Linux,Metacritic score,User score,Positive,Negative,Score rank,Achievements,Recommendations,Average playtime forever,Average playtime two weeks,Median playtime forever,Median playtime two weeks,about_length,n_screens,n_movies,n_tags,n_languages,has_publisher,perceived_quality,no_user_reactions,total_reviews,has_support_email,has_support_url,has_website,month,day,year,success_1000_threshold
success_500_threshold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1
0,2.0,0.0,8.99,0.0,1.0,0.0,0.0,0.0,0.0,25.0,3.0,,12.0,0.0,0.0,0.0,0.0,0.0,1334.0,8.0,1.0,20.0,2.0,0.0,7.5,0.0,29.0,1.0,0.0,0.0,8.0,16.0,2023.0,0.0
1,432.0,0.0,17.99,0.0,1.0,0.0,0.0,0.0,0.0,847.5,130.5,,28.0,957.0,115.0,2.5,127.5,2.5,1695.5,10.0,2.0,20.0,7.0,1.0,8.594837,0.0,996.0,1.0,1.0,1.0,9.0,16.0,2023.0,0.0


##Having a publisher makes any difference?

In [91]:
steam_df_indie.groupby(by="Publishers").mean(numeric_only=True)

Unnamed: 0_level_0,Peak CCU,Required age,Price,DLC count,Windows,Mac,Linux,Metacritic score,User score,Positive,Negative,Score rank,Achievements,Recommendations,Average playtime forever,Average playtime two weeks,Median playtime forever,Median playtime two weeks,about_length,n_screens,n_movies,n_tags,n_languages,has_publisher,perceived_quality,no_user_reactions,total_reviews,has_support_email,has_support_url,has_website,month,day,year,success_1000_threshold,success_500_threshold
Publishers,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1
(Myco),1307.0,0.0,6.29,0.0,1.0,0.0,0.0,0.0,0.0,267.0,5.0,,17.0,334.0,114.0,114.0,114.0,114.0,1499.0,10.0,1.0,20.0,1.0,1.0,44.500000,0.0,272.0,1.0,0.0,1.0,12.0,14.0,2023.0,0.0,0.0
-乌鸦男-,1.0,0.0,0.99,0.0,1.0,0.0,0.0,0.0,0.0,108.0,10.0,,0.0,116.0,0.0,0.0,0.0,0.0,424.0,6.0,1.0,20.0,1.0,0.0,9.818182,0.0,118.0,1.0,1.0,0.0,8.0,8.0,2023.0,0.0,0.0
001,2.0,0.0,1.29,0.0,1.0,0.0,0.0,0.0,0.0,36.5,2.0,,0.0,0.0,0.0,0.0,0.0,0.0,1337.5,7.0,1.0,20.0,3.0,0.0,13.700000,0.0,38.5,1.0,1.0,1.0,4.5,6.5,2023.0,0.0,0.0
0UP GAMES,205.0,0.0,15.29,0.0,1.0,0.0,0.0,0.0,0.0,348.0,2.0,,14.0,371.0,0.0,0.0,0.0,0.0,1133.0,20.0,1.0,20.0,2.0,1.0,116.000000,0.0,350.0,1.0,0.0,0.0,2.0,20.0,2023.0,0.0,0.0
"100 GAMES,Gamersky Games",0.0,0.0,10.49,0.0,1.0,0.0,0.0,0.0,0.0,54.0,15.0,,0.0,0.0,0.0,0.0,0.0,0.0,1195.0,10.0,1.0,12.0,15.0,1.0,3.375000,0.0,69.0,1.0,0.0,0.0,3.0,17.0,2023.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
野生建模师,0.0,0.0,0.99,0.0,1.0,0.0,0.0,0.0,0.0,31.0,36.0,,0.0,0.0,0.0,0.0,0.0,0.0,326.0,5.0,1.0,20.0,8.0,0.0,0.837838,0.0,67.0,1.0,0.0,0.0,8.0,29.0,2023.0,0.0,0.0
"阿地,Miradil—DHU",2.0,0.0,6.99,0.0,1.0,0.0,0.0,0.0,0.0,33.0,21.0,,0.0,0.0,0.0,0.0,0.0,0.0,2465.0,12.0,2.0,10.0,1.0,0.0,1.500000,0.0,54.0,1.0,1.0,0.0,4.0,11.0,2023.0,0.0,0.0
阿基米德 尼古拉斯 夏,0.0,0.0,2.39,0.0,1.0,0.0,0.0,0.0,0.0,21.0,8.0,,0.0,0.0,0.0,0.0,0.0,0.0,750.0,9.0,1.0,19.0,2.0,1.0,2.333333,0.0,29.0,1.0,1.0,0.0,1.0,26.0,2023.0,0.0,0.0
风暴之刃工作室,1890.0,0.0,13.49,0.0,1.0,0.0,0.0,0.0,0.0,340.0,111.0,,66.0,470.0,0.0,0.0,0.0,0.0,500.0,9.0,2.0,20.0,2.0,1.0,3.035714,0.0,451.0,1.0,0.0,0.0,4.0,3.0,2023.0,0.0,0.0


In [87]:
list(numeric_columns).append("Publisher")
# s

In [88]:
numeric_columns

Index(['Peak CCU', 'Required age', 'Price', 'DLC count', 'Windows', 'Mac',
       'Linux', 'Metacritic score', 'User score', 'Positive', 'Negative',
       'Score rank', 'Achievements', 'Recommendations',
       'Average playtime forever', 'Average playtime two weeks',
       'Median playtime forever', 'Median playtime two weeks', 'about_length',
       'n_screens', 'n_movies', 'n_tags', 'n_languages', 'has_publisher',
       'perceived_quality', 'no_user_reactions', 'total_reviews',
       'has_support_email', 'has_support_url', 'has_website', 'month', 'day',
       'year', 'success_1000_threshold', 'success_500_threshold'],
      dtype='object')

In [95]:
sys.path.append("/Users/matheus/Projects/steam_analysis/steam-analysis/src/features")

In [5]:
from src.features.build_features import SteamDataTransformer

In [6]:
transformer = SteamDataTransformer()

In [7]:
transformer.fit(steam_df)

In [8]:
teste = transformer.transform(steam_df)

In [9]:
teste

Unnamed: 0,Name,Release date,Estimated owners,Peak CCU,Required age,Price,DLC count,About the game,Supported languages,Full audio languages,...,no_user_reactions,total_reviews,has_support_email,has_support_url,has_website,month,day,year,success_1000_threshold,success_500_threshold
0,Galactic Bowling,2008-10-21,0 - 20000,0,0,19.99,0,Galactic Bowling is an exaggerated and stylize...,"{{""'English'""}}",[],...,False,17,False,False,True,10,21,2008,0,0
1,Train Bandit,2017-10-12,0 - 20000,0,0,0.99,0,THE LAW!! Looks to be a showdown atop a train....,"{ ""'English'"", "" 'Italian'""}, "" 'Portuguese ...",[],...,False,58,True,False,True,10,12,2017,0,0
2,Jolt Project,2021-11-17,0 - 20000,0,0,4.99,0,Jolt Project: The army now has a new robotics ...,"{ ""'English'""}, {"" 'Portuguese - Brazil'""}",[],...,True,0,True,False,False,11,17,2021,0,0
3,Henosis™,2020-07-23,0 - 20000,0,0,5.99,0,HENOSIS™ is a mysterious 2D Platform Puzzler w...,"{ "" 'Portuguese'"", "" 'Korean'"", ""'English'"",...",[],...,False,3,True,True,True,7,23,2020,0,0
4,Two Weeks in Painland,2020-02-03,0 - 20000,0,0,0.00,0,ABOUT THE GAME Play as a hacker who has arrang...,"{ ""'English'""}, {"" 'Spanish - Spain'""}",[],...,False,58,True,True,True,2,3,2020,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
78585,Mannerheim's Saloon Car,2024-01-02,0 - 0,0,0,0.00,0,Marshal Mannerheim’s Saloon Car is the train c...,"{ ""'English'""}, {"" 'Finnish'""}",['Finnish'],...,True,0,True,False,False,1,2,2024,0,0
78586,Beer Run,2024-01-03,0 - 0,0,0,0.00,0,Beer Run is an Indie game created to steal bee...,"{{""'English'""}}",[],...,True,0,True,False,False,1,3,2024,0,0
78587,My Friend The Spider,2024-01-04,0 - 0,0,0,0.00,0,A small 'horror' narrative game about isolatio...,"{{""'English'""}}",['English'],...,True,0,False,True,False,1,4,2024,0,0
78588,Path of Survivors,2024-01-08,0 - 0,0,0,3.99,0,Path of Survivors is a multi-class auto-battle...,"{{""'English'""}}",[],...,True,0,True,True,True,1,8,2024,0,0


In [21]:
X_transformed = steam_df.copy()
X_transformed = X_transformed.dropna(subset=["Screenshots"])
X_transformed = X_transformed.dropna(subset=["Movies"])
X_transformed = X_transformed.dropna(subset=["Genres"])
X_transformed = X_transformed.dropna(subset=["Genres"])
X_transformed = X_transformed["Tags"].fillna("empty")
X_transformed = X_transformed["About the game"].fillna("empty")
X_transformed = X_transformed.drop(columns="AppID")

KeyError: 'About the game'

In [10]:
steam_df.dropna(subset=["Screenshots"])

Unnamed: 0,Name,Release date,Estimated owners,Peak CCU,Required age,Price,DLC count,About the game,Supported languages,Full audio languages,...,no_user_reactions,total_reviews,has_support_email,has_support_url,has_website,month,day,year,success_1000_threshold,success_500_threshold
0,Galactic Bowling,2008-10-21,0 - 20000,0,0,19.99,0,Galactic Bowling is an exaggerated and stylize...,"{""'English'""}",[],...,False,17,False,False,True,10,21,2008,0,0
1,Train Bandit,2017-10-12,0 - 20000,0,0,0.99,0,THE LAW!! Looks to be a showdown atop a train....,"{"" 'German'"", "" 'Russian'"", "" 'Japanese'"", "" '...",[],...,False,58,True,False,True,10,12,2017,0,0
2,Jolt Project,2021-11-17,0 - 20000,0,0,4.99,0,Jolt Project: The army now has a new robotics ...,"{"" 'Portuguese - Brazil'"", ""'English'""}",[],...,True,0,True,False,False,11,17,2021,0,0
3,Henosis™,2020-07-23,0 - 20000,0,0,5.99,0,HENOSIS™ is a mysterious 2D Platform Puzzler w...,"{"" 'German'"", "" 'Korean'"", "" 'Portuguese'"", "" ...",[],...,False,3,True,True,True,7,23,2020,0,0
4,Two Weeks in Painland,2020-02-03,0 - 20000,0,0,0.00,0,ABOUT THE GAME Play as a hacker who has arrang...,"{"" 'Spanish - Spain'"", ""'English'""}",[],...,False,58,True,True,True,2,3,2020,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
78585,Mannerheim's Saloon Car,2024-01-02,0 - 0,0,0,0.00,0,Marshal Mannerheim’s Saloon Car is the train c...,"{"" 'Finnish'"", ""'English'""}",['Finnish'],...,True,0,True,False,False,1,2,2024,0,0
78586,Beer Run,2024-01-03,0 - 0,0,0,0.00,0,Beer Run is an Indie game created to steal bee...,"{""'English'""}",[],...,True,0,True,False,False,1,3,2024,0,0
78587,My Friend The Spider,2024-01-04,0 - 0,0,0,0.00,0,A small 'horror' narrative game about isolatio...,"{""'English'""}",['English'],...,True,0,False,True,False,1,4,2024,0,0
78588,Path of Survivors,2024-01-08,0 - 0,0,0,3.99,0,Path of Survivors is a multi-class auto-battle...,"{""'English'""}",[],...,True,0,True,True,True,1,8,2024,0,0
