# Analyse des plus gros owners :

#### Objectif : retirer les outliers (c'est-à-dire les plus gros studios de production) afin d'obtenir une répartition assez équilibrée

In [1]:
pip install sqldf


Note: you may need to restart the kernel to use updated packages.


In [2]:
import pandas as pd
import collections 
import sqldf

In [3]:
# Read data_clean.csv into df and preview its first three rows.
clean_data_path = '../raw_data/data_clean.csv'
df = pd.read_csv(clean_data_path)
df.head(n=3)

Unnamed: 0,steam_appid,name,release_date,english,developer,publisher,platforms,categories,genres,steamspy_tags,...,world_war_i,world_war_ii,wrestling,zombies,e_sports,owner_median,owner_lower,owner_upper,revenue,has_a_website
0,10,Counter-Strike,2000-11-01,1,Valve,Valve,windows;mac;linux,"['Multi-player', 'Online Multi-Player', 'Local...",['Action'],"['Action', 'FPS', 'Multiplayer']",...,0,0,0,0,550,15000000.0,10000000,20000000,107850000.0,0
1,30,Day of Defeat,2003-05-01,1,Valve,Valve,windows;mac;linux,"['Multi-player', 'Valve Anti-Cheat enabled']",['Action'],"['FPS', 'World War II', 'Multiplayer']",...,5,122,0,0,0,7500000.0,5000000,10000000,29925000.0,1
2,50,Half-Life: Opposing Force,1999-11-01,1,Gearbox Software,Valve,windows;mac;linux,"['Single-player', 'Multi-player', 'Valve Anti-...",['Action'],"['FPS', 'Action', 'Sci-fi']",...,0,0,0,0,0,7500000.0,5000000,10000000,29925000.0,0


In [4]:
# Number of distinct values in the developer column.
df.developer.nunique()

17047

## Classement des publishers par nombre de jeux produits 

In [5]:
# Number of unique publishers.
df.loc[:, 'publisher'].nunique()

14312

In [6]:
# Count the rows of df per publisher value => {publisher: number of games}.
pub_count = collections.Counter()
pub_count.update(df['publisher'])

In [7]:
# Rank publishers from most to fewest games. Because whole (count, publisher)
# tuples are sorted in reverse, ties on count fall in reverse order of the
# publisher value.
best_pub = sorted(
    ((nb_games, publisher) for publisher, nb_games in pub_count.items()),
    reverse=True,
)

In [8]:
# Turn the ranked (count, publisher) tuples into a two-column table and show
# the first 20 rows.
ranking_columns = ['pub_nb_games', 'publisher']
best_pub_df = pd.DataFrame(data=best_pub, columns=ranking_columns)
best_pub_df.head(n=20)

Unnamed: 0,pub_nb_games,publisher
0,212,Big Fish Games
1,131,Strategy First
2,100,Ubisoft
3,98,THQ Nordic
4,96,Sekai Project
5,94,Choice of Games
6,88,Dagestan Technology
7,87,1C Entertainment
8,81,Square Enix
9,77,SEGA


In [9]:
# (rows, columns) of the publisher ranking table.
best_pub_df.shape

(14312, 2)

In [10]:
# Attach pub_nb_games to the game rows with a right join on publisher, so the
# publishers listed in best_pub_df drive the result.
final_df = df.merge(best_pub_df, how="right", on="publisher")

In [11]:
# List the column labels of the merged table.
final_df.columns

Index(['steam_appid', 'name', 'release_date', 'english', 'developer',
       'publisher', 'platforms', 'categories', 'genres', 'steamspy_tags',
       ...
       'world_war_ii', 'wrestling', 'zombies', 'e_sports', 'owner_median',
       'owner_lower', 'owner_upper', 'revenue', 'has_a_website',
       'pub_nb_games'],
      dtype='object', length=397)

In [12]:
# Keep only the identification, publisher and owner_median columns.
publisher_columns = [
    'steam_appid', 'name', 'release_date', 'developer',
    'publisher', 'pub_nb_games', 'owner_median',
]
publisher_df = final_df[publisher_columns]

publisher_df

Unnamed: 0,steam_appid,name,release_date,developer,publisher,pub_nb_games,owner_median
0,7340,Azada,2010-04-21,Big Fish Games,Big Fish Games,212,10000.0
1,50910,Professor Fizzwizzle and the Molten Mystery,2010-04-21,Big Fish Games,Big Fish Games,212,10000.0
2,50920,Hidden Expedition: Amazon,2010-04-21,Big Fish Games,Big Fish Games,212,10000.0
3,50930,Hidden Expedition: Everest,2010-04-21,Big Fish Games,Big Fish Games,212,10000.0
4,50940,Hidden Expedition: Titanic,2010-04-21,Big Fish Games,Big Fish Games,212,10000.0
...,...,...,...,...,...,...,...
26862,1060770,"Die, zombie sausage, die!",2019-04-24,AuroraCorp,Aurora software,1,10000.0
26863,858100,Grimshade,2019-03-26,TALEROCK,Asterion Games,1,10000.0
26864,832420,ICED VR,2018-05-29,Anea_Duo_Dev,Anea_Duo_Dev,1,10000.0
26865,659100,Skyfall,2017-07-28,AYE Technology,AYE Technology,1,10000.0


In [13]:
# Number of games in each owner_median bucket.
# NOTE(review): the original comment put the "biggest owners" cut-off at a
# median of 75,000,000, while the filter applied afterwards uses 7,500,000 — confirm.
final_df['owner_median'].value_counts()

10000.0        18529
35000.0         3018
75000.0         1675
150000.0        1367
350000.0        1240
750000.0         493
1500000.0        286
3500000.0        191
7500000.0         42
15000000.0        20
35000000.0         3
75000000.0         2
150000000.0        1
Name: owner_median, dtype: int64

In [14]:
# Collect the outliers: games whose owner_median is >= 7,500,000 (68 games).
is_outlier = publisher_df['owner_median'].ge(7500000)
pub_plus = publisher_df[is_outlier]

In [15]:
# First 40 outliers when ranked by owner_median, highest first.
pub_plus.sort_values(by='owner_median', ascending=False).iloc[:40]

Unnamed: 0,steam_appid,name,release_date,developer,publisher,pub_nb_games,owner_median
4044,570,Dota 2,2013-07-09,Valve,Valve,24,150000000.0
20149,578080,PLAYERUNKNOWN'S BATTLEGROUNDS,2017-12-21,PUBG Corporation,PUBG Corporation,1,75000000.0
4047,730,Counter-Strike: Global Offensive,2012-08-21,Valve;Hidden Path Entertainment,Valve,24,75000000.0
24400,230410,Warframe,2013-03-25,Digital Extremes,Digital Extremes,1,35000000.0
18563,304930,Unturned,2017-07-07,Smartly Dressed Games,Smartly Dressed Games,1,35000000.0
4041,440,Team Fortress 2,2007-10-10,Valve,Valve,24,35000000.0
8393,291480,Warface,2014-07-01,My.com,My.com,6,15000000.0
4045,620,Portal 2,2011-04-18,Valve,Valve,24,15000000.0
4049,4000,Garry's Mod,2006-11-29,Facepunch Studios,Valve,24,15000000.0
5776,271590,Grand Theft Auto V,2015-04-13,Rockstar North,Rockstar Games,14,15000000.0


In [16]:
# Last 28 rows of the same descending owner_median ranking.
pub_plus.sort_values(by='owner_median', ascending=False).iloc[-28:]

Unnamed: 0,steam_appid,name,release_date,developer,publisher,pub_nb_games,owner_median
12098,292030,The Witcher® 3: Wild Hunt,2015-05-18,CD PROJEKT RED,CD PROJEKT RED,3,7500000.0
20615,273110,Counter-Strike Nexon: Zombies,2014-10-07,"Valve Corporation, Nexon Korea Corporation",Nexon Korea Corporation,1,7500000.0
20732,550650,Black Squad,2017-07-28,NS STUDIO,NS STUDIO,1,7500000.0
21683,224260,No More Room in Hell,2013-10-31,No More Room in Hell Team,Lever Games,1,7500000.0
13520,105600,Terraria,2011-05-16,Re-Logic,Re-Logic,2,7500000.0
4040,420,Half-Life 2: Episode Two,2007-10-10,Valve,Valve,24,7500000.0
11846,252490,Rust,2018-02-08,Facepunch Studios,Facepunch Studios,3,7500000.0
11216,363970,Clicker Heroes,2015-05-13,Playsaurus,Playsaurus,3,7500000.0
4042,500,Left 4 Dead,2008-11-17,Valve,Valve,24,7500000.0
4038,380,Half-Life 2: Episode One,2006-06-01,Valve,Valve,24,7500000.0


In [17]:
# Number of distinct publishers in the reduced table.
publisher_df.publisher.nunique()

14312

In [18]:
# Restrict to games whose publisher has more than 5 games, then count the
# distinct values in each column.
# NOTE(review): the original comment mentioned publishers with 1 to 5 games,
# but the filter keeps pub_nb_games > 5 — confirm which was intended.
has_many_games = publisher_df['pub_nb_games'].gt(5)
publisher_df[has_many_games].nunique()

steam_appid     8681
name            8677
release_date    2088
developer       3293
publisher        520
pub_nb_games      59
owner_median      13
dtype: int64

In [19]:
# Read steam_description_data.csv into desc and report its (rows, columns).
description_path = '../raw_data/steam_description_data.csv'
desc = pd.read_csv(description_path)
desc.shape

(27334, 4)

In [20]:
# (rows, columns) of publisher_df, for comparison with the description table.
publisher_df.shape

(26867, 7)

In [21]:
# Inner join on steam_appid: keep only the games present in both tables.
desc_mg = publisher_df.merge(desc, how="inner", on="steam_appid")

In [22]:
# (rows, columns) of the merged table.
desc_mg.shape

(26867, 10)

In [23]:
# Display the merged table (publisher_df columns plus the description columns).
desc_mg

Unnamed: 0,steam_appid,name,release_date,developer,publisher,pub_nb_games,owner_median,detailed_description,about_the_game,short_description
0,7340,Azada,2010-04-21,Big Fish Games,Big Fish Games,212,10000.0,"Trapped in a magical puzzle book, the adventur...","Trapped in a magical puzzle book, the adventur...","Trapped in a magical puzzle book, the adventur..."
1,50910,Professor Fizzwizzle and the Molten Mystery,2010-04-21,Big Fish Games,Big Fish Games,212,10000.0,"<p>While on a well deserved vacation, Professo...","<p>While on a well deserved vacation, Professo...","While on a well deserved vacation, Professor F..."
2,50920,Hidden Expedition: Amazon,2010-04-21,Big Fish Games,Big Fish Games,212,10000.0,Big Fish Games Studios takes you on an Adventu...,Big Fish Games Studios takes you on an Adventu...,Big Fish Games Studios takes you on an Adventu...
3,50930,Hidden Expedition: Everest,2010-04-21,Big Fish Games,Big Fish Games,212,10000.0,Fresh from a successful exploration of the wre...,Fresh from a successful exploration of the wre...,Fresh from a successful exploration of the wre...
4,50940,Hidden Expedition: Titanic,2010-04-21,Big Fish Games,Big Fish Games,212,10000.0,"On April 14, 1912, the great steamship RMS Tit...","On April 14, 1912, the great steamship RMS Tit...","On April 14, 1912, the great steamship RMS Tit..."
...,...,...,...,...,...,...,...,...,...,...
26862,1060770,"Die, zombie sausage, die!",2019-04-24,AuroraCorp,Aurora software,1,10000.0,Arcade for the little ones. Hold on to the tab...,Arcade for the little ones. Hold on to the tab...,Arcade for the little ones. Hold on to the tab...
26863,858100,Grimshade,2019-03-26,TALEROCK,Asterion Games,1,10000.0,"<img src=""https://steamcdn-a.akamaihd.net/stea...","<img src=""https://steamcdn-a.akamaihd.net/stea...",Grimshade is a party-based role-playing game i...
26864,832420,ICED VR,2018-05-29,Anea_Duo_Dev,Anea_Duo_Dev,1,10000.0,"This VR version of the game ICED, we have trie...","This VR version of the game ICED, we have trie...",The protagonist goes winter fishing on a lake ...
26865,659100,Skyfall,2017-07-28,AYE Technology,AYE Technology,1,10000.0,<strong>Skyfall</strong> it is old school firs...,<strong>Skyfall</strong> it is old school firs...,2217. Distant future. We act as an engineer ni...


In [24]:
# Display publisher_df once more before it is written to disk.
publisher_df

Unnamed: 0,steam_appid,name,release_date,developer,publisher,pub_nb_games,owner_median
0,7340,Azada,2010-04-21,Big Fish Games,Big Fish Games,212,10000.0
1,50910,Professor Fizzwizzle and the Molten Mystery,2010-04-21,Big Fish Games,Big Fish Games,212,10000.0
2,50920,Hidden Expedition: Amazon,2010-04-21,Big Fish Games,Big Fish Games,212,10000.0
3,50930,Hidden Expedition: Everest,2010-04-21,Big Fish Games,Big Fish Games,212,10000.0
4,50940,Hidden Expedition: Titanic,2010-04-21,Big Fish Games,Big Fish Games,212,10000.0
...,...,...,...,...,...,...,...
26862,1060770,"Die, zombie sausage, die!",2019-04-24,AuroraCorp,Aurora software,1,10000.0
26863,858100,Grimshade,2019-03-26,TALEROCK,Asterion Games,1,10000.0
26864,832420,ICED VR,2018-05-29,Anea_Duo_Dev,Anea_Duo_Dev,1,10000.0
26865,659100,Skyfall,2017-07-28,AYE Technology,AYE Technology,1,10000.0


In [25]:
# Write publisher_df to CSV without the index column.
output_path = r'../raw_data/publisher_df.csv'
publisher_df.to_csv(output_path, index=False)