# Часть №1. Создание набора данных

In [1]:
from collections import defaultdict
from glob import glob
import re
import numpy as np
import pandas as pd

# 1. Загрузка данных

In [2]:
games = pd.DataFrame()
for file in glob('Data/metacritic_*.csv'):
    df = pd.read_csv(file, sep=';', header=0)
    df['release_date'] = pd.to_datetime(df['release_date'], errors='coerce')
    df = df[(df['release_date'] < pd.datetime(2019, 1, 1)) & (df['url'])]
    df['user_score'].replace('tbd', np.NaN, inplace=True)
    df.drop_duplicates(subset=['title'], inplace=True)
    df.fillna({'critic_count': 0, 'critic_positive': 0, 'critic_mixed': 0, 'critic_negative': 0,
               'user_score': 0,'user_count': 0, 'user_positive': 0, 'user_mixed': 0, 'user_negative': 0},
              axis=0, inplace=True)
    df.dropna(axis=0, subset=['critic_score'], inplace=True)
    games = pd.concat([games, df])
games = games.sort_values(['title', 'critic_count']).reset_index(drop=True)

In [3]:
dtypes = {'publisher': 'category',
          'critic_score': 'float16',
          'critic_count': 'int16',
          'critic_positive': 'int16',
          'critic_mixed': 'int16',
          'critic_negative': 'int16',
          'user_score': 'float16',
          'user_count': 'int16',
          'user_positive': 'int16',
          'user_mixed': 'int16',
          'user_negative': 'int16',          
          'developer': 'category',
          'rating': 'category'}

games = games.astype(dtypes).drop('url', axis=1)
games['user_score'] = games['user_score'].apply(lambda x: np.round(x * 10, 2))

In [4]:
games.head()

Unnamed: 0,title,platform,publisher,release_date,critic_score,critic_count,critic_positive,critic_mixed,critic_negative,user_score,...,user_positive,user_mixed,user_negative,developer,genre,players,rating,director,writer,composer
0,#IDARB,Xbox One,Other Ocean Interactive,2015-01-30,77.0,31,23,8,0,70.0,...,17,3,1,Other Ocean Interactive,,,,,,
1,#KILLALLZOMBIES,PlayStation 4,Beatshapers,2014-10-28,63.0,6,0,6,0,66.99,...,2,0,0,Beatshapers,,,,,,
2,#KILLALLZOMBIES,Xbox One,Digerati Distribution,2016-08-10,53.0,9,0,6,3,0.0,...,0,0,1,Beatshapers,,,,,,
3,'Splosion Man,Xbox 360,Twisted Pixel Games,2009-07-22,84.0,52,48,4,0,78.01,...,14,1,1,Twisted Pixel Games,"Action, General, Platformer, Platformer, 2D, 2D",4 Online,E10+,,,
4,'n Verlore Verstand,Xbox One,Skobbejak Games,2017-04-21,57.0,4,0,4,0,0.0,...,0,0,0,Skobbejak Games,"Adventure, 3D, First-Person",No Online Multiplayer,E,,,


In [5]:
games.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 18150 entries, 0 to 18149
Data columns (total 21 columns):
title              18150 non-null object
platform           18150 non-null object
publisher          18150 non-null category
release_date       18150 non-null datetime64[ns]
critic_score       18150 non-null float16
critic_count       18150 non-null int16
critic_positive    18150 non-null int16
critic_mixed       18150 non-null int16
critic_negative    18150 non-null int16
user_score         18150 non-null float64
user_count         18150 non-null int16
user_positive      18150 non-null int16
user_mixed         18150 non-null int16
user_negative      18150 non-null int16
developer          18119 non-null category
genre              10224 non-null object
players            11460 non-null object
rating             10224 non-null category
director           1218 non-null object
writer             644 non-null object
composer           723 non-null object
dtypes: category(3), dateti

In [6]:
games.describe()

Unnamed: 0,critic_score,critic_count,critic_positive,critic_mixed,critic_negative,user_score,user_count,user_positive,user_mixed,user_negative
count,18150.0,18150.0,18150.0,18150.0,18150.0,18150.0,18150.0,18150.0,18150.0,18150.0
mean,inf,19.821763,11.099725,7.235427,1.486612,60.57431,28.519504,17.259614,4.734711,6.525179
std,13.03125,17.12186,15.120001,7.296757,3.176466,26.655206,115.172326,70.599299,16.241757,48.88725
min,8.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,62.0,7.0,2.0,2.0,0.0,55.0,2.0,1.0,0.0,0.0
50%,71.0,14.0,5.0,5.0,0.0,70.0,5.0,3.0,1.0,1.0
75%,79.0,26.0,14.0,10.0,2.0,78.01,16.0,10.0,3.0,3.0
max,99.0,118.0,118.0,60.0,43.0,93.98,4319.0,2729.0,497.0,2550.0


# 2. Очистка данных

In [7]:
def cleaning_feature(feature, pattern):
    if pattern:
        # Замена некорректных наименований
        for old, new in pattern.items():
            feature = re.sub(old, new, feature)
    # Удаление дубликатов
    feature = ', '.join(set(feature.split(', ')))
    return feature

## 2.1. Количество игроков

In [8]:
games['players'] = games['players'].replace({'1 Player': 'No Online Multiplayer',
                                             '1-2': '1-4',
                                             '1-3': '1-4',
                                             '1-5': '1-8',
                                             '1-6': '1-8',
                                             '1-10': '1-16',
                                             '1-12': '1-16',
                                             'Up to 4': '1-4',
                                             '2': '1-4',
                                             '2  Online': '1-4',
                                             '4  Online': '1-4',
                                             '8  Online': '1-8',
                                             'Up to 8': '1-8',
                                             '16  Online': '1-16',
                                             'Up to 6': '1-8',
                                             'Up to 16': '1-16',
                                             'Up to 12': '1-16',
                                             '6  Online': '1-8',
                                             'Up to 10': '1-16',
                                             '32  Online': '1-32',
                                             '10  Online': '1-16',
                                             '12  Online': '1-16',
                                             '24  Online': '1-32',
                                             'Up to 22': '1-32',
                                             'Up to 18': '1-32',
                                             'Up to 64': '1-64',
                                             'Up to 5': '1-8',
                                             'Up to 3': '1-4',
                                             'Up to 24': '1-32',
                                             'Up to 20': '1-32',
                                             'Up to 32': '1-32',
                                             'Up to 40': '1-64',
                                             '64  Online': '1-64',
                                             '14  Online': '1-16',
                                             'Up to more than 64': '64+',
                                             'Up to 30': '1-32',
                                             '5  Online': '1-8',
                                             '3  Online': '1-4',
                                             '64+  Online': '64+',
                                             '44  Online': '1-64',
                                             'Up to 60': '1-64',
                                             'Up to 14': '1-16',
                                             'Up to 9': '1-16',
                                             '1-24': '1-32',
                                             'Up to 33': '1-32'})

In [9]:
games['players'].value_counts()

No Online Multiplayer    5484
1-4                      3578
1-8                      1057
1-16                      588
Online Multiplayer        321
1-32                      225
Massively Multiplayer     144
1-64                       48
64+                        15
Name: players, dtype: int64

## 2.2. Возрастной рейтинг

In [10]:
games['rating'] = games['rating'].replace({'K-A': 'E',
                                          'RP': np.NaN})

In [11]:
games['rating'].value_counts()

T       3666
E       3182
M       2133
E10+    1233
AO         2
Name: rating, dtype: int64

## 2.3. Издатель

In [12]:
games['publisher'] = games['publisher'].replace({'Activision Blizzard': 'Activision',
                                                'Activision Value': 'Activision',
                                                'EA Sports': 'Electronic Arts',
                                                'EA Sports Big': 'Electronic Arts',
                                                'EA Games': 'Electronic Arts',
                                                'EA Partners': 'Electronic Arts',
                                                'EA Mobile': 'Electronic Arts',
                                                'SCEA': 'Sony Interactive Entertainment',
                                                'SCE Australia': 'Sony Interactive Entertainment',
                                                'SCEA Sports Studio': 'Sony Interactive Entertainment',
                                                'SCEE': 'Sony Interactive Entertainment',
                                                'Microsoft': 'Microsoft Game Studios',
                                                'Square EA': 'Square Electronic Arts',
                                                'Atari SA': 'Atari',
                                                'Warner Bros.': 'Warner Bros. Interactive Entertainment',
                                                '2K Sports': '2K Games',
                                                '2K Play': '2K Games',
                                                'Bethesda Game Studios': 'Bethesda Softworks',
                                                'Namco Bandai Games': 'Bandai Namco Games',
                                                'Namco Bandai Games America': 'Bandai Namco Games',
                                                'Bandai America Inc.': 'Bandai',
                                                'Namco Networks America': 'Namco',
                                                'Acclaim Games': 'Acclaim',
                                                'Lucas Learning': 'LucasArts',
                                                'Paradox Development Studio': 'Paradox Interactive',
                                                'Atlus Co.': 'Atlus',
                                                'Sierra Online': 'Sierra Entertainment',
                                                'Sierra Games': 'Sierra Entertainment',
                                                'Majesco': 'Majesco Games',
                                                'Tecmo Koei America Corp.': 'Koei Tecmo Games',
                                                'Tecmo Koei Games': 'Koei Tecmo Games',
                                                'Dreamcatcher': 'DreamCatcher Interactive',
                                                'D3': 'D3Publisher',
                                                'tiny Build GAMES': 'tinyBuild',
                                                'Agetec': 'Agetec Inc.',
                                                'Blizzard Classic Arcade': 'Blizzard Entertainment',
                                                'Curve Studios': 'Curve Digital',
                                                'Curve Digital Games': 'Curve Digital',
                                                'SNK': 'SNK Playmore',
                                                'UTV Ignition Games': 'Ignition Entertainment',
                                                '10tons Ltd': '10tons',
                                                '10tons Entertainment': '10tons',
                                                'Iceberg Entertainment': 'Iceberg Interactive',
                                                'Slitherine Software': 'Slitherine',
                                                'Buena Vista Interactive': 'Buena Vista Games',
                                                'MileStone Inc.': 'Milestone S.r.l',
                                                'Milestone': 'Milestone S.r.l',
                                                'Badland Indie': 'Badland Games',
                                                'SegaSoft': 'Sega',
                                                'Sega Europe': 'Sega',
                                                'Grip Digital': 'Grip Games',
                                                '1C': '1C Company',
                                                '1C-Softclub': '1C Company',
                                                'Hudson': 'Hudson Entertainment',
                                                'Bit Composer': 'bitComposer Games',
                                                'Disney Interactive': 'Disney Interactive Studios',
                                                'Disney Online': 'Disney Interactive Studios',
                                                'Sold Out Software': 'Sold Out',
                                                'Ratalaika Games S.L.': 'Ratalaika Games',
                                                'Phoenix Online Studios': 'Phoenix Online Publishing',
                                                'GameMill Publishing': 'GameMill Entertainment',
                                                'Starbreeze publishing AB': 'Starbreeze',
                                                'Gearbox Software': 'Gearbox Publishing',
                                                'LOOT Entertainment': 'LOOT Interactive',
                                                'Modus': 'Modus Games',
                                                'Tozai Games, Inc.': 'Tozai Games',
                                                'Vision Game Publishing Limited': 'Vision Games Publishing LTD',
                                                'Excalibur Publishing Limited': 'Excalibur Publishing',
                                                'Vector Unit Inc.': 'Vector Unit',
                                                'Yuke\'s Company of America': 'Yuke\'s',
                                                'Capybara Games': 'Capy Games',
                                                'Squad': 'SQUADTeam',
                                                'Forever Entertainment S.A.': 'Forever Entertainment',
                                                'Uppercut Games': 'Uppercut Games Pty Ltd',
                                                'Fullbright': 'The Fullbright Company',
                                                'Nyu Media, Ltd.': 'Nyu Media',
                                                'Marvelous Inc.': 'Marvelous AQL',
                                                'Alawar Entertainment, Inc': 'Alawar Entertainment',
                                                'Alawar Premium': 'Alawar Entertainment',
                                                'Application Systems Heidelberg Software GmbH': 'Application Systems Heidelberg',
                                                'AwesomeGamesStudio': 'Awesome Games Studio',
                                                'BITBOX S.L.': 'Bitbox Ltd.',
                                                'Black Pants': 'Black Pants Studio',
                                                'Blue Wizard Digital Ltd.': 'Blue Wizard Digital',
                                                'Coco Cucumber': 'Cococucumber',
                                                'Crema Games': 'CremaGames',
                                                'Cyan, Inc.': 'Cyan Worlds',
                                                'Dead Mage Studio': 'Dead Mage',
                                                'Digital Dreams': 'Digital Dreams Entertainment LLC',
                                                'Dolores Entertainment SL': 'Dolores Entertainment',
                                                'Double Damage Games, Inc.': 'Double Damage Games',
                                                'E-Home Entertainment Development Co., Ltd': 'E-Home Entertainment',
                                                'E-Home Entertainment Developerment Co., Ltd': 'E-Home Entertainment',
                                                'EightyEight Games LTD': 'EightyEight Games',
                                                'Forever Entertainment S.A.': 'Forever Entertainment',
                                                'Fun Com': 'Funcom',
                                                'Hot B': 'Hot-B',
                                                'Introversion': 'Introversion Software',
                                                'Jagex Ltd.': 'Jagex Games Studio',
                                                'JoyBits, Inc': 'JoyBits',
                                                'MileStone Inc.': 'Milestone',
                                                'Minority Media Inc.': 'Minority',
                                                'Mobot Studios Inc': 'Mobot Studios LLC',
                                                'PlayStation Mobile Inc.': 'Sony Interactive Entertainment',
                                                'Sony Platform Publishing': 'Sony Interactive Entertainment',
                                                'Vogster Entertainment, LLC': 'Vogster',
                                                'Watermelon Development': 'Watermelon',
                                                'WeirdBeard B.V.': 'WeirdBeard',
                                                'Yukitama': 'Yukitama Creative Industries',
                                                'Active Gaming Media Inc.': 'Active Gaming Media',
                                                'Ankama': 'Ankama Games',
                                                'Excalbur Games': 'Excalibur Publishing',
                                                'Aqua Plus': 'Aquaplus',
                                                'Ascaron Entertainment GmbH': 'Ascaron',
                                                'cdv Software': 'CDV Software Entertainment AG',
                                                'EXTEND Interactive, LLC.': 'Extend Interactive',
                                                'Gravity Corporation': 'Gravity',
                                                'Gravity Interactive': 'Gravity',
                                                'NIS America': 'Nippon Ichi Software',
                                                'Slick Entertainment Inc.': 'Slick Entertainment',
                                                'SquareSoft': 'Square',
                                                'Tik Games, LLC': 'TikGames',
                                                'TreeFallStudios': 'TreeFall Studios',
                                                'CHIBIG, S.L.U.': 'CHIBIG',
                                                'Bluehole, Inc.': 'Bluehole Inc.',
                                                'Unigine Corp, Russia': 'Unigine Corp',
                                                'Frozenbyte, Inc.': 'Frozenbyte Inc.'})

In [13]:
games['publisher'].value_counts()

Electronic Arts                           1281
Ubisoft                                    800
Activision                                 709
Sony Interactive Entertainment             541
Sega                                       526
THQ                                        445
Capcom                                     440
Konami                                     438
Microsoft Game Studios                     383
Bandai Namco Games                         379
Nintendo                                   377
Square Enix                                329
2K Games                                   305
Atari                                      286
Telltale Games                             274
Warner Bros. Interactive Entertainment     252
Midway                                     198
Bethesda Softworks                         184
Eidos Interactive                          182
Codemasters                                161
Atlus                                      158
Focus Home In

## 2.4. Разработчик

In [14]:
games['developer'] = games['developer'].replace({'BadFly Interactive, a.s': 'BadFly Interactive',
                                                 'Phantom EFX, Inc.': 'Phantom EFX Inc.',
                                                 'Eurocom Entertainment Software, Eurocom': 'Eurocom Entertainment Software',
                                                 'Frozenbyte, Inc.': 'Frozenbyte Inc.',
                                                 'Z-Axis, Ltd.': 'Z-Axis Ltd.',
                                                 'Pipeworks Software, Inc.': 'Pipeworks Software Inc.',
                                                 'Jackbox Games, Inc.': 'Jackbox Games Inc.',
                                                 'Luxoflux, Inc.': 'Luxoflux Inc.',
                                                 'Ascaron Entertainment GmbH, Ascaron Entertainment': 'Ascaron Entertainment',
                                                 '10tons Entertainment, 10tons': '10tons',
                                                 'Arcen Games, LLC': 'Arcen Games LLC',
                                                 '10tons Ltd, 10tons': '10tons',
                                                 'Tik Games, LLC': 'Tik Games LLC',
                                                 'Digital Leisure Inc., Digital Leisure': 'Digital Leisure Inc.',
                                                 'Camouflaj, LLC': 'Camouflaj LLC',
                                                 'Dreadlocks Ltd, Dreadlocks': 'Dreadlocks Ltd',
                                                 'Image Epoch, Imageepoch': 'Image Epoch',
                                                 '3000AD, Inc.': '3000AD Inc.',
                                                 'Valuewave Co.,Ltd.': 'Valuewave Co. Ltd.',
                                                 'bitComposer Games, bitComposer': 'bitComposer Games',
                                                 'Orbital Media, Inc.': 'Orbital Media Inc.',
                                                 'Papaya Studios, Papaya Studio': 'Papaya Studios',
                                                 'Voltex, Inc.': 'Voltex Inc.',
                                                 'Young Horses, Inc': 'Young Horses Inc.',
                                                 'Funatics Development GmbH, Funatics Software, Funatics': 'Funatics Software, Funatics',
                                                 'Thekla, Inc': 'Thekla Inc.',
                                                 'Final Form Games, LLC': 'Final Form Games LLC',
                                                 'Nintendo EAD Tokyo , Nintendo Software Technology': 'Nintendo EAD Tokyo, Nintendo Software Technology',
                                                 'Holistic Design, Inc.': 'Holistic Design Inc.',
                                                 'Honor Code, Inc.': 'Honor Code Inc.',
                                                 'Massive Damage, Inc.': 'Massive Damage Inc.',
                                                 'Tribute Games, Tribute Games Inc.': 'Tribute Games',
                                                 'Blue Wizard Digital Ltd., Blue Wizard Digital': 'Blue Wizard Digital',
                                                 'Sickhead Games, LLC, Eric Barone': 'Sickhead Games LLC, Eric Barone',
                                                 'RideonJapan,Inc.': 'RideonJapan Inc.',
                                                 'RideonJapan,Inc., Ride On': 'RideonJapan Inc., Ride On',
                                                 'BadFly Interactive, a.s, BadFly Interactive': 'BadFly Interactive',
                                                 'The Balance, Inc': 'The Balance Inc.',
                                                 'THQ Australia, THQ Studio Australia': 'THQ Australia',
                                                 'Art Co., Ltd.': 'Art Co. Ltd.',
                                                 'Tozai Games, Inc.': 'Tozai Games Inc.',
                                                 'Asmik Ace Entertainment, Inc': 'Asmik Ace Entertainment Inc.',
                                                 'Asobism, Co. ltd.': 'Asobism Co. Ltd.',
                                                 'Big Sandwich Games, Inc.': 'Big Sandwich Games Inc.',
                                                 'Phosphor Games Studio, LLC': 'Phosphor Games Studio LLC',
                                                 'Coco Cucumber, Cococucumber': 'Cococucumber',
                                                 'Capcom, Pipeworks Software, Inc.': 'Capcom, Pipeworks Software Inc.',
                                                 'Pipeworks Software, Inc., NeoBards Entertainment': 'Pipeworks Software Inc., NeoBards Entertainment',
                                                 '10tons Entertainment, 10tons Ltd, 10tons': '10tons',
                                                 'Reloaded Games Inc., Reloaded Games': 'Reloaded Games',
                                                 'Castle Pixel, LLC.': 'Castle Pixel LLC.',
                                                 '17 Bit, 17-Bit': '17-Bit',
                                                 'Fallen Earth, LLC': 'Fallen Earth LLC',
                                                 'FIX Korea, Co. LTD': 'FIX Korea Co. Ltd.',
                                                 '24 Carat Games, 24 Caret Games': '24 Carat Games',
                                                 'FAKT Software GmBH, FAKT Software': 'FAKT Software',
                                                 'Frozenbyte, Inc., GameTrust': 'Frozenbyte Inc., GameTrust',
                                                 'Act 3 Games, LLC': 'Act 3 Games LLC',
                                                 'Forever Entertainment S.A., Forever Entertainment': 'Forever Entertainment',
                                                 'Anvil Drop, LLC': 'Anvil Drop LLC',
                                                 'Application Systems Heidelberg Software GmbH, Application Systems Heidelberg': 'Application Systems Heidelberg',   
                                                 'Absolutist Ltd, Absolutist': 'Absolutist Ltd.',
                                                 'Chillingo Ltd, Chillingo Ltd., Chillingo': 'Chillingo Ltd',
                                                 'CIRCLE Entertainment, RideonJapan,Inc.': 'CIRCLE Entertainment, RideonJapan Inc.',
                                                 'CHIBIG, S.L.U.': 'CHIBIG',
                                                 'CCR, Inc': 'CCR  Inc.',
                                                 'Cyan, Inc.': 'Cyan Inc.',
                                                 'Creat Studios, Tik Games, LLC': 'Creat Studios, Tik Games LLC',
                                                 'Cliffhanger Productions, Cliffhanger Productions': 'Cliffhanger Productions',
                                                 'BigBox VR, Inc.': 'BigBox VR Inc.',
                                                 'Beep Games, Inc.': 'Beep Games Inc.',
                                                 'EXTEND Interactive, LLC., Extend Interactive': 'Extend Interactive',
                                                 'Buddy Cops, LLC': 'Buddy Cops LLC',
                                                 'Double Damage Games, Inc., Double Damage Games': 'Double Damage Games',
                                                 'Bluehole, Inc., PUBG Corporation': 'Bluehole Inc., PUBG Corporation',
                                                 'Bluehole, Inc.': 'Bluehole Inc.',
                                                 'E-Home Entertainment Developerment Co., Ltd': 'E-Home Entertainment',
                                                 'E-Home Entertainment Development Co., Ltd': 'E-Home Entertainment',
                                                 'Shanghai FantaBlade Network Technology Co., Ltd.': 'Shanghai FantaBlade Network Technology Co. Ltd.',
                                                 'Seed Studios, Lda': 'Seed Studios',
                                                 'Subaltern Games, LLC': 'Subaltern Games LLC', 
                                                 'Speedco Shooting Sports, Inc.': 'Speedco Shooting Sports Inc.',
                                                 'Seamless Entertainment, Inc.': 'Seamless Entertainment Inc.',
                                                 'Reality Twist GmbH, Reality Twist': 'Reality Twist',
                                                 'Reloaded Productions, Reloaded Games Inc., Reloaded Games': 'Reloaded Productions, Reloaded Games',
                                                 'Revival Productions, LLC': 'Revival Productions LLC',
                                                 'Viva Media, Viva Media, LLC': 'Viva Media',
                                                 'Vogster Entertainment, LLC': 'Vogster Entertainment LLC',
                                                 'bitComposer Entertainment AG, bitComposer': 'bitComposer Games',
                                                 'Team6 Game Studios, Team 6 Games Studio': 'Team 6 Games Studio',
                                                 'Unigine Corp, Russia': 'Unigine Corp',
                                                 'JoyBits Ltd., JoyBits': 'JoyBits',
                                                 'JoyBits, Inc': 'JoyBits',
                                                 'Joystick Labs, LLC': 'Joystick Labs LLC',   
                                                 'Jundroo, LLC': 'Jundroo LLC',
                                                 'Kyodai, Kyodai Ltd.': 'Kyodai Ltd.',
                                                 'LRDGames, Inc.': 'LRDGames Inc.',                                                 
                                                 'Lion Shield, LLC': 'Lion Shield LLC',
                                                 'Link Kit Co., Ltd.': 'Link Kit Co. Ltd.',
                                                 'LegionVR, LLC': 'LegionVR LLC',
                                                 'Gelid Games, Inc.': 'Gelid Games Inc.',
                                                 'Games Farm, Games Farm s.r.o.': 'Games Farm',
                                                 'IronNos Co.,Ltd.': 'IronNos Co. Ltd.',
                                                 'International Games System Co., Ltd., 5pb': 'International Games System Co. Ltd., 5pb',
                                                 'International Games System Co., Ltd.': 'International Games System Co. Ltd.',
                                                 'Homebear Studios, HomeBearStudio': 'Homebear Studios',
                                                 'Hit-Point Co., Ltd.': 'Hit-Point Co. Ltd.',
                                                 'OVER FENCE CO.,LTD., Over Fence': 'Over Fence',
                                                 'PM Studios Inc., P.M. Studios s.r.l': 'PM Studios Inc.',
                                                 'Polarity Flow, PolarityFlow': 'Polarity Flow',
                                                 'Phantom EFX, Inc., Phantom EFX': 'Phantom EFX Inc.',
                                                 'MileStone Inc., Klon Co., Ltd.': 'MileStone Inc., Klon Co. Ltd.',
                                                 'Marauder Interactive, LLC': 'Marauder Interactive LLC',
                                                 'Narcosis Team, Honor Code, Inc.': 'Narcosis Team, Honor Code Inc.',
                                                 'Nike, Inc.': 'Nike Inc.',
                                                 'MotiveTime, Ltd.': 'MotiveTime Ltd.',
                                                 'MunkyFun, Inc.': 'MunkyFun Inc.'})                                                

In [15]:
games['developer'].value_counts()

Telltale Games                               281
Capcom                                       272
EA Sports                                    190
Konami                                       167
EA Canada                                    167
Ubisoft Montreal                             146
Ubisoft                                      142
EA Tiburon                                    99
Nintendo                                      97
Omega Force                                   94
Codemasters                                   92
Visual Concepts                               91
Square Enix                                   81
Electronic Arts                               80
Traveller's Tales                             78
BioWare                                       73
Zen Studios                                   70
Sega                                          69
Arc System Works                              67
Midway                                        67
Treyarch            

## 2.5. Жанр

In [16]:
pattern = {'Soccer': 'Football',
           'Massively Multiplayer': 'Massively Multiplayer Online',
           'Other': 'Miscellaneous',
           'Breeding/Constructing': 'Breeding / Constructing',
           'Application': 'Miscellaneous',
           'Text': 'Miscellaneous',
           'Videos': 'Miscellaneous'}

In [17]:
games['genre'] = games[pd.notnull(games['genre'])]['genre'].apply(lambda x: cleaning_feature(x, pattern)).reindex(games.index)

In [18]:
genres = defaultdict(int)
# Подсчет игр каждого жанра
for game in games[pd.notnull(games['genre'])]['genre'].str.split(', ').tolist():
    for genre in game:
        genres[genre] += 1
pd.Series(genres).sort_values(ascending=False)

Action                3543
General               2143
Action Adventure      1496
Shooter               1389
Sports                1176
Arcade                1099
Role-Playing          1075
Miscellaneous          963
First-Person           917
Strategy               869
3D                     853
Sci-Fi                 804
Traditional            768
Platformer             764
Modern                 754
Racing                 733
Fantasy                732
Driving                723
2D                     681
Sim                    582
Third-Person           556
Simulation             529
Adventure              522
Action RPG             450
Fighting               419
Historic               399
Real-Time              377
Beat-'Em-Up            362
Automobile             333
Team                   318
                      ... 
Large Spaceship         12
Helicopter              11
Rugby                   10
Truck                   10
Roguelike               10
Ski / Snowboard         10
A

## 2.6. Режиссер

In [19]:
pattern = {'Amy Henning': 'Amy Hennig',
           'Daisuke Kanada': 'Daisuke Kaneda',
           'Dave \'Foots\' Footman': 'Dave Footman',
           'Eric "Giz" Gerwirtz': 'Eric Gewirtz',
           'Eric "Giz" Gewirtz': 'Eric Gewirtz',
           'Eric Gerwirtz': 'Eric Gewirtz',
           'Eric \'Giz\' Gewirtz': 'Eric Gewirtz',
           'Fumihko Yasuda': 'Fumihiko Yasuda',
           'Garry M. Gaber': 'Garry Gaber',
           'Glen A. Schofield': 'Glen Schofield',
           'Hideaki Isuno': 'Hideaki Itsuno',
           'Hiroyuki Ito': 'Hiroyuki Itou',
           'Julian  Eggebrecht': 'Julian Eggebrecht',
           'Kazuhide Nakazawaa': 'Kazuhide Nakazawa',
           'Kenji Nakamo': 'Kenji Nakano',
           'Masaya Kobauashi': 'Masaya Kobayashi',
           'Michael Ancel': 'Michel Ancel',
           'Micheal Ancel': 'Michel Ancel',
           'Mikami Shinji': 'Shinji Mikami',
           'Mimoru Toyota': 'Minoru Toyota',
           'Patrice DГ©silets': 'Patrice Desilets',
           'OHKOOMI 16': 'Koji Okohara',
           'Shinichi Shimormura': 'Shinichi Shimomura',
           'Shinjiro Takada': 'Shinjiro Takata',
           'SWERY (Hidetaka Suehiro)': 'Hidetaka Suehiro',
           'Tomoaki AndЕЌ': 'Tomoaki Ando',
           'Yasushi Tsuji': 'Yasushi Tsujii',
           'YeonKyu Choi': 'Yeon-Kyu Choi'}

In [20]:
games['director'] = games[pd.notnull(games['director'])]['director'].apply(lambda x: cleaning_feature(x, pattern)).reindex(games.index)

In [21]:
directors = defaultdict(int)
# Подсчет игр по каждому режиссеру
for game in games[pd.notnull(games['director'])]['director'].str.split(', ').tolist():
    for director in game:
        directors[director] += 1
pd.Series(directors).sort_values(ascending=False)

Jon Burton                 24
Shinji Mikami              11
Satoru Iwata               10
Hideaki Itsuno             10
Hiroshi Yamauchi           10
Ed Boon                     8
Hideo Kojima                8
Dennis Lenart               8
Todd Howard                 7
Glen Schofield              7
Nick Herman                 7
Yoko Taro                   6
Eric Gewirtz                6
Jonathan Morin              6
Sean Velasco                6
Yoshinori Kawano            6
Hideki Kamiya               6
Amy Hennig                  6
Morio Kishimoto             6
Ryota Ito                   6
Hidetaka Miyazaki           6
Eiji Aonuma                 5
David Cage                  5
Motomu Toriyama             5
Michel Ancel                5
Sean Ainsworth              5
Eric Parsons                5
Alex Ward                   5
Eiro Shirahama              5
Kenji Oguro                 5
                           ..
Shohei Mizutani             1
Shuichi Sakurazaki          1
Sangyoun L

## 2.7. Сценарист

In [22]:
pattern = {'Adrian Vershin': 'Adrian Vershinin',
           'Flint Dillie': 'Flint Dille',
           'JT Petty': 'John Thomas Petty',
           'Marianne Krawczyke': 'Marianne Krawczyk',
           'Michael Stemmie': 'Michael Stemmle',
           'Nick Breck': 'Nick Breckon',
           'Ragnar TГёrnquist': 'Ragnar Tornquist',
           'Robney Gibbs': 'Rodney Gibbs'}

In [23]:
games['writer'] = games[pd.notnull(games['writer'])]['writer'].apply(lambda x: cleaning_feature(x, pattern)).reindex(games.index)

In [24]:
writers = defaultdict(int)
# Подсчет игр по каждому сценаристу
for game in games[pd.notnull(games['writer'])]['writer'].str.split(', ').tolist():
    for writer in game:
        writers[writer] += 1
pd.Series(writers).sort_values(ascending=False)

Corey May                     17
Dan Houser                    11
Shuyo Murata                  10
Sean Vanaman                  10
Rhianna Pratchett              9
Chet Faliszek                  9
Ken Pontac                     8
Nicole Martinez                8
Makoto Goya                    8
Jeffrey Yohalem                7
Darby McDevitt                 7
Hidenari Inamura               7
Warren Graff                   7
Drew Holmes                    7
Rupert Humphries               7
Andrew Grant                   7
Bingo Morihashi                6
Odd Ahlgren                    6
Patrick Gilmore                5
Mark Hoffmeier                 5
Michael Unsworth               5
Bruce Feirstein                5
Stephen Sharples               5
Wil Evans                      5
Marcin Blacha                  5
Jonathan Smith                 5
Borys Pugacz Muraszkiewicz     5
Maureen Tan                    5
Walt Williams                  5
Tony Barnes                    5
          

## 2.8. Композитор

In [25]:
pattern = {'Alexander Roder': 'Alexander Roeder',
           'Atli Г–rvarsson': 'Atli Orvarsson',
           'Hitomi Hurokawa': 'Hitomi Kurokawa',
           'Jared Emerson Johnson': 'Jared Emerson-Johnson',
           'Keisuke Ito': 'Keisuke Itou',
           'Marcin PrzybyЕ‚owicz': 'Marcin Przybylowicz',
           'MASA': 'Masayoshi Sasaki',
           'Masashi Hamazu': 'Masashi Hamauzu',
           'Naofume Harada': 'Naofumi Harada',
           'Peter McConnel': 'Peter McConnell',
           'Rei Kondou': 'Rei Kondoh',
           'TomГЎЕЎ DvoЕ™ГЎk': 'Tomas Dvorak',
           'Yasuaki Bunbun Fujita': 'Yasuaki Fujita'}

In [26]:
games['composer'] = games[pd.notnull(games['composer'])]['composer'].apply(lambda x: cleaning_feature(x, pattern)).reindex(games.index)

In [27]:
composers = defaultdict(int)
# Подсчет игр по каждому композитору
for game in games[pd.notnull(games['composer'])]['composer'].str.split(', ').tolist():
    for composer in game:
        composers[composer] += 1
pd.Series(composers).sort_values(ascending=False)

Masami Ueda              14
Rei Kondoh               14
Motoi Sakuraba           12
Sarah Schachner          11
Jason Graves             10
Tomoya Ohtani            10
Cris Velasco              9
Ludvig Forssell           9
Malcolm Kirby Jr          8
Grant Kirkhope            8
Markus Schmidt            8
Rob Westwood              8
Shusaku Uchiyama          8
Inon Zur                  8
Jared Emerson-Johnson     8
Tetsuya Shibata           7
Masashi Hamauzu           7
Kota Suzuki               7
Nobuo Uematsu             7
Rom Di Prisco             7
Jack Wall                 7
Kenichi Tokoi             7
Hideaki Kuroda            7
Nathan Grigg              7
Masafumi Takada           7
Jesper Kyd                7
Akihiko Narita            7
Manami Matsumae           7
Jake Kaufman              7
Norihiko Hibino           6
                         ..
Gareth Coker              1
Kayoko Matsushima         1
Yasuhisa Baba             1
Naofumi Tsuruyama         1
Ryo Yamazaki        

In [28]:
pd.Series(composers).sort_values(ascending=False).to_csv('composer.csv', sep=';')

# 3. Сохранение данных

In [29]:
games.to_csv('Data/metacritic.csv', sep=';', index=False)

In [30]:
games.head(100)

Unnamed: 0,title,platform,publisher,release_date,critic_score,critic_count,critic_positive,critic_mixed,critic_negative,user_score,...,user_positive,user_mixed,user_negative,developer,genre,players,rating,director,writer,composer
0,#IDARB,Xbox One,Other Ocean Interactive,2015-01-30,77.0,31,23,8,0,70.00,...,17,3,1,Other Ocean Interactive,,,,,,
1,#KILLALLZOMBIES,PlayStation 4,Beatshapers,2014-10-28,63.0,6,0,6,0,66.99,...,2,0,0,Beatshapers,,,,,,
2,#KILLALLZOMBIES,Xbox One,Digerati Distribution,2016-08-10,53.0,9,0,6,3,0.00,...,0,0,1,Beatshapers,,,,,,
3,'Splosion Man,Xbox 360,Twisted Pixel Games,2009-07-22,84.0,52,48,4,0,78.01,...,14,1,1,Twisted Pixel Games,"General, Platformer, Action, 2D",1-4,E10+,,,
4,'n Verlore Verstand,Xbox One,Skobbejak Games,2017-04-21,57.0,4,0,4,0,0.00,...,0,0,0,Skobbejak Games,"Adventure, 3D, First-Person",No Online Multiplayer,E,,,
5,.detuned,PlayStation 3,Sony Interactive Entertainment,2009-10-15,46.0,9,0,4,5,36.99,...,0,4,12,.theprodukkt,"Music, Rhythm, Action, Miscellaneous",No Online Multiplayer,E,,,
6,.hack//G.U. Last Recode,PC,Bandai Namco Games,2017-11-03,69.0,7,3,4,0,71.99,...,4,0,1,CyberConnect2,"Compilation, Miscellaneous",No Online Multiplayer,T,,,
7,.hack//G.U. Last Recode,PlayStation 4,Bandai Namco Games,2017-11-03,76.0,21,14,7,0,78.01,...,6,0,0,CyberConnect2,"Compilation, Miscellaneous",No Online Multiplayer,T,,,
8,.hack//G.U. vol. 1//Rebirth,PlayStation 2,Bandai Namco Games,2006-10-24,69.0,21,8,13,0,81.02,...,2,1,1,CyberConnect2,"Role-Playing, Action RPG",No Online Multiplayer,T,,,
9,.hack//G.U. vol. 2//Reminisce,PlayStation 2,Bandai Namco Games,2007-05-08,61.0,21,3,17,1,82.97,...,1,0,0,CyberConnect2,"Role-Playing, Action RPG",No Online Multiplayer,T,,,
