In [46]:
import pandas as pd
import re
import numpy as np
from tqdm import tqdm
import numpy as np
import json

In [47]:
df = pd.read_json('raw_data.json', lines=True)
df.head()

Unnamed: 0,id,name,rating,ratings,released,metacritic,ratings_count,genres
0,3498,Grand Theft Auto V,4.48,"[{'id': 5, 'title': 'exceptional', 'count': 16...",2013-09-17,96.0,2758,"[Action, Shooter]"
1,4200,Portal 2,4.61,"[{'id': 5, 'title': 'exceptional', 'count': 16...",2011-04-19,95.0,2405,"[Shooter, Puzzle]"
2,3328,The Witcher 3: Wild Hunt,4.68,"[{'id': 5, 'title': 'exceptional', 'count': 20...",2015-05-18,93.0,2552,[RPG]
3,5679,The Elder Scrolls V: Skyrim,4.39,"[{'id': 5, 'title': 'exceptional', 'count': 10...",2011-11-11,,2015,"[Action, RPG]"
4,12020,Left 4 Dead 2,4.08,"[{'id': 4, 'title': 'recommended', 'count': 76...",2009-11-17,89.0,1431,"[Action, Shooter]"


In [48]:
print(f'{df.describe()}\n')
print(f'{df.info()}\n')
print(f"released NA {df['released'].isna().sum()}\n")
print(f"metacritic NA {df['metacritic'].isna().sum()}\n")

                  id        rating   metacritic  ratings_count
count   11298.000000  11298.000000  2271.000000   11298.000000
mean    34967.389715      2.717164    73.616028      54.676226
std     57025.162227      1.504177    10.458881     144.520690
min         2.000000      0.000000    24.000000       4.000000
25%     10665.500000      2.000000    68.000000       6.000000
50%     19600.500000      3.250000    75.000000      13.000000
75%     38375.500000      3.870000    81.000000      36.000000
max    394092.000000      5.000000    97.000000    2758.000000

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11298 entries, 0 to 11297
Data columns (total 8 columns):
id               11298 non-null int64
name             11298 non-null object
rating           11298 non-null float64
ratings          11298 non-null object
released         11088 non-null object
metacritic       2271 non-null float64
ratings_count    11298 non-null int64
genres           11298 non-null object
dtypes: float

In [49]:
df[df.released.isna()].sort_values(by='ratings_count', ascending=False).head(23)

Unnamed: 0,id,name,rating,ratings,released,metacritic,ratings_count,genres
226,5687,The Playroom,2.21,"[{'id': 1, 'title': 'skip', 'count': 210, 'per...",,,441,[]
548,42309,Dead Island: Epidemic,2.2,"[{'id': 1, 'title': 'skip', 'count': 63, 'perc...",,,136,[Arcade]
794,42391,Nosgoth,2.54,"[{'id': 1, 'title': 'skip', 'count': 40, 'perc...",,,97,"[Action, Shooter]"
1491,10096,3DMark,3.43,"[{'id': 4, 'title': 'recommended', 'count': 44...",,,79,[]
1247,23576,GOD EATER RESURRECTION,3.2,"[{'id': 3, 'title': 'meh', 'count': 28, 'perce...",,,71,[Action]
2516,9551,Deep Rock Galactic,4.14,"[{'id': 4, 'title': 'recommended', 'count': 40...",,,61,"[Action, Indie]"
2104,10156,Aliens: Colonial Marines Collection,2.6,"[{'id': 1, 'title': 'skip', 'count': 21, 'perc...",,45.0,54,[Action]
4212,5673,Terminator Salvation,2.7,"[{'id': 3, 'title': 'meh', 'count': 21, 'perce...",,,40,"[Action, Shooter]"
807,18785,Sam & Max 303: They Stole Max's Brain!,2.76,"[{'id': 1, 'title': 'skip', 'count': 13, 'perc...",,,38,[Adventure]
3325,11587,Kenshi,4.16,"[{'id': 5, 'title': 'exceptional', 'count': 16...",,,38,"[Action, RPG, Strategy, Simulation, Indie]"


# Cleaning the dataset
    1. Filling or removing missing values
    2. Save the new dataset to a new file

In [50]:
# check which game is missing release date and manually fill those with over 30 votes
def add_release_dates(df):
    '''
    This function fills specific observations with specific release dates.
    Input:
        df: Original raw data dataframe
    Output:
        Partially filled release dates in the dataframe
    '''
    # star wars: obi wan
    df.at[11169, 'released'] = '2001-12-19'
    # god eater resurrection
    df.at[1247, 'released'] = '2015-10-29' 
    # deep rock galactic
    df.at[2516, 'released'] = '2018-02-28'
    # aliens: Colonial Marines Collection
    df.at[2104, 'released'] = '2013-02-12'
    # terminator Salvation
    df.at[4212, 'released'] = '2009-05-01'
    # kenshi
    df.at[3325, 'released'] = '2018-12-06'
    # Sam & Max 303: They Stole Max's Brain!
    df.at[807, 'released'] = '2010-06-22'
    # Sam & Max 303: Beyond the Alley of the Dolls
    df.at[812, 'released'] = '2010-07-20'
    # Sam & Max 303: The Penal Zone
    df.at[814, 'released'] = '2010-04-02'
    # Sam & Max 303: The City That Dares Not Sleep
    df.at[811, 'released'] = '2010-08-30'
    # Sam & Max 303:  The Tomb of Sammun-Mak
    df.at[813, 'released'] = '2010-05-18'
    # Sword of the Stars: The Pit
    df.at[2396, 'released'] = '2013-02-21'
    # SpellForce 2 Anniversary Edition
    df.at[2399, 'released'] = '2017-04-12'
    # raft
    df.at[4281, 'released'] = '2018-05-23'
    # Unheard
    df.at[5799, 'released'] = '2019-03-29'
    # Hector: Episode 2
    df.at[1167, 'released'] = '2011-06-25'
    
    # most of the games with higher vote count that are missing release date are closed/suspended
    # no use recommending them.
    df.dropna(subset=['released'], inplace=True)
    return df

df = add_release_dates(df)

# drop games that do not contain genres
df = df[df['genres'].map(lambda x: len(x) > 0)]

# asses the NAN value situation
print(f'{df.metacritic.isna().sum()}\n')
print(f'{df.info()}/n')

8255

<class 'pandas.core.frame.DataFrame'>
Int64Index: 10509 entries, 0 to 11297
Data columns (total 8 columns):
id               10509 non-null int64
name             10509 non-null object
rating           10509 non-null float64
ratings          10509 non-null object
released         10509 non-null object
metacritic       2254 non-null float64
ratings_count    10509 non-null int64
genres           10509 non-null object
dtypes: float64(2), int64(2), object(4)
memory usage: 738.9+ KB
None/n


# Filling the metacritic scores for as many games as possible
    1. Retrieve the names of all games without metacritic score
    2. Use selenium to scrape scores

In [16]:
no_metacritic = np.array(df['name'][df['metacritic'].isna()])
# no_metacritic.to_csv('missing_metacritic.csv', index=False)
# no_metacritic.tail()

In [17]:
from selenium import webdriver
import requests

In [18]:
def to_json(dictionary, filename):
    '''
    Input:
        game: dictionary containing the data to write to the file
        filename: a string stating the name and path you wish to save the file at
    Returns:
        json file
    '''
    with open(f'{filename}.json', 'a') as file:
            file.write(f'{json.dumps(dictionary)}\n')
    return

In [19]:
def scrape_metacritic(names, filename):
    '''
    Input:
        names: list of games to scrape their score from metacritic
        filename: a string for saving the json file
    Returns:
        json file containing a list of dictionarys with the name of the game and the 
    '''
    scores = []
    for name in tqdm(names):
        game = {}
        # open metacritic
        driver = webdriver.Chrome()
        driver.get("https://www.metacritic.com/game")
        driver.implicitly_wait(14)
        
        try:
            # find search bar to search the, enter the game name, and click search
            driver.find_element_by_id('primary_search_box').send_keys(f'{name}')
            driver.implicitly_wait(2)
            driver.find_element_by_id('primary_menu_item_enter_search').click()

            # filter out non games
            driver.implicitly_wait(6)
            parent = driver.find_element_by_class_name("filter_area")
            driver.implicitly_wait(2)
            element = parent.find_elements_by_class_name("title")[2].click()
            
            # good reviews
            if driver.find_elements_by_xpath('(.//span[@class = "metascore_w medium game positive"][1])'):
                driver.implicitly_wait(0.5)
                score = driver.find_elements_by_xpath('(.//span[@class = "metascore_w medium game positive"][1])')[0].text
            
            # no review yet
            elif driver.find_elements_by_xpath('(.//span[@class = "metascore_w medium game tbd"][1])'):
                driver.implicitly_wait(0.5)
                score = driver.find_elements_by_xpath('(.//span[@class = "metascore_w medium game tbd"][1])')[0].text
            
            # mixed reviews
            elif driver.find_elements_by_xpath('(.//span[@class = "metascore_w medium game mixed"][1])'):
                driver.implicitly_wait(0.5)
                score = driver.find_elements_by_xpath('(.//span[@class = "metascore_w medium game mixed"][1])')[0].text
            
            # bad reviews
            elif driver.find_elements_by_xpath('(.//span[@class = "metascore_w medium game negative"][1])'):
                driver.implicitly_wait(0.5)
                score = driver.find_elements_by_xpath('(.//span[@class = "metascore_w medium game negative"][1])')[0].text
            
            else:
                score = "Not Found"             
        except:
            score = "Selenium Fail" 
        
        game[name] = score
        to_json(game, filename)
        scores.append(game)    
        driver.close()
    
    driver.quit()
    return scores
 
metacritic_scores = scrape_metacritic(no_metacritic[:2], 'test')

100%|████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:51<00:00, 25.82s/it]


In [20]:
metacritic_scores

[{'The Elder Scrolls V: Skyrim': '84'}, {'Half-Life 2: Lost Coast': 'tbd'}]

In [64]:
games = requests.get('https://api.rawg.io/api/games?page=1').json()
games['results'][0]

{'id': 3498,
 'slug': 'grand-theft-auto-v',
 'name': 'Grand Theft Auto V',
 'released': '2013-09-17',
 'tba': False,
 'background_image': 'https://media.rawg.io/media/games/b11/b115b2bc6a5957a917bc7601f4abdda2.jpg',
 'rating': 4.47,
 'rating_top': 5,
 'ratings': [{'id': 5,
   'title': 'exceptional',
   'count': 1652,
   'percent': 58.23},
  {'id': 4, 'title': 'recommended', 'count': 966, 'percent': 34.05},
  {'id': 3, 'title': 'meh', 'count': 175, 'percent': 6.17},
  {'id': 1, 'title': 'skip', 'count': 44, 'percent': 1.55}],
 'ratings_count': 2814,
 'reviews_text_count': 14,
 'added': 9794,
 'added_by_status': {'yet': 169,
  'owned': 6334,
  'beaten': 2311,
  'toplay': 291,
  'dropped': 390,
  'playing': 299},
 'metacritic': 96,
 'playtime': 70,
 'suggestions_count': 401,
 'user_game': None,
 'reviews_count': 2837,
 'saturated_color': '0f0f0f',
 'dominant_color': '0f0f0f',
 'platforms': [{'platform': {'id': 4,
    'name': 'PC',
    'slug': 'pc',
    'image': None,
    'year_end': None,

In [100]:
b = df[type(df['genres']) != 'list' ]

KeyError: True

In [92]:
df

Unnamed: 0,id,name,rating,ratings,released,metacritic,ratings_count,genres
0,3498,Grand Theft Auto V,4.48,"[{'id': 5, 'title': 'exceptional', 'count': 16...",2013-09-17,96.0,2758,"[Action, Shooter]"
1,4200,Portal 2,4.61,"[{'id': 5, 'title': 'exceptional', 'count': 16...",2011-04-19,95.0,2405,"[Shooter, Puzzle]"
2,3328,The Witcher 3: Wild Hunt,4.68,"[{'id': 5, 'title': 'exceptional', 'count': 20...",2015-05-18,93.0,2552,[RPG]
3,5679,The Elder Scrolls V: Skyrim,4.39,"[{'id': 5, 'title': 'exceptional', 'count': 10...",2011-11-11,,2015,"[Action, RPG]"
4,12020,Left 4 Dead 2,4.08,"[{'id': 4, 'title': 'recommended', 'count': 76...",2009-11-17,89.0,1431,"[Action, Shooter]"
...,...,...,...,...,...,...,...,...
11293,27504,Sonic Boom: Shattered Crystal,0.00,"[{'id': 1, 'title': 'skip', 'count': 3, 'perce...",2014-11-11,,4,[]
11294,5899,Temple Run: Oz,0.00,"[{'id': 4, 'title': 'recommended', 'count': 2,...",2013-03-05,,4,"[Action, Adventure]"
11295,32329,Nitemare-3D,0.00,"[{'id': 1, 'title': 'skip', 'count': 2, 'perce...",1994-01-01,,4,"[Action, Shooter]"
11296,394092,Bemuzed,0.00,"[{'id': 5, 'title': 'exceptional', 'count': 5,...",2019-11-22,,4,"[Puzzle, Platformer]"


In [36]:
a = []
len(a)

0

In [114]:
pd.DataFrame(data, index=data.keys())

AttributeError: 'list' object has no attribute 'keys'

In [97]:
df.genres[11293]

[]

In [94]:
df.ratings[0]

[{'id': 5, 'title': 'exceptional', 'count': 1620, 'percent': 58.25},
 {'id': 4, 'title': 'recommended', 'count': 947, 'percent': 34.05},
 {'id': 3, 'title': 'meh', 'count': 173, 'percent': 6.22},
 {'id': 1, 'title': 'skip', 'count': 41, 'percent': 1.47}]

In [130]:
raw_scores_df = pd.read_csv('test.csv')
raw_scores_df.head()

Unnamed: 0,name
0,82


In [None]:
raw_scores_df = pd.read_json('metacritic_scores.json', lines=True)
def fill_metacritic(df, scores_df, column):
    
    # create a single column df with all the scraped data found on the diagonal
    scores_df.set_index([scores_df.columns], inplace=True)
    scores_df = pd.DataFrame(np.diag(scores_df), index=scores_df.index, columns=['metacritic'])
    
    # fill the missing results from the scraped scores by index
    df.set_index(column, inplace=True)
    df.metacritic = df['metacritic'].fillna(scores_df['metacritic'])
    return df

In [23]:
scores_df.reset_index()

Unnamed: 0,metacritic
The Elder Scrolls V: Skyrim,84
Half-Life 2: Lost Coast,tbd
Metro 2033,90
Warframe,86
God of War,Selenium Fail
...,...
Organ Trail: Director's Cut,86
"Warhammer 40,000: Eternal Crusade",Selenium Fail
NARUTO SHIPPUDEN: Ultimate Ninja STORM 4,79
Star Saviors,Not Found


In [None]:
cleaned_df = df.copy(deep=True)
cleaned_df.to_csv('cleaner_raw_data.csv')

In [None]:
uniques = pd.unique(cleaned_df['Genres'].values.ravel('K'))
uniques[0][0]

In [None]:
def add_genre_columns(df, column):
    # convert the strings in genres column to list
    df, uniques = str_tolist(df, column)
    
    #create dataframe with the column names
    genres_df = pd.DataFrame(columns=uniques)
    
    # join the original df with the new df
    df = df.join(column_df, how='left')
    
    # add function that uses the list strings to fill the values
    return df, list_strings

def fill_genre_columns(df, uniques):
    df2 = df.copy(deep=True)
    zeros = np.zeros(len(uniques))
    
    all_dummies = []
    for row in tqdm(df2.itertuples()):
        i = 1
        uniques_dic = dict(zip(uniques, zeros))
        while i < len(uniques):
            if row in uniques:
                uniques_dic[row[i]] = 1
                i += 1

        all_dummies.append(uniques_dic)
    return pd.DataFrame(all_dummies, columns=uniques)
    
df, uniques = add_genre_columns(cleaned_df, 'Genres')

In [None]:
test = cleaned_df.copy(deep=True)

In [None]:
def populate_df(df):
    """ This function turns the characters dataframe into dummy varaibles dataframe"""
    uniques = pd.unique(df.values.ravel('K'))
    zeros = np.zeros(len(uniques))

    # main df protection
    df = df.copy(deep=True)

    all_dummies = []
    for row in df.itertuples():
        i = 1
        uniques_dic = dict(zip(uniques, zeros))
        while i < 6:
            uniques_dic[row[i]] = 1
            i += 1

        all_dummies.append(uniques_dic)
    return pd.DataFrame(all_dummies, columns=uniques)

In [None]:
import re

df3 = cleaned_df['Genres'].str.replace(r'\']', '')
df3 = pd.DataFrame(df3.str.split().values.tolist())
df3[0] = df3[0].str.replace(r'[', '')
df3[0] = df3[0].str.replace(r'\'', '')
df3[0] = df3[0].str.replace(r'\,', '')

df3
# df2 = cleaned_df.copy(deep=True)
# zeros = np.zeros(len(uniques))

# if any("abc" in s for s in some_list):

test_df = df2.join(df3, how='left')

i = 0
for row in tqdm(test_df.itertuples()):
    print(f'{row}\n')
    uniques_dic = dict(zip(uniques, zeros))
    print(f'{uniques_dic}\n')
    while i < len(uniques):
            if any(row in unique for unique in uniques):
                uniques_dic[row[i]] = 1
                i += 1
                if i == 20:
                    break
    break

In [None]:
def create_genre_list(series):
    genre_list = []
    for genres in series:
        genre_list.append(genres)
    return genre_list

a = create_genre_list(genres_series)
type(a[-1])

In [218]:
with open('metacritic_scores.json', 'r') as file:
    dude = [json.loads(f) for f in file]

dude

[{'The Elder Scrolls V: Skyrim': '84'},
 {'Half-Life 2: Lost Coast': 'tbd'},
 {'Metro 2033': '90'},
 {'Warframe': '86'},
 {'God of War': 'Selenium Fail'},
 {'Red Dead Redemption 2': '97'},
 {'Horizon Zero Dawn': 'Selenium Fail'},
 {'Half-Life 2: Deathmatch': 'tbd'},
 {'Batman: Arkham Knight': 'Selenium Fail'},
 {'Uncharted 4: A Thief’s End': 'Not Found'},
 {'Half-Life Deathmatch: Source': 'tbd'},
 {"Garry's Mod": 'tbd'},
 {'Bloodborne': '92'},
 {"Marvel's Spider-Man": '87'},
 {'Assassin’s Creed IV Black Flag': 'Not Found'},
 {'The Walking Dead': '89'},
 {'BioShock Remastered': 'tbd'},
 {'Viscera Cleanup Detail: Shadow Warrior': 'tbd'},
 {'The Walking Dead: Season 1': '87'},
 {'PlayerUnknown’s Battlegrounds': 'Not Found'},
 {'Red Dead Redemption': '97'},
 {'Heavy Rain': '78'},
 {'Dead Island': '80'},
 {'Paladins': 'Selenium Fail'},
 {'SEGA Mega Drive and Genesis Classics': 'tbd'},
 {'Detroit: Become Human': '78'},
 {'Metro 2033 Redux': '90'},
 {'Darksiders Warmastered Edition': '81'},
 

In [22]:
raw_scores_df = pd.read_json('metacritic_scores.json', lines=True)
raw_scores_df

Unnamed: 0,The Elder Scrolls V: Skyrim,Half-Life 2: Lost Coast,Metro 2033,Warframe,God of War,Red Dead Redemption 2,Horizon Zero Dawn,Half-Life 2: Deathmatch,Batman: Arkham Knight,Uncharted 4: A Thief’s End,...,Super Mario Bros. 3,NBA 2K18,Bleed,Call of Cthulhu,Wargroove,THE KING OF FIGHTERS 2002,Depression Quest,The Tower Of Elements,Space Pilgrim Episode IV: Sol,The Beatles: Rock Band
0,84.0,,,,,,,,,,...,,,,,,,,,,
1,,tbd,,,,,,,,,...,,,,,,,,,,
2,,,90.0,,,,,,,,...,,,,,,,,,,
3,,,,86.0,,,,,,,...,,,,,,,,,,
4,,,,,Selenium Fail,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1395,,,,,,,,,,,...,,,,,,tbd,,,,
1396,,,,,,,,,,,...,,,,,,,tbd,,,
1397,,,,,,,,,,,...,,,,,,,,tbd,,
1398,,,,,,,,,,,...,,,,,,,,,tbd,


In [51]:
df2 = df.copy(deep=True)

In [33]:
def reshape_metacritic_df(filename):
    '''
    '''
    metacritic_scores = pd.read_json(filename, lines=True)
    metacritic_scores.set_index([metacritic_scores.columns], inplace=True)
    return pd.DataFrame(np.diag(metacritic_scores), index=metacritic_scores.index, columns=['metacritic'])

def extract_selenium_fails(filename):
    '''
    Input:
        filename: file name for a json containing games and their respective metacritic scores
    Retruns:
        numpy array containing only the games that do not have selenium scores  
    '''
    with open(filename, 'r') as file:
            meta_scores = [json.loads(f) for f in file]
    
    fails = []
    for game in meta_scores:
        for k, v in game.items():
            if v == 'Not Found' or v == 'Selenium Fail':
                fails.append(k)

    return np.array(fails)

def fill_metacritic(df, filename, column):
    '''
    '''  
    # create a single column df with all the scraped data found on the diagonal
    scores_column = reshape_metacritic_df(filename)

    # get the games that did not scrape the first time to try again later.
    selenium_fails = extract_selenium_fails(filename)
    
    # fill the missing results from the scraped scores by index
    df.set_index('name', inplace=True)
    df['metacritic'] = df[column].fillna(scores_column[column])
    return df, selenium_fails

In [52]:
df2

Unnamed: 0,id,name,rating,ratings,released,metacritic,ratings_count,genres
0,3498,Grand Theft Auto V,4.48,"[{'id': 5, 'title': 'exceptional', 'count': 16...",2013-09-17,96.0,2758,"[Action, Shooter]"
1,4200,Portal 2,4.61,"[{'id': 5, 'title': 'exceptional', 'count': 16...",2011-04-19,95.0,2405,"[Shooter, Puzzle]"
2,3328,The Witcher 3: Wild Hunt,4.68,"[{'id': 5, 'title': 'exceptional', 'count': 20...",2015-05-18,93.0,2552,[RPG]
3,5679,The Elder Scrolls V: Skyrim,4.39,"[{'id': 5, 'title': 'exceptional', 'count': 10...",2011-11-11,,2015,"[Action, RPG]"
4,12020,Left 4 Dead 2,4.08,"[{'id': 4, 'title': 'recommended', 'count': 76...",2009-11-17,89.0,1431,"[Action, Shooter]"
...,...,...,...,...,...,...,...,...
11291,33076,Hexxagon,0.00,"[{'id': 4, 'title': 'recommended', 'count': 4,...",1993-01-01,,4,"[Strategy, Board Games]"
11294,5899,Temple Run: Oz,0.00,"[{'id': 4, 'title': 'recommended', 'count': 2,...",2013-03-05,,4,"[Action, Adventure]"
11295,32329,Nitemare-3D,0.00,"[{'id': 1, 'title': 'skip', 'count': 2, 'perce...",1994-01-01,,4,"[Action, Shooter]"
11296,394092,Bemuzed,0.00,"[{'id': 5, 'title': 'exceptional', 'count': 5,...",2019-11-22,,4,"[Puzzle, Platformer]"


In [53]:
df2, fails = fill_metacritic(df2, 'metacritic_scores.json', 'metacritic')
df2

Unnamed: 0_level_0,id,rating,ratings,released,metacritic,ratings_count,genres
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Grand Theft Auto V,3498,4.48,"[{'id': 5, 'title': 'exceptional', 'count': 16...",2013-09-17,96,2758,"[Action, Shooter]"
Portal 2,4200,4.61,"[{'id': 5, 'title': 'exceptional', 'count': 16...",2011-04-19,95,2405,"[Shooter, Puzzle]"
The Witcher 3: Wild Hunt,3328,4.68,"[{'id': 5, 'title': 'exceptional', 'count': 20...",2015-05-18,93,2552,[RPG]
The Elder Scrolls V: Skyrim,5679,4.39,"[{'id': 5, 'title': 'exceptional', 'count': 10...",2011-11-11,84,2015,"[Action, RPG]"
Left 4 Dead 2,12020,4.08,"[{'id': 4, 'title': 'recommended', 'count': 76...",2009-11-17,89,1431,"[Action, Shooter]"
...,...,...,...,...,...,...,...
Hexxagon,33076,0.00,"[{'id': 4, 'title': 'recommended', 'count': 4,...",1993-01-01,,4,"[Strategy, Board Games]"
Temple Run: Oz,5899,0.00,"[{'id': 4, 'title': 'recommended', 'count': 2,...",2013-03-05,,4,"[Action, Adventure]"
Nitemare-3D,32329,0.00,"[{'id': 1, 'title': 'skip', 'count': 2, 'perce...",1994-01-01,,4,"[Action, Shooter]"
Bemuzed,394092,0.00,"[{'id': 5, 'title': 'exceptional', 'count': 5,...",2019-11-22,,4,"[Puzzle, Platformer]"


In [30]:
type(df2)

tuple

In [198]:
df.reset_index(inplace=True)

In [201]:
df.drop(columns=['index'], inplace=True)

Unnamed: 0,name,id,rating,ratings,released,metacritic,ratings_count,genres
0,Grand Theft Auto V,3498,4.48,"[{'id': 5, 'title': 'exceptional', 'count': 16...",2013-09-17,96.0,2758,"[Action, Shooter]"
1,Portal 2,4200,4.61,"[{'id': 5, 'title': 'exceptional', 'count': 16...",2011-04-19,95.0,2405,"[Shooter, Puzzle]"
2,The Witcher 3: Wild Hunt,3328,4.68,"[{'id': 5, 'title': 'exceptional', 'count': 20...",2015-05-18,93.0,2552,[RPG]
3,The Elder Scrolls V: Skyrim,5679,4.39,"[{'id': 5, 'title': 'exceptional', 'count': 10...",2011-11-11,,2015,"[Action, RPG]"
4,Left 4 Dead 2,12020,4.08,"[{'id': 4, 'title': 'recommended', 'count': 76...",2009-11-17,89.0,1431,"[Action, Shooter]"
...,...,...,...,...,...,...,...,...
11099,Sonic Boom: Shattered Crystal,27504,0.00,"[{'id': 1, 'title': 'skip', 'count': 3, 'perce...",2014-11-11,,4,[]
11100,Temple Run: Oz,5899,0.00,"[{'id': 4, 'title': 'recommended', 'count': 2,...",2013-03-05,,4,"[Action, Adventure]"
11101,Nitemare-3D,32329,0.00,"[{'id': 1, 'title': 'skip', 'count': 2, 'perce...",1994-01-01,,4,"[Action, Shooter]"
11102,Bemuzed,394092,0.00,"[{'id': 5, 'title': 'exceptional', 'count': 5,...",2019-11-22,,4,"[Puzzle, Platformer]"


In [9]:
with open('metacritic_scores.json', 'r') as file:
            meta_scores = [json.loads(f) for f in file]
    
fails = []
for game in meta_scores:
    for k, v in game.items():
        if v == 'Not Found' or v == 'Selenium Fail':
            fails.append(k)
        
np.array(fails)


array(['God of War', 'Horizon Zero Dawn', 'Batman: Arkham Knight',
       'Uncharted 4: A Thief’s End', 'Assassin’s Creed IV Black Flag',
       'PlayerUnknown’s Battlegrounds', 'Paladins', 'NieR:Automata',
       'Half-Life: Opposing Force', 'Tom Clancy’s The Division',
       'Serious Sam Fusion 2017 (beta)', 'Watch Dogs 2',
       'Ratchet & Clank (2016)', 'Battlefield 4',
       'Borderlands Game of the Year Enhanced',
       'Sekiro: Shadows Die Twice', 'LEGO The Hobbit',
       'STAR WARS Battlefront', 'God of War III Remastered',
       'Fortnite: Save The World',
       'Resident Evil Revelations 2 / Biohazard Revelations 2',
       'The Long Dark', 'Ricochet', 'Minecraft', 'H1Z1: King of the Kill',
       'Forza Horizon 4', 'Sunset Overdrive', 'Besiege',
       'The Ship: Single Player', 'Never Alone (Kisima Ingitchuna)',
       'ORION: Prelude', 'No Time To Explain Remastered',
       'Tom Clancy’s Splinter Cell Blacklist', 'Really Big Sky',
       'Crash Bandicoot 2: Cortex 