# Practicing Python Collections using Video Game sales data

In [1]:
import json

## The Dataset

In [2]:
with open('../../data/games_data.json') as fp:
    games = json.load(fp)

In [3]:
len(games)

16291

Let's take a peak into the data:

In [4]:
games[ :5]  # gives us the first 5 games

[{'rank': 1,
  'name': 'Wii Sports',
  'platform': 'Wii',
  'year': 2006,
  'genre': 'Sports',
  'publisher': 'Nintendo',
  'na_sales': 41.49,
  'eu_sales': 29.02,
  'jp_sales': 3.77,
  'other_sales': 8.46,
  'global_sales': 82.74},
 {'rank': 2,
  'name': 'Super Mario Bros.',
  'platform': 'NES',
  'year': 1985,
  'genre': 'Platform',
  'publisher': 'Nintendo',
  'na_sales': 29.08,
  'eu_sales': 3.58,
  'jp_sales': 6.81,
  'other_sales': 0.77,
  'global_sales': 40.24},
 {'rank': 3,
  'name': 'Mario Kart Wii',
  'platform': 'Wii',
  'year': 2008,
  'genre': 'Racing',
  'publisher': 'Nintendo',
  'na_sales': 15.85,
  'eu_sales': 12.88,
  'jp_sales': 3.79,
  'other_sales': 3.31,
  'global_sales': 35.82},
 {'rank': 4,
  'name': 'Wii Sports Resort',
  'platform': 'Wii',
  'year': 2009,
  'genre': 'Sports',
  'publisher': 'Nintendo',
  'na_sales': 15.75,
  'eu_sales': 11.01,
  'jp_sales': 3.28,
  'other_sales': 2.96,
  'global_sales': 33.0},
 {'rank': 5,
  'name': 'Pokemon Red/Pokemon Blue',

## Analytical Questions:

### 1. Total number of games published by Nintendo?

Determine and input the aggregate number of games that Nintendo has published. This should be presented as an integer value.

> Please note that you should count the games by their unique name, rather than by their rank.

In [5]:
nintendo_games = set()
for game in games:
    if game['publisher'] == 'Nintendo':
        nintendo_games.add(game['name'])

len(nintendo_games)

662

### 2. Game with Highest Global Sales

What is the name of a game with the highest global sales ?

In [6]:
top_seller = None
top_sales_count = 0
for game in games:
    global_sales = game['global_sales']
    if global_sales > top_sales_count:
        top_seller = game['name']
        top_sales_count = global_sales

print(f"Global Sales Top Seller: {top_seller} with {top_sales_count} sales.")    

Global Sales Top Seller: Wii Sports with 82.74 sales.


### 3. Total number of games released in year 2012

What is the total number of games released in year 2012 as an integer.

> Count unique games by name and not by rank.

In [7]:
games_2012 = set()
for game in games:
    if game['year'] == 2012:
        games_2012.add(game['name'])

len(games_2012)

447

### 4. What are the top 10 best-selling video games globally, and their corresponding sales figures?

Create a dictionary <code>top_10_games</code> with the name of the game as key and the global sales as value.

> If there are multiple games with the same name, then the game with the highest global sales should be considered.

Example expected output:

<code>{'Wii Sports': 82.74,
 'Super Mario Bros.': 40.24,
 'Mario Kart Wii': 35.82,
 'Wii Sports Resort': 33.0,
  ...
}</code>

In [8]:
top_games = {}

for game in games:
    title = game['name']
    global_sales = game['global_sales']

    if title not in top_games or top_games[title] < global_sales:
        top_games[title] = global_sales

top_games_sorted = dict(sorted(top_games.items(),key=lambda x: x[1], reverse=True))
top_10_games = dict(list(top_games_sorted.items())[:10])
top_10_games 
    

{'Wii Sports': 82.74,
 'Super Mario Bros.': 40.24,
 'Mario Kart Wii': 35.82,
 'Wii Sports Resort': 33.0,
 'Pokemon Red/Pokemon Blue': 31.37,
 'Tetris': 30.26,
 'New Super Mario Bros.': 30.01,
 'Wii Play': 29.02,
 'New Super Mario Bros. Wii': 28.62,
 'Duck Hunt': 28.31}

### 5. What is the minimum value of the global_sales and which platform sold such low sales?

Your output should be formatted as <code>platform: global_sales</code> e.g <code>PS2: 2.69.</code>

> There is a space after the colon.

In [9]:
min_global_sales = float('inf')
min_global_sales_platform = ''
for game in games:
    plat = game['platform']
    global_sales = game['global_sales']
    if global_sales < min_global_sales:
        min_global_sales = global_sales
        min_global_sales_platform = plat
    

print(f"{min_global_sales_platform}: {min_global_sales}")

PC: 0.01


### 6. Creating Nested Dictionaries for Total Sales and Game Count

Create a dictionary called <code>platform_sales</code>, which will consist of nested dictionaries. Each nested dictionary will have two key-value pairs: one with the key <code>total_sales</code> and its corresponding value, and another with the key <code>game_count</code> and its corresponding value."

<code>{'2600': {'total_sales': 97.08000000000003, 'game_count': 133},
 'PSP': {'total_sales': 296.2799999999948, 'game_count': 1213},
 'XOne': {'total_sales': 141.05999999999995, 'game_count': 213},
 'GC': {'total_sales': 199.3600000000007, 'game_count': 556},
 'WiiU': {'total_sales': 81.86000000000006, 'game_count': 143},
 'GEN': {'total_sales': 28.360000000000003, 'game_count': 27},
 ...
 }</code>

In [10]:
platform_sales = {}
for game in games:
    plat = game['platform']
    global_sales = game['global_sales']
    if plat not in platform_sales:
        platform_sales[plat] = {'total_sales' : global_sales,
                                'game_count' : 1}
    else:
        platform_sales[plat]['total_sales'] += global_sales
        platform_sales[plat]['game_count'] += 1

platform_sales

{'Wii': {'total_sales': 909.8099999999976, 'game_count': 1290},
 'NES': {'total_sales': 251.06999999999988, 'game_count': 98},
 'GB': {'total_sales': 254.41999999999987, 'game_count': 97},
 'DS': {'total_sales': 818.9099999999876, 'game_count': 2131},
 'X360': {'total_sales': 969.5999999999993, 'game_count': 1234},
 'PS3': {'total_sales': 949.3499999999987, 'game_count': 1304},
 'PS2': {'total_sales': 1233.4599999999837, 'game_count': 2127},
 'SNES': {'total_sales': 200.05000000000024, 'game_count': 239},
 'GBA': {'total_sales': 305.6199999999987, 'game_count': 786},
 '3DS': {'total_sales': 246.27000000000007, 'game_count': 499},
 'PS4': {'total_sales': 278.0999999999994, 'game_count': 336},
 'N64': {'total_sales': 218.20999999999987, 'game_count': 316},
 'PS': {'total_sales': 727.3899999999971, 'game_count': 1189},
 'XB': {'total_sales': 252.09000000000069, 'game_count': 803},
 'PC': {'total_sales': 254.70000000000104, 'game_count': 938},
 '2600': {'total_sales': 86.57000000000005, 'g

### 7. What is the distribution of <code>global_sales</code> across different platforms?

Create a list <code>distribution</code> which will have a tuple with the Name of the Platform and its percentage.

Percentage will be the <code>Platform's Global_Sales / Total Global_Sales</code>

> If one platform has multiple games, then the platform's global sales will be the sum of all the games' global sales.

Example expected output:

<code>[('Wii', 10.38861311773706),
 ('NES', 2.8145472644843057),
 ('GB', 2.8636479814892892),
 ('DS', 9.220285098043025),
 ('X360', 10.985556766256583),
 ('PS3', 10.737586935172041),...]</code>


In [11]:
distribution = []
total_sales_all_platforms = 0
for game in games:
    total_sales_all_platforms += game['global_sales']

for plat, values in platform_sales.items():
    distribution.append( (plat, (values['total_sales'] / total_sales_all_platforms)))

distribution

[('Wii', 0.10324706053242479),
 ('NES', 0.028491926322942094),
 ('GB', 0.028872091030720227),
 ('DS', 0.09293154652136922),
 ('X360', 0.11003214945124726),
 ('PS3', 0.10773413890423011),
 ('PS2', 0.1399755105838838),
 ('SNES', 0.022702074564482318),
 ('GBA', 0.034682369549597844),
 ('3DS', 0.02794721271179733),
 ('PS4', 0.03155934484570113),
 ('N64', 0.024762907726646726),
 ('PS', 0.08254567366887633),
 ('XB', 0.028607677965310455),
 ('PC', 0.028903865991370473),
 ('2600', 0.009824136941000915),
 ('PSP', 0.03310383489730073),
 ('XOne', 0.016007771247517476),
 ('GC', 0.02237184193772584),
 ('WiiU', 0.009289636710065092),
 ('GEN', 0.0032183495858471274),
 ('DC', 0.0018123075770796408),
 ('PSV', 0.006990491343024796),
 ('SAT', 0.0038118604579903052),
 ('SCD', 0.000212211344341824),
 ('WS', 0.00016114444329700002),
 ('NG', 0.0001634140833434367),
 ('TG16', 1.815712037149296e-05),
 ('3DO', 1.13482002321831e-05),
 ('GG', 4.53928009287324e-06),
 ('PCFX', 3.4044600696549298e-06)]

### 8. Which genre has the highest total sales in North America i.e <code>na_Sales</code>?

Output the name of the genre with highest total sales in North America.

In [15]:
best_genres_na = {}
for game in games:
    genre = game['genre']
    na_sales = game['na_sales']
    if genre not in best_genres_na:
        best_genres_na[genre] = na_sales
    else:
        best_genres_na[genre] += na_sales

li = sorted(best_genres_na.items(), key=lambda x: x[1])[-1]
li[0]

'Action'

### 9. Select Publishers with <code>global_sales</code> Over 1 Million Copies

Create a dictionary <code>publisher_game_count</code> where the keys are the publishers and the values are the counts of global_sales greater than 1.0 (million).

Example expected output:

<code>{ 'Take-Two Interactive': 91,
 'Sony Computer Entertainment': 147,
 'Activision': 161,
 'Ubisoft': 114,
 'Bethesda Softworks': 22,
 'Electronic Arts': 338,
 'Sega': 74,
 'SquareSoft': 19,
 'Atari': 42,
 '505 Games': 7,...
}</code>

> There can be multiple game with same publisher, you have to take the sum of all the games' global sales for that publisher.

In [18]:
publisher_game_count = {}
for game in games:
    publisher = game['publisher']
    global_sales = game['global_sales']
    if publisher not in publisher_game_count and global_sales > 1.0:
        publisher_game_count[publisher] = 1
    elif publisher in publisher_game_count and global_sales > 1.0:
        publisher_game_count[publisher] += 1

publisher_game_count
    

{'Nintendo': 335,
 'Microsoft Game Studios': 54,
 'Take-Two Interactive': 91,
 'Sony Computer Entertainment': 147,
 'Activision': 159,
 'Ubisoft': 114,
 'Bethesda Softworks': 22,
 'Electronic Arts': 333,
 'Sega': 74,
 'SquareSoft': 19,
 'Atari': 39,
 '505 Games': 7,
 'Capcom': 58,
 'GT Interactive': 4,
 'Konami Digital Entertainment': 59,
 'Sony Computer Entertainment Europe': 7,
 'Square Enix': 41,
 'LucasArts': 32,
 'Virgin Interactive': 12,
 'Warner Bros. Interactive Entertainment': 41,
 'Universal Interactive': 5,
 'Eidos Interactive': 26,
 'RedOctane': 3,
 'Vivendi Games': 11,
 'Enix Corporation': 11,
 'Namco Bandai Games': 51,
 'Palcom': 1,
 'Hasbro Interactive': 6,
 'THQ': 85,
 'Fox Interactive': 2,
 'Acclaim Entertainment': 18,
 'MTV Games': 6,
 'Disney Interactive Studios': 34,
 'Majesco Entertainment': 1,
 'Codemasters': 12,
 'Red Orb': 2,
 'Level 5': 3,
 'Arena Entertainment': 2,
 'Midway Games': 16,
 'JVC': 1,
 'Deep Silver': 5,
 '989 Studios': 6,
 'NCSoft': 1,
 'UEP System

### 10. Name of the game and its release year

Create a list <code>game_year</code> which will have a tuple with the Name of the Game and its Release Year.

Example expected output:

<code>[('Wii Sports', 2006),
 ('Super Mario Bros.', 1985),
 ('Mario Kart Wii', 2008),
 ('Wii Sports Resort', 2009),
 ('Pokemon Red/Pokemon Blue', 1996),...]</code>

In [19]:
game_year = []
for game in games:
    game_year.append((game['name'], game['year']))

game_year

[('Wii Sports', 2006),
 ('Super Mario Bros.', 1985),
 ('Mario Kart Wii', 2008),
 ('Wii Sports Resort', 2009),
 ('Pokemon Red/Pokemon Blue', 1996),
 ('Tetris', 1989),
 ('New Super Mario Bros.', 2006),
 ('Wii Play', 2006),
 ('New Super Mario Bros. Wii', 2009),
 ('Duck Hunt', 1984),
 ('Nintendogs', 2005),
 ('Mario Kart DS', 2005),
 ('Pokemon Gold/Pokemon Silver', 1999),
 ('Wii Fit', 2007),
 ('Wii Fit Plus', 2009),
 ('Kinect Adventures!', 2010),
 ('Grand Theft Auto V', 2013),
 ('Grand Theft Auto: San Andreas', 2004),
 ('Super Mario World', 1990),
 ('Brain Age: Train Your Brain in Minutes a Day', 2005),
 ('Pokemon Diamond/Pokemon Pearl', 2006),
 ('Super Mario Land', 1989),
 ('Super Mario Bros. 3', 1988),
 ('Grand Theft Auto V', 2013),
 ('Grand Theft Auto: Vice City', 2002),
 ('Pokemon Ruby/Pokemon Sapphire', 2002),
 ('Pokemon Black/Pokemon White', 2010),
 ('Brain Age 2: More Training in Minutes a Day', 2005),
 ('Gran Turismo 3: A-Spec', 2001),
 ('Call of Duty: Modern Warfare 3', 2011),
 ('P

## Data Transformation

### 11. Calculate the total sales (sum of <code>na_sales</code>, <code>eu_sales</code>, <code>jp_sales</code>, <code>other_sales</code>) for each platform

Create a new variable named total_sales_by_platform that contains the platform names as keys and their corresponding total sales as values.

Example of expected output:

<code>{'Wii': Decimal('909.20'),
 'NES': Decimal('251.05'),
 'GB': Decimal('254.43'),
 'DS': Decimal('817.89'),
 'X360': Decimal('969.30'),
 'PS3': Decimal('949.39'),
 'PS2': Decimal('1233.56'),
 'SNES': Decimal('200.04'),
 'GBA': Decimal('304.99'),
 '3DS': Decimal('246.17'),
 'PS4': Decimal('278.16'),
 ...}</code>

> For accurate results, use python's Decimal module to perform the addition. You can import the Decimal module using from decimal import Decimal.

In [20]:
from decimal import Decimal
total_sales_by_platform = {}
for game in games:
    plat = game['platform']
    na_sales = str(game['na_sales'])
    eu_sales = str(game['eu_sales'])
    jp_sales= str(game['jp_sales'])
    other_sales = str(game['other_sales'])
    total_sales = Decimal(na_sales) + Decimal(eu_sales) + Decimal(jp_sales) + Decimal(other_sales)
    if plat not in total_sales_by_platform:
        total_sales_by_platform[plat] = total_sales
    else:
        total_sales_by_platform[plat] += total_sales

total_sales_by_platform

{'Wii': Decimal('909.20'),
 'NES': Decimal('251.05'),
 'GB': Decimal('254.43'),
 'DS': Decimal('817.89'),
 'X360': Decimal('969.30'),
 'PS3': Decimal('949.39'),
 'PS2': Decimal('1233.56'),
 'SNES': Decimal('200.04'),
 'GBA': Decimal('304.99'),
 '3DS': Decimal('246.17'),
 'PS4': Decimal('278.16'),
 'N64': Decimal('218.01'),
 'PS': Decimal('727.56'),
 'XB': Decimal('251.57'),
 'PC': Decimal('253.89'),
 '2600': Decimal('86.48'),
 'PSP': Decimal('291.66'),
 'XOne': Decimal('141.10'),
 'GC': Decimal('196.73'),
 'WiiU': Decimal('81.79'),
 'GEN': Decimal('28.35'),
 'DC': Decimal('15.95'),
 'PSV': Decimal('61.61'),
 'SAT': Decimal('33.59'),
 'SCD': Decimal('1.86'),
 'WS': Decimal('1.42'),
 'NG': Decimal('1.44'),
 'TG16': Decimal('0.16'),
 '3DO': Decimal('0.10'),
 'GG': Decimal('0.04'),
 'PCFX': Decimal('0.03')}

### 12. Map Game Titles to their Release Years

Create a dictionary <code>year_game</code> where the keys will be the year and the values will be the list of games released in that year.

Example of expected output:

<code>{2006: ['Wii Sports',
  'Gears of War',
  'Pokemon Diamond/Pokemon Pearl',
  'New Super Mario Bros.',
  'Wii Play',
  'Final Fantasy XII',
  'Brain Age: Train Your Brain in Minutes a Day',
   ...],
 2008: ['Mario Kart Wii',
  'Grand Theft Auto IV',
  'Wii Fit',
   ...],
   ...
}</code>

In [21]:
year_game = {}
for game in games:
    if game['year'] not in year_game:
        year_game[game['year']] = [game['name']]
    else:
        year_game[game['year']].append(game['name'])

year_game

{2006: ['Wii Sports',
  'New Super Mario Bros.',
  'Wii Play',
  'Pokemon Diamond/Pokemon Pearl',
  'The Legend of Zelda: Twilight Princess',
  'Gears of War',
  'Final Fantasy XII',
  'Cooking Mama',
  'Guitar Hero II',
  'Grand Theft Auto: Vice City Stories',
  'Madden NFL 07',
  'Winning Eleven: Pro Evolution Soccer 2007',
  'The Elder Scrolls IV: Oblivion',
  'Resistance: Fall of Man',
  'Daxter',
  'FIFA Soccer 07',
  'MotorStorm',
  'Grand Theft Auto: Liberty City Stories',
  'Clubhouse Games',
  'English Training: Have Fun Improving Your Skills!',
  'Personal Trainer: Cooking',
  'WarioWare: Smooth Moves',
  "Yoshi's Island DS",
  'Call of Duty 3',
  'LEGO Star Wars II: The Original Trilogy',
  'Hannah Montana',
  'Medal of Honor Heroes',
  'Pac-Man: Adventures in Time',
  'WWE SmackDown vs. RAW 2007',
  'Marvel: Ultimate Alliance',
  'Need for Speed Carbon: Own the City',
  'Super Monkey Ball: Banana Blitz',
  "Devil May Cry 3: Dante's Awakening Special Edition",
  'Saints Row'

### 13. Determine the platforms associated with each publisher

Create a dictionary <code>publisher_platform</code> where the keys will be the publisher and the values will be the list of platforms associated with that publisher.

Example of expected output:

<code>{   'Nintendo': ['Wii', 'NES', 'GB', 'DS', 'SNES', ... ],
    'Microsoft Game Studios': ['X360', 'PC', 'XOne', 'XB', 'XBL', ... ],
    'Take-Two Interactive': ['PS2', 'PS3', 'X360', 'PS4', 'PC', ... ],
    'Sony Computer Entertainment': ['PS', 'PS2', 'PS3', 'PS4', 'PSP', ... ],
    'Activision': ['PS2', 'PS3', 'X360', 'PS', 'Wii', ... ],
     ...
}</code>

> If a publisher has multiple games on the same platform, then the platform should only be listed once.

In [24]:
publisher_platform = {}

for game in games:
    if game['publisher'] not in publisher_platform:
        publisher_platform[game['publisher']] = [game['platform']]
    elif game['platform'] not in publisher_platform[game['publisher']]:
        publisher_platform[game['publisher']].append(game['platform'])

publisher_platform

{'Nintendo': ['Wii',
  'NES',
  'GB',
  'DS',
  'SNES',
  'GBA',
  '3DS',
  'N64',
  'GC',
  'WiiU'],
 'Microsoft Game Studios': ['X360', 'XB', 'PC', 'XOne', 'WiiU'],
 'Take-Two Interactive': ['PS3',
  'PS2',
  'X360',
  'PS4',
  'PSP',
  'XOne',
  'Wii',
  'PS',
  'XB',
  'DS',
  'PC',
  'PSV',
  'N64',
  'GBA',
  'GC',
  'WiiU',
  '3DS'],
 'Sony Computer Entertainment': ['PS2', 'PS', 'PS3', 'PS4', 'PSP', 'PSV'],
 'Activision': ['X360',
  'PS4',
  'PS3',
  'XOne',
  'PC',
  'PS',
  'PS2',
  'Wii',
  '2600',
  'DS',
  'XB',
  'PSP',
  'N64',
  'PSV',
  'GBA',
  'GB',
  'GC',
  'WiiU',
  '3DS',
  'DC'],
 'Ubisoft': ['Wii',
  'PS3',
  'X360',
  'PS4',
  'XOne',
  'PS',
  'XB',
  'DS',
  'PS2',
  'PSV',
  'PC',
  'PSP',
  'GBA',
  'WiiU',
  'GC',
  '3DS',
  'N64',
  'DC',
  'GB'],
 'Bethesda Softworks': ['X360',
  'PS4',
  'PS3',
  'XOne',
  'PC',
  'PS2',
  'Wii',
  'XB'],
 'Electronic Arts': ['PS4',
  'PS3',
  'PC',
  'X360',
  'PS2',
  'Wii',
  'PS',
  'DS',
  'XOne',
  'PSP',
  'XB',


### 14. Normalize the games collection

we have a variable called games that contains details of various video games, such as their names, platforms, release years, genres, publishers, and sales figures in different regions. However, we noticed that some games share the same year, platform, genre, and publisher, leading to duplicate entries in the dataset.

To address this issue, you will create a new variable called <code>normalized_games</code> in a way that it contains all the unique games' details without any duplications based on the combination of year, platform, genre, and publisher.

Expected structure of the normalized_games variable:

<code>
normalized_games = {
    year: {
        platform: {
            genre: {
                publisher: {
                    games: [
                        {
                            'name': name,
                            'na_sales': na_sales,
                            'eu_sales': eu_sales,
                            'jp_sales': jp_sales,
                            'other_sales': other_sales,
                            'global_sales': global_sales
                        },
                        ...
                    ]
                }
            }
        }
    }
}
</code>

In [28]:
normalized_games = {}
for game in games:
    if game['year'] not in normalized_games:
        normalized_games[game['year']] = {}
    if game['platform'] not in normalized_games[game['year']]:
        normalized_games[game['year']][game['platform']] = {}
    if game['genre'] not in normalized_games[game['year']][game['platform']]:
        normalized_games[game['year']][game['platform']][game['genre']] = {}
    if game['publisher'] not in normalized_games[game['year']][game['platform']][game['genre']]:
        normalized_games[game['year']][game['platform']][game['genre']][game['publisher']] = {'games' : []}
    
    game_info = {
        'name': game['name'],
        'na_sales' : game['na_sales'],
        'eu_sales' : game['eu_sales'],
        'jp_sales' : game['jp_sales'],
        'other_sales' : game['other_sales'],
        'globals_sales' : game['global_sales']
    }

    normalized_games[game['year']][game['platform']][game['genre']][game['publisher']]['games'].append(game_info)

normalized_games

{2006: {'Wii': {'Sports': {'Nintendo': {'games': [{'name': 'Wii Sports',
       'na_sales': 41.49,
       'eu_sales': 29.02,
       'jp_sales': 3.77,
       'other_sales': 8.46,
       'globals_sales': 82.74},
      {'name': 'Super Swing Golf',
       'na_sales': 0.14,
       'eu_sales': 0.01,
       'jp_sales': 0.07,
       'other_sales': 0.01,
       'globals_sales': 0.23}]},
    'Activision': {'games': [{'name': 'Rapala Tournament Fishing!',
       'na_sales': 0.51,
       'eu_sales': 0.0,
       'jp_sales': 0.0,
       'other_sales': 0.04,
       'globals_sales': 0.56}]},
    'Electronic Arts': {'games': [{'name': 'Madden NFL 07',
       'na_sales': 0.46,
       'eu_sales': 0.0,
       'jp_sales': 0.0,
       'other_sales': 0.04,
       'globals_sales': 0.5}]}},
   'Misc': {'Nintendo': {'games': [{'name': 'Wii Play',
       'na_sales': 14.03,
       'eu_sales': 9.2,
       'jp_sales': 2.93,
       'other_sales': 2.85,
       'globals_sales': 29.02}]},
    'Sega': {'games': [{'name'

## Data Transformation & Aggregation

### 15. Identify the top 3 platforms with the highest average sales per game, considering only platforms that have released at least 50 games.

Create a sorted list named <code>top_3_platforms</code> with the top 3 platforms with the highest average sales per game, considering only platforms that have released at least 50 games.

In [36]:
# get all the platform sales
platform_sales = {}
for game in games:
    platform = game['platform']
    global_sales = game['global_sales']
    if platform not in platform_sales:
        platform_sales[platform] = []
    platform_sales[platform].append(global_sales)

# filter the dictionary to exclude those with less than 50 games
filtered_platform_sales = {}
for platform, sales in platform_sales.items():
    if len(sales) >= 50:
        filtered_platform_sales[platform] = sum(sales) / len(sales)  # calculates the average

top_3_platforms = sorted(filtered_platform_sales, key=filtered_platform_sales.get, reverse=True)[:3]
top_3_platforms

['GB', 'NES', 'SNES']

### 16. Determine the genre that has the highest total sales in North America and Europe combined.

Enter the name of the genre with highest total sales in North America and Europe as a string.

In [40]:
genre_sales = {}

for game in games:
    genre = game['genre']
    na_sales = game['na_sales']
    eu_sales = game['eu_sales']
    genre_sales.setdefault(genre, 0)
    genre_sales[genre] += na_sales + eu_sales

top_genre = max(genre_sales, key=genre_sales.get)
print(top_genre)

Action


### 17. Calculate the average sales for each publisher

Create a dictionary <code>publisher_avg_sales</code> where the keys will be the publisher and the values will be the average sales for each publisher. Average sales will be the <code>Publisher's Global_Sales / Total Global_Sales</code>.

Example of expected output:

<code>{'Nintendo': 0.196,
 'Microsoft Game Studios': 0.104,
 'Take-Two Interactive': 0.055,
 'Sony Computer Entertainment': 0.081,
  ...
}</code>

> For accurate results, use python's Decimal module to perform the division. You can import the Decimal module using from decimal import Decimal

In [42]:
from decimal import Decimal
publisher_sales = {}
for game in games:
    publisher = game['publisher']
    global_sales = Decimal(str(game['global_sales']))
    if publisher not in publisher_sales:
        publisher_sales.setdefault(publisher, Decimal('0'))
    publisher_sales[publisher] += global_sales

total_global_sales = Decimal('0')
for sales in publisher_sales.values():
    total_global_sales += sales

publisher_avg_sales = {}

for publisher, sales in publisher_sales.items():
    publisher_avg_sales[publisher] = sales / total_global_sales

publisher_avg_sales
    


{'Nintendo': Decimal('0.2025006894031641051887375922'),
 'Microsoft Game Studios': Decimal('0.02789274135068548803502508520'),
 'Take-Two Interactive': Decimal('0.04531336352711141776469960747'),
 'Sony Computer Entertainment': Decimal('0.06891535037000806857036508295'),
 'Activision': Decimal('0.08186705129499986949569732988'),
 'Ubisoft': Decimal('0.05373826737948495058426208895'),
 'Bethesda Softworks': Decimal('0.009252187649299759304673075374'),
 'Electronic Arts': Decimal('0.1240800865186785701721635457'),
 'Sega': Decimal('0.03071957802852256646357170984'),
 'SquareSoft': Decimal('0.006542237433854177896656479766'),
 'Atari': Decimal('0.01665575348077671621669161379'),
 '505 Games': Decimal('0.006344778749814173221197984106'),
 'Capcom': Decimal('0.02269072636425226141260126850'),
 'GT Interactive': Decimal('0.002863150918580067794148187068'),
 'Konami Digital Entertainment': Decimal('0.03161154656677224275616008679'),
 'Sony Computer Entertainment Europe': Decimal('0.0027110850

### 18. Find 5 games which have the highest rank among all games, and store the rank, name and genre as tuples in the list <code>top_ranked</code>

Create a list <code>top_ranked</code> which will have a tuple containing the rank, names, and the genre of the selected games.

Example of expected output:

<code>[(1, 'Wii Sports', 'Sports'),
 (2, 'Super Mario Bros.', 'Platform'),
 (3, 'Mario Kart Wii', 'Racing'),...]</code>


In [44]:
tuples = []
for game in games:
    rank = game['rank']
    name = game['name']
    genre = game['genre']
    tuples.append((rank, name, genre))

top_ranked = tuples[:5]
top_ranked


[(1, 'Wii Sports', 'Sports'),
 (2, 'Super Mario Bros.', 'Platform'),
 (3, 'Mario Kart Wii', 'Racing'),
 (4, 'Wii Sports Resort', 'Sports'),
 (5, 'Pokemon Red/Pokemon Blue', 'Role-Playing')]

### 19. Determine the Top Genre with the Highest Global Sales

Construct a sorted list named <code>top_genre</code>, consisting of genres with the highest average sales per game. Only consider games that have been released in the last 10 years.

> Note: 'last 10 year' corresponds to the last decade based on the latest year present in the dataset.

Expected output:

<code>
['Shooter', 
 'Platform',
  ...
]</code>

In [51]:
latest_year = 0
for game in games:
    year = game['year']
    if year > latest_year:
        latest_year = year

# Genre sales for the last 10 years
genre_sales = {}
for game in games:
    genre = game['genre']
    year = game['year']
    if year >= latest_year - 10:
        if genre not in genre_sales:
            genre_sales[genre] = []
        genre_sales[genre].append(game['global_sales'])

# Average sales for each genre
genre_avg_sales = {}
for genre, sales in genre_sales.items():
    game_count = len(sales)
    total_sales = 0
    for sale in sales:
        total_sales += sale
    avg_sales = total_sales / game_count
    genre_avg_sales[genre] = avg_sales

sorted_genres = list(genre_avg_sales.keys())
sorted_genres.sort(key=genre_avg_sales.get, reverse=True)
top_genre = sorted_genres

### 20. Calculate the percentage contribution of each genre to the total global sales

Create a dictionary <code>genre_percentage_sales</code> which contains the genre name as keys and the percentage contribution to the global sales as values.

Example of expected output:

<code>{'Platform': 9.319831757177944,
 'Racing': 8.206321661263361,
 'Role-Playing': 10.39601185591744,
 'Puzzle': 2.7459407831900946,
 'Misc': 9.07982117474026,...}</code>


In [58]:
# collect sales per genre
genre_sales = {}
for game in games:
    genre = game['genre']
    global_sales = game['global_sales']
    if genre not in genre_sales:
        genre_sales[genre] = 0
    genre_sales[genre] += global_sales

# total sales
total_sales_all_genres = 0
for genre, sales in genre_sales.items():
    total_sales_all_genres += sales

genre_percentage_sales = {}
for genre, sales in genre_sales.items():
    genre_percentage_sales[genre] = sales / total_sales_all_genres * 100

genre_percentage_sales

{'Sports': 14.857517671984782,
 'Platform': 9.40913325851091,
 'Racing': 8.247418000742181,
 'Role-Playing': 10.483807820498718,
 'Puzzle': 2.7486475782373554,
 'Misc': 8.963602917395312,
 'Shooter': 11.64552307826744,
 'Simulation': 4.425571126547195,
 'Action': 19.551133288016,
 'Fighting': 5.039168313101413,
 'Adventure': 2.6621742924681207,
 'Strategy': 1.9663026542305704}

### 21. Determine How Many Publishers Published the Top 100 Games

In this activity, you are being tasked to compute the number of unique publishers who have had their games ranked in the top 100 list. The count should be returned as an integer. The ranking of the games should be considered in your selection.

In [59]:
publishers_top100 = set()
for game in games:
    publisher = game['publisher']
    rank = game['rank']
    if rank <= 100 and publisher not in publishers_top100:
        publishers_top100.add(publisher)

len(publishers_top100)

11

### 22. How many games are published by each publisher

Create a list <code>publisher_game_count</code> which will have a tuple with the Name of the Publisher and the number of games published by that publisher.

Example expected output:

<code>[('Nintendo', 703),
 ('Microsoft Game Studios', 189),
 ('Take-Two Interactive', 413),
 ('Sony Computer Entertainment', 409),
  ...
]</code>

In [61]:
publisher_count ={}
for game in games:
    if game['publisher'] not in publisher_count:
        publisher_count[game['publisher']] = 1
    else:
        publisher_count[game['publisher']] += 1

publisher_game_count = []
for publisher, count in publisher_count.items():
    publisher_game_count.append((publisher, count))
publisher_game_count

[('Nintendo', 696),
 ('Microsoft Game Studios', 189),
 ('Take-Two Interactive', 412),
 ('Sony Computer Entertainment', 682),
 ('Activision', 966),
 ('Ubisoft', 918),
 ('Bethesda Softworks', 69),
 ('Electronic Arts', 1339),
 ('Sega', 632),
 ('SquareSoft', 52),
 ('Atari', 347),
 ('505 Games', 192),
 ('Capcom', 376),
 ('GT Interactive', 45),
 ('Konami Digital Entertainment', 823),
 ('Sony Computer Entertainment Europe', 15),
 ('Square Enix', 231),
 ('LucasArts', 89),
 ('Virgin Interactive', 62),
 ('Warner Bros. Interactive Entertainment', 217),
 ('Universal Interactive', 22),
 ('Eidos Interactive', 196),
 ('RedOctane', 4),
 ('Vivendi Games', 161),
 ('Enix Corporation', 30),
 ('Namco Bandai Games', 928),
 ('Palcom', 1),
 ('Hasbro Interactive', 16),
 ('THQ', 712),
 ('Fox Interactive', 8),
 ('Acclaim Entertainment', 184),
 ('MTV Games', 40),
 ('Disney Interactive Studios', 214),
 ('Majesco Entertainment', 90),
 ('Codemasters', 150),
 ('Red Orb', 2),
 ('Level 5', 27),
 ('Arena Entertainment',