# Video Game Sales 

## Data Set : vg-stats.  

## Owner : Samer Odeh.

## Date : 10 / 8 / 2021

In [1]:
import pandas as pd 

In [2]:
df = pd.read_csv('./vgsales.csv')

In [3]:
df.keys()

Index(['Rank', 'Name', 'Platform', 'Year', 'Genre', 'Publisher', 'NA_Sales',
       'EU_Sales', 'JP_Sales', 'Other_Sales', 'Global_Sales'],
      dtype='object')

# Which company is the most common video game publisher ? 

In [4]:
most_common_publisher = df['Publisher']
most_common_publisher.mode()

0    Electronic Arts
dtype: object

# What’s the most common platform ?

In [5]:
most_common_platform = df['Platform']
most_common_platform.mode()

0    DS
dtype: object

# What about the most common genre ?

In [6]:
most_common_genre = df['Genre']
most_common_genre.mode()

0    Action
dtype: object

# What are the top 20 highest grossing games ?

In [7]:
highest_grossing_games = df.sort_values('Global_Sales', ascending=False)
highest_grossing_games.head(20)

Unnamed: 0,Rank,Name,Platform,Year,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales
0,1,Wii Sports,Wii,2006.0,Sports,Nintendo,41.49,29.02,3.77,8.46,82.74
1,2,Super Mario Bros.,NES,1985.0,Platform,Nintendo,29.08,3.58,6.81,0.77,40.24
2,3,Mario Kart Wii,Wii,2008.0,Racing,Nintendo,15.85,12.88,3.79,3.31,35.82
3,4,Wii Sports Resort,Wii,2009.0,Sports,Nintendo,15.75,11.01,3.28,2.96,33.0
4,5,Pokemon Red/Pokemon Blue,GB,1996.0,Role-Playing,Nintendo,11.27,8.89,10.22,1.0,31.37
5,6,Tetris,GB,1989.0,Puzzle,Nintendo,23.2,2.26,4.22,0.58,30.26
6,7,New Super Mario Bros.,DS,2006.0,Platform,Nintendo,11.38,9.23,6.5,2.9,30.01
7,8,Wii Play,Wii,2006.0,Misc,Nintendo,14.03,9.2,2.93,2.85,29.02
8,9,New Super Mario Bros. Wii,Wii,2009.0,Platform,Nintendo,14.59,7.06,4.7,2.26,28.62
9,10,Duck Hunt,NES,1984.0,Shooter,Nintendo,26.93,0.63,0.28,0.47,28.31


# For North American video game sales, what’s the median ? 

In [8]:
north_american_median = df['NA_Sales']
north_american_median.median()

0.08

# Provide a secondary output showing ten games surrounding the median sales output 

In [9]:
ten_games_surrounding = df[df['NA_Sales'] == north_american_median]
ten_games_surrounding.sample(10)

Unnamed: 0,Rank,Name,Platform,Year,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales
7657,7659,UFC Personal Trainer: The Ultimate Fitness System,Wii,2011.0,Sports,THQ,0.12,0.06,0.0,0.02,0.2
8522,8524,N2O: Nitrous Oxide,PS,1998.0,Racing,Gremlin Interactive Ltd,0.09,0.06,0.0,0.01,0.16
10133,10135,Home Run King,GC,2002.0,Sports,Sega,0.09,0.02,0.0,0.0,0.11
319,320,Mario & Luigi: Bowser's Inside Story,DS,2009.0,Role-Playing,Nintendo,2.26,0.48,0.81,0.27,3.83
4434,4436,G-Force,Wii,2009.0,Action,Disney Interactive Studios,0.19,0.2,0.0,0.05,0.44
1373,1375,Ridge Racer,PSP,2004.0,Racing,Sony Computer Entertainment,0.36,0.45,0.32,0.28,1.41
10219,10221,Chameleon Twist,N64,1997.0,Platform,Sunsoft,0.09,0.02,0.0,0.0,0.11
1135,1137,Rage,X360,2011.0,Shooter,Bethesda Softworks,0.82,0.61,0.03,0.15,1.62
8360,8362,Gallop Racer 2001,PS2,2001.0,Sports,Tecmo Koei,0.05,0.04,0.06,0.01,0.17
14448,14451,Go! Go! Hypergrind,GC,2003.0,Sports,Atlus,0.02,0.01,0.0,0.0,0.03


# For the top-selling game of all time, how many standard deviations above/below the mean are its sales for North America ?

In [10]:
north_america_mean = df['NA_Sales']
north_america_mean.mean()
na_mean = north_america_mean.mean() 
print(na_mean)

north_america_global_top = df['NA_Sales']
north_america_global_top.iloc()[0]
global_top = north_america_global_top.iloc()[0]
print(global_top)

north_america_standard = df['NA_Sales']
north_america_standard.std()
north_america_std = north_america_standard.std()
print(north_america_std)

north_america_mean_north_america_global_top_distance = global_top - na_mean
north_america_mean_north_america_global_top_distance

north_america_deviations = north_america_mean_north_america_global_top_distance / north_america_std 
north_america_deviations

0.26466742981082064
41.49
0.8166830292988796


50.47898767479108

# The Nintendo Wii seems to have outdone itself with games. How does its average number of sales compare with all of the other platforms ?

In [11]:
other_platforms_m = df[df['Platform'] !='Wii'].Global_Sales.mean()
print("other platforms: ", other_platforms_m)

wii_platform_m = df[df['Platform'] =='Wii'].Global_Sales.mean()
print("Wii platform: ", wii_platform_m)

difference = abs(wii_platform_m - other_platforms_m)
print ("Difference: ", difference)

other platforms:  0.5233896418516336
Wii platform:  0.6994037735849057
Difference:  0.17601413173327207


# Top 5 Racing games ?

In [12]:
top_Racing_games = df[df['Genre'] == 'Racing'].sort_values('Global_Sales', ascending=False)
top_Racing_games.head(5)

Unnamed: 0,Rank,Name,Platform,Year,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales
2,3,Mario Kart Wii,Wii,2008.0,Racing,Nintendo,15.85,12.88,3.79,3.31,35.82
11,12,Mario Kart DS,DS,2005.0,Racing,Nintendo,9.81,7.57,4.13,1.92,23.42
28,29,Gran Turismo 3: A-Spec,PS2,2001.0,Racing,Sony Computer Entertainment,6.85,5.09,1.87,1.16,14.98
42,43,Mario Kart 7,3DS,2011.0,Racing,Nintendo,4.74,3.91,2.67,0.89,12.21
47,48,Gran Turismo 4,PS2,2004.0,Racing,Sony Computer Entertainment,3.01,0.01,1.1,7.53,11.66


# What newest 5 games ?

In [13]:
newest_games = df.sort_values('Year',ascending=False)
newest_games.head(5)

Unnamed: 0,Rank,Name,Platform,Year,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales
5957,5959,Imagine: Makeup Artist,DS,2020.0,Simulation,Ubisoft,0.27,0.0,0.0,0.02,0.29
14390,14393,Phantasy Star Online 2 Episode 4: Deluxe Package,PS4,2017.0,Role-Playing,Sega,0.0,0.0,0.03,0.0,0.03
16241,16244,Phantasy Star Online 2 Episode 4: Deluxe Package,PSV,2017.0,Role-Playing,Sega,0.0,0.0,0.01,0.0,0.01
16438,16441,Brothers Conflict: Precious Baby,PSV,2017.0,Action,Idea Factory,0.0,0.0,0.01,0.0,0.01
8293,8295,Shin Megami Tensei IV: Final,3DS,2016.0,Role-Playing,Deep Silver,0.03,0.0,0.14,0.0,0.17


# What is the most popular game in Europe ?

In [14]:
popular_game_in_europe = df.sort_values('EU_Sales', ascending=False)
popular_game_in_europe.iloc()[0]

Rank                     1
Name            Wii Sports
Platform               Wii
Year                2006.0
Genre               Sports
Publisher         Nintendo
NA_Sales             41.49
EU_Sales             29.02
JP_Sales              3.77
Other_Sales           8.46
Global_Sales         82.74
Name: 0, dtype: object

In [15]:
def test():

    def assert_equal(actual,expected):
        assert actual == expected, f"Expected {expected} but got {actual}"

        assert_equal(most_common_publisher, None)
        assert_equal(most_common_platform, None)
        assert_equal(most_common_genre, None)
        assert_equal(highest_grossing_games.iloc[0].Name, None)
        assert_equal(highest_grossing_games.iloc[19].Name, None)
        assert_equal(north_american_median, None)
        assert_equal(ten_games_surrounding, None)
        assert_equal(popular_game_in_europe.iloc()[0], None)    

    print("pass tests")

test()

Success!!!
