# Data Analysis with Pandas
## Video Game Sale
### Skyler Johnson, 6-2-2021

In [1]:
import pandas as pd

In [2]:
df = pd.read_csv('./vgsales.csv')

In [3]:
df

Unnamed: 0,Rank,Name,Platform,Year,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales
0,1,Wii Sports,Wii,2006.0,Sports,Nintendo,41.49,29.02,3.77,8.46,82.74
1,2,Super Mario Bros.,NES,1985.0,Platform,Nintendo,29.08,3.58,6.81,0.77,40.24
2,3,Mario Kart Wii,Wii,2008.0,Racing,Nintendo,15.85,12.88,3.79,3.31,35.82
3,4,Wii Sports Resort,Wii,2009.0,Sports,Nintendo,15.75,11.01,3.28,2.96,33.00
4,5,Pokemon Red/Pokemon Blue,GB,1996.0,Role-Playing,Nintendo,11.27,8.89,10.22,1.00,31.37
...,...,...,...,...,...,...,...,...,...,...,...
16593,16596,Woody Woodpecker in Crazy Castle 5,GBA,2002.0,Platform,Kemco,0.01,0.00,0.00,0.00,0.01
16594,16597,Men in Black II: Alien Escape,GC,2003.0,Shooter,Infogrames,0.01,0.00,0.00,0.00,0.01
16595,16598,SCORE International Baja 1000: The Official Game,PS2,2008.0,Racing,Activision,0.00,0.00,0.00,0.00,0.01
16596,16599,Know How 2,DS,2010.0,Puzzle,7G//AMES,0.00,0.01,0.00,0.00,0.01


### Which company is the most common video game publisher?


In [4]:
most_common_publisher = df['Publisher'].mode()[0]
most_common_publisher


'Electronic Arts'

### What’s the most common platform?


In [5]:
most_common_platform = df['Platform'].mode()[0]
most_common_platform


'DS'

### What about the most common genre?


In [6]:
most_common_genre = df['Genre'].mode()[0]
most_common_genre

'Action'

### What are the top 20 highest grossing games?


In [7]:
top_twenty_highest_grossing_games = df[['Name', 'Global_Sales']].head(20).set_index('Global_Sales')
top_twenty_highest_grossing_games

Unnamed: 0_level_0,Name
Global_Sales,Unnamed: 1_level_1
82.74,Wii Sports
40.24,Super Mario Bros.
35.82,Mario Kart Wii
33.0,Wii Sports Resort
31.37,Pokemon Red/Pokemon Blue
30.26,Tetris
30.01,New Super Mario Bros.
29.02,Wii Play
28.62,New Super Mario Bros. Wii
28.31,Duck Hunt


### For North American video game sales, what’s the median?


In [8]:
na_median_sales = df[['Name', 'NA_Sales']].median()[0]
na_median_sales


0.08

### Provide a secondary output showing ten games surrounding the median sales output


In [9]:
df[df['NA_Sales'] == 0.08].head()


Unnamed: 0,Rank,Name,Platform,Year,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales
446,447,Dragon Warrior IV,NES,1990.0,Role-Playing,Enix Corporation,0.08,0.0,3.03,0.01,3.12
497,498,World Soccer Winning Eleven 7 International,PS2,2003.0,Sports,Konami Digital Entertainment,0.08,1.24,1.13,0.45,2.9
1617,1619,Farming Simulator 2015,PC,2014.0,Simulation,Focus Home Interactive,0.08,1.02,0.0,0.13,1.23
1926,1928,Pro Evolution Soccer 2008,X360,2007.0,Sports,Konami Digital Entertainment,0.08,0.9,0.04,0.05,1.07
2067,2069,Winning Eleven: Pro Evolution Soccer 2007 (All...,X360,2006.0,Sports,Konami Digital Entertainment,0.08,0.9,0.02,0.0,1.0


In [10]:
df[df['NA_Sales'] == 0.08].tail()

Unnamed: 0,Rank,Name,Platform,Year,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales
11403,11405,My English Coach: Para Hispanoparlantes,DS,2009.0,Misc,Ubisoft,0.08,0.0,0.0,0.01,0.08
11431,11433,Little League World Series Baseball: Double Play,DS,2010.0,Sports,Activision,0.08,0.0,0.0,0.01,0.08
11432,11434,DanceDanceRevolution,X360,2011.0,Simulation,Konami Digital Entertainment,0.08,0.0,0.0,0.01,0.08
11455,11457,The Hidden,3DS,,Adventure,Unknown,0.08,0.0,0.0,0.0,0.08
11492,11494,Ultimate Shooting Collection,Wii,2008.0,Shooter,Milestone,0.08,0.0,0.0,0.0,0.08


### For the top-selling game of all time, how many standard deviations above/below the mean are its sales for North America?

In [11]:
max = df[['NA_Sales']].max()
dev = df[['NA_Sales']].std()
mean = df[['NA_Sales']].mean()
answer = (max-mean)/dev

print(answer[0])

50.47898767479108


In [12]:
df

Unnamed: 0,Rank,Name,Platform,Year,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales
0,1,Wii Sports,Wii,2006.0,Sports,Nintendo,41.49,29.02,3.77,8.46,82.74
1,2,Super Mario Bros.,NES,1985.0,Platform,Nintendo,29.08,3.58,6.81,0.77,40.24
2,3,Mario Kart Wii,Wii,2008.0,Racing,Nintendo,15.85,12.88,3.79,3.31,35.82
3,4,Wii Sports Resort,Wii,2009.0,Sports,Nintendo,15.75,11.01,3.28,2.96,33.00
4,5,Pokemon Red/Pokemon Blue,GB,1996.0,Role-Playing,Nintendo,11.27,8.89,10.22,1.00,31.37
...,...,...,...,...,...,...,...,...,...,...,...
16593,16596,Woody Woodpecker in Crazy Castle 5,GBA,2002.0,Platform,Kemco,0.01,0.00,0.00,0.00,0.01
16594,16597,Men in Black II: Alien Escape,GC,2003.0,Shooter,Infogrames,0.01,0.00,0.00,0.00,0.01
16595,16598,SCORE International Baja 1000: The Official Game,PS2,2008.0,Racing,Activision,0.00,0.00,0.00,0.00,0.01
16596,16599,Know How 2,DS,2010.0,Puzzle,7G//AMES,0.00,0.01,0.00,0.00,0.01


### The Nintendo Wii seems to have outdone itself with games. How does its average number of sales compare with all of the other platforms?

In [13]:
wii_games = df[df['Platform'] == 'Wii']
wii_games_sale = df[df['Global_Sales'] == 'Wii']
wii_mean_sales = wii_games['Global_Sales'].mean()
wii_games
wii_games_sale
wii_mean_sales

0.6994037735849057

In [14]:
not_wii_games = df[df['Platform'] != 'Wii']
not_wii_games_sale = df[df['Global_Sales'] != 'Wii']
mean_sales = not_wii_games_sale['Global_Sales'].mean()
not_wii_games
not_wii_games_sale
mean_sales

0.5374406555006628

In [15]:
wii_mean_sales/mean_sales

1.3013600039866042

### How many games with genre 'shooter'?


In [16]:
shooter = df[df['Genre'] == 'Shooter']
# shooter
shooter_total = shooter['Genre'].count()
shooter_total

1310

In [17]:
def test():

    def assert_equal(actual,expected):
        assert actual == expected, f"Expected {expected} but got {actual}"

    assert_equal(most_common_publisher, 'Electronic Arts')
    assert_equal(most_common_platform, 'DS')
    assert_equal(most_common_genre, 'Action')
    assert_equal(top_twenty_highest_grossing_games.iloc[0].Name, 'Wii Sports')
    assert_equal(top_twenty_highest_grossing_games.iloc[19].Name, 'Brain Age: Train Your Brain in Minutes a Day')
    assert_equal(na_median_sales, 0.08)
    # assert_equal(ten_median_na_seller_names, None)
    assert_equal(shooter_total, 1310)

    print("Success!!!")

test()

Success!!!
