# Data analysis for Video Game Sales


In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [4]:
# Load the video game sales table; the relative path is resolved against the
# kernel's current working directory.
df = pd.read_csv(filepath_or_buffer='vgsales.csv')

In [15]:
# Column labels of the loaded frame (the same Index that DataFrame.keys() returns).
df.columns

Index(['Rank', 'Name', 'Platform', 'Year', 'Genre', 'Publisher', 'NA_Sales',
       'EU_Sales', 'JP_Sales', 'Other_Sales', 'Global_Sales'],
      dtype='object')

### Which company is the most common video game publisher?


In [34]:
# mode() returns every value tied for most frequent as a Series labelled from 0;
# label 0 is the first of them.
most_common_publisher = df.loc[:, 'Publisher'].mode().loc[0]
most_common_publisher

'Electronic Arts'

### What’s the most common platform?


In [20]:
# Most frequent platform label; on ties mode() lists all of them and label 0 is shown.
df.Platform.mode().loc[0]

'DS'

### What about the most common genre?

In [23]:
# Most frequent genre label (first entry of the mode Series).
df.loc[:, 'Genre'].mode().loc[0]

'Action'

### What are the top 20 highest grossing games?

In [25]:
# Order all rows by worldwide sales, highest first, then show the titles of
# the first 20 rows.
(
    df.sort_values(by='Global_Sales', ascending=False)
      .loc[:, 'Name']
      .iloc[:20]
)

0                                       Wii Sports
1                                Super Mario Bros.
2                                   Mario Kart Wii
3                                Wii Sports Resort
4                         Pokemon Red/Pokemon Blue
5                                           Tetris
6                            New Super Mario Bros.
7                                         Wii Play
8                        New Super Mario Bros. Wii
9                                        Duck Hunt
10                                      Nintendogs
11                                   Mario Kart DS
12                     Pokemon Gold/Pokemon Silver
13                                         Wii Fit
14                                    Wii Fit Plus
15                              Kinect Adventures!
16                              Grand Theft Auto V
17                   Grand Theft Auto: San Andreas
18                               Super Mario World
19    Brain Age: Train Your Bra

### For North American video game sales, what’s the median?

In [28]:
# Middle value of the North American sales column (missing values are
# skipped by pandas' default skipna=True).
median = df.loc[:, 'NA_Sales'].median()
median

0.08

### Provide a secondary output showing ten games surrounding the median sales output

In [63]:
# Games whose North American sales equal the `median` value from the previous cell.
# A fixed random_state keeps the displayed sample identical across
# Restart & Run All, and capping n avoids the ValueError that sample() raises
# when fewer than ten rows match.
at_median = df[df['NA_Sales'] == median]
at_median.sample(n=min(10, len(at_median)), random_state=42)

Unnamed: 0,Rank,Name,Platform,Year,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales
10542,10544,Capcom vs. SNK 2 EO,XB,2003.0,Fighting,Capcom,0.08,0.02,0.0,0.0,0.1
9308,9310,Mary-Kate and Ashley: Crush Course,PS,2001.0,Action,Acclaim Entertainment,0.08,0.05,0.0,0.01,0.14
10804,10806,Imagine: Reporter,DS,2010.0,Simulation,Ubisoft,0.08,0.01,0.0,0.01,0.1
10610,10612,NBA Live 16,XOne,2015.0,Sports,Electronic Arts,0.08,0.01,0.0,0.01,0.1
10173,10175,XXX,GBA,2002.0,Action,Activision,0.08,0.03,0.0,0.0,0.11
4069,4071,Jake Power: Fireman,DS,2008.0,Adventure,Ubisoft,0.08,0.35,0.0,0.06,0.49
11205,11207,Midnight Play! Pack,DS,2008.0,Misc,Ubisoft,0.08,0.0,0.0,0.01,0.09
10084,10086,Batman: Rise of Sin Tzu,XB,2003.0,Action,Ubisoft,0.08,0.02,0.0,0.0,0.11
8468,8470,Vehicular Combat League presents Motor Mayhem,PS2,2001.0,Racing,Atari,0.08,0.06,0.0,0.02,0.16
10042,10044,Tom Clancy's Rainbow Six: Rogue Spear,GBA,2002.0,Shooter,Ubisoft,0.08,0.03,0.0,0.0,0.11


### For the top-selling game of all time, how many standard deviations above/below the mean are its sales for North America?

In [14]:
# Arithmetic mean of North American sales over every row of the dataset.
na_mean = df.loc[:, 'NA_Sales'].mean()
na_mean

0.26466742981082064

In [12]:
# North American sales of the title with the highest Global_Sales.
# The row is located with idxmax() instead of taking position 0, so the result
# does not depend on the CSV happening to be sorted with the best seller first.
top_seller_label = df['Global_Sales'].idxmax()
global_top = df.loc[top_seller_label, 'NA_Sales']
global_top

41.49

In [6]:
# Sample standard deviation of North American sales; ddof=1 is the pandas
# default, written out here so the normalisation is visible to the reader.
na_std = df.loc[:, 'NA_Sales'].std(ddof=1)
na_std

0.8166830292988796

In [15]:
# Signed gap between the selected game's NA sales (`global_top`) and the
# dataset-wide NA mean; positive means above the mean.
distance = global_top - na_mean
distance

41.22533257018918

In [16]:
# Express the gap in units of the NA standard deviation (a z-score).
deviations_from_mean = distance / na_std
deviations_from_mean

50.47898767479108

### The Nintendo Wii seems to have outdone itself with games. How does its average number of sales compare with all of the other platforms?

In [80]:
# Keep only the rows whose Platform is exactly 'Wii'.
is_wii = df['Platform'].eq('Wii')
wii = df.loc[is_wii]
wii

Unnamed: 0,Rank,Name,Platform,Year,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales
0,1,Wii Sports,Wii,2006.0,Sports,Nintendo,41.49,29.02,3.77,8.46,82.74
2,3,Mario Kart Wii,Wii,2008.0,Racing,Nintendo,15.85,12.88,3.79,3.31,35.82
3,4,Wii Sports Resort,Wii,2009.0,Sports,Nintendo,15.75,11.01,3.28,2.96,33.00
7,8,Wii Play,Wii,2006.0,Misc,Nintendo,14.03,9.20,2.93,2.85,29.02
8,9,New Super Mario Bros. Wii,Wii,2009.0,Platform,Nintendo,14.59,7.06,4.70,2.26,28.62
...,...,...,...,...,...,...,...,...,...,...,...
16517,16520,Teenage Mutant Ninja Turtles,Wii,2007.0,Action,Konami Digital Entertainment,0.00,0.01,0.00,0.00,0.01
16552,16555,Mahou Sensei Negima!? Neo-Pactio Fight!!,Wii,2007.0,Fighting,Marvelous Interactive,0.00,0.00,0.01,0.00,0.01
16573,16576,Mini Desktop Racing,Wii,2007.0,Racing,Popcorn Arcade,0.01,0.00,0.00,0.00,0.01
16574,16577,Yattaman Wii: BikkuriDokkiri Machine de Mou Ra...,Wii,2008.0,Racing,Takara Tomy,0.00,0.00,0.01,0.00,0.01


In [81]:
# Average worldwide sales per Wii title.
wii_mean = wii.loc[:, 'Global_Sales'].mean()
wii_mean

0.6994037735849057

In [82]:
# Average worldwide sales per title across the whole dataset.
# NOTE(review): this baseline includes the Wii rows themselves; if the intent is
# "all of the *other* platforms", filter out Platform == 'Wii' first. TODO confirm.
df_mean = df.loc[:, 'Global_Sales'].mean()
df_mean

0.5374406555006628

The Wii's average is higher, so the difference is:

In [84]:
# How far the Wii average sits above (positive) or below (negative) the baseline average.
wii_mean - df_mean

0.1619631180842429

### Display the top 10 shooter games by rank

In [86]:
# Ten best-ranked shooter titles. Sorting by Rank explicitly means the result
# no longer depends on the CSV rows already being in rank order; the stable
# sort keeps the file order for any rows that share a Rank.
shooter = (
    df[df['Genre'] == 'Shooter']
    .sort_values(by='Rank', kind='stable')
    .head(10)
)
shooter

Unnamed: 0,Rank,Name,Platform,Year,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales
9,10,Duck Hunt,NES,1984.0,Shooter,Nintendo,26.93,0.63,0.28,0.47,28.31
29,30,Call of Duty: Modern Warfare 3,X360,2011.0,Shooter,Activision,9.03,4.28,0.13,1.32,14.76
31,32,Call of Duty: Black Ops,X360,2010.0,Shooter,Activision,9.67,3.73,0.11,1.13,14.64
33,34,Call of Duty: Black Ops 3,PS4,2015.0,Shooter,Activision,5.77,5.81,0.35,2.31,14.24
34,35,Call of Duty: Black Ops II,PS3,2012.0,Shooter,Activision,4.99,5.88,0.65,2.52,14.03
35,36,Call of Duty: Black Ops II,X360,2012.0,Shooter,Activision,8.25,4.3,0.07,1.12,13.73
36,37,Call of Duty: Modern Warfare 2,X360,2009.0,Shooter,Activision,8.52,3.63,0.08,1.29,13.51
37,38,Call of Duty: Modern Warfare 3,PS3,2011.0,Shooter,Activision,5.54,5.82,0.49,1.62,13.46
40,41,Call of Duty: Black Ops,PS3,2010.0,Shooter,Activision,5.98,4.44,0.48,1.83,12.73
43,44,Halo 3,X360,2007.0,Shooter,Microsoft Game Studios,7.97,2.83,0.13,1.21,12.14


### Display the 10 newest games

In [89]:
# Latest release years first; rows with a missing Year are placed last by
# sort_values' default na_position, so they do not appear in the first ten.
(
    df.sort_values(by='Year', ascending=False)
      .iloc[:10]
)

Unnamed: 0,Rank,Name,Platform,Year,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales
5957,5959,Imagine: Makeup Artist,DS,2020.0,Simulation,Ubisoft,0.27,0.0,0.0,0.02,0.29
14390,14393,Phantasy Star Online 2 Episode 4: Deluxe Package,PS4,2017.0,Role-Playing,Sega,0.0,0.0,0.03,0.0,0.03
16241,16244,Phantasy Star Online 2 Episode 4: Deluxe Package,PSV,2017.0,Role-Playing,Sega,0.0,0.0,0.01,0.0,0.01
16438,16441,Brothers Conflict: Precious Baby,PSV,2017.0,Action,Idea Factory,0.0,0.0,0.01,0.0,0.01
8293,8295,Shin Megami Tensei IV: Final,3DS,2016.0,Role-Playing,Deep Silver,0.03,0.0,0.14,0.0,0.17
13647,13649,Hitman (2016),PS4,2016.0,Action,Square Enix,0.0,0.04,0.0,0.01,0.04
13596,13598,Card Fight!! Vanguard G: Stride to Victory!!,3DS,2016.0,Misc,FuRyu,0.0,0.0,0.04,0.0,0.04
13602,13604,Refrain no Chika Meikyuu to Majo no Ryodan,PSV,2016.0,Action,Nippon Ichi Software,0.0,0.0,0.04,0.0,0.04
13614,13616,Utawarerumono: Futari no Hakuoro,PS4,2016.0,Misc,Aqua Plus,0.0,0.0,0.04,0.0,0.04
13620,13622,Kamen Rider: Battride War Genesis,PS3,2016.0,Action,Namco Bandai Games,0.0,0.0,0.04,0.0,0.04


### How many Atari games are there?

In [92]:
# Number of rows whose Publisher is exactly 'Atari'.
len(df.loc[df['Publisher'].eq('Atari')])

363