# Data Analysis with Pandas

In [1]:
%matplotlib inline
import matplotlib.pyplot as plt

import pandas as pd

In [2]:
# Read the video game sales CSV from the working directory into the frame
# that every later cell analyses.
df = pd.read_csv(filepath_or_buffer="vgsales.csv")
# Echo the frame as the cell output (pandas truncates the rendering itself).
df

Unnamed: 0,Rank,Name,Platform,Year,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales
0,1,Wii Sports,Wii,2006.0,Sports,Nintendo,41.49,29.02,3.77,8.46,82.74
1,2,Super Mario Bros.,NES,1985.0,Platform,Nintendo,29.08,3.58,6.81,0.77,40.24
2,3,Mario Kart Wii,Wii,2008.0,Racing,Nintendo,15.85,12.88,3.79,3.31,35.82
3,4,Wii Sports Resort,Wii,2009.0,Sports,Nintendo,15.75,11.01,3.28,2.96,33.00
4,5,Pokemon Red/Pokemon Blue,GB,1996.0,Role-Playing,Nintendo,11.27,8.89,10.22,1.00,31.37
...,...,...,...,...,...,...,...,...,...,...,...
16593,16596,Woody Woodpecker in Crazy Castle 5,GBA,2002.0,Platform,Kemco,0.01,0.00,0.00,0.00,0.01
16594,16597,Men in Black II: Alien Escape,GC,2003.0,Shooter,Infogrames,0.01,0.00,0.00,0.00,0.01
16595,16598,SCORE International Baja 1000: The Official Game,PS2,2008.0,Racing,Activision,0.00,0.00,0.00,0.00,0.01
16596,16599,Know How 2,DS,2010.0,Puzzle,7G//AMES,0.00,0.01,0.00,0.00,0.01


## Which company is the most common video game publisher?

In [3]:
# mode() returns every value tied for the highest frequency, labelled 0..k-1;
# the entry labelled 0 is taken as the answer.
most_common_publisher = df["Publisher"].mode()[0]
most_common_publisher

'Electronic Arts'

## What’s the most common platform?

In [4]:
# Pull out the platform column (a Series, despite the `_df` suffix).
common_platform_df = df.loc[:, 'Platform']
# Count rows per platform and keep the label with the highest count.
most_common_platform = common_platform_df.value_counts().idxmax()
most_common_platform

'DS'

## What about the most common genre?

In [5]:
# Pull out the genre column (a Series, despite the `_df` suffix).
common_genre_df = df.loc[:, 'Genre']
# Count rows per genre and keep the label with the highest count.
most_common_genre = common_genre_df.value_counts().idxmax()
most_common_genre

'Action'

## What are the top 20 highest grossing games?

In [6]:
# Select the 20 rows with the largest worldwide sales directly. nlargest()
# returns them in descending order and keeps the first occurrence on ties,
# so there is no need to re-index and sort the whole frame.
top_twenty_highest_grossing_games_df = df.nlargest(20, 'Global_Sales')
# Present the result as game names labelled by their Global_Sales figure,
# the same shape the original cell produced.
top_twenty_highest_grossing_games = (
    top_twenty_highest_grossing_games_df.set_index('Global_Sales')["Name"]
)
top_twenty_highest_grossing_games

Global_Sales
82.74                                      Wii Sports
40.24                               Super Mario Bros.
35.82                                  Mario Kart Wii
33.00                               Wii Sports Resort
31.37                        Pokemon Red/Pokemon Blue
30.26                                          Tetris
30.01                           New Super Mario Bros.
29.02                                        Wii Play
28.62                       New Super Mario Bros. Wii
28.31                                       Duck Hunt
24.76                                      Nintendogs
23.42                                   Mario Kart DS
23.10                     Pokemon Gold/Pokemon Silver
22.72                                         Wii Fit
22.00                                    Wii Fit Plus
21.82                              Kinect Adventures!
21.40                              Grand Theft Auto V
20.81                   Grand Theft Auto: San Andreas
20.61          

## For North American video game sales, what’s the median?

In [7]:
# Median of the North American sales column. Only the final expression of a
# cell is rendered, so the table below is what this cell shows.
na_median_sales = df['NA_Sales'].median()
# First ten rows (full records, not just names) whose NA sales equal the
# median exactly.
ten_median_na_seller_names = df.loc[df['NA_Sales'].eq(na_median_sales)].head(10)
ten_median_na_seller_names

Unnamed: 0,Rank,Name,Platform,Year,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales
446,447,Dragon Warrior IV,NES,1990.0,Role-Playing,Enix Corporation,0.08,0.0,3.03,0.01,3.12
497,498,World Soccer Winning Eleven 7 International,PS2,2003.0,Sports,Konami Digital Entertainment,0.08,1.24,1.13,0.45,2.9
1617,1619,Farming Simulator 2015,PC,2014.0,Simulation,Focus Home Interactive,0.08,1.02,0.0,0.13,1.23
1926,1928,Pro Evolution Soccer 2008,X360,2007.0,Sports,Konami Digital Entertainment,0.08,0.9,0.04,0.05,1.07
2067,2069,Winning Eleven: Pro Evolution Soccer 2007 (All...,X360,2006.0,Sports,Konami Digital Entertainment,0.08,0.9,0.02,0.0,1.0
2373,2375,Phantasy Star Portable 2,PSP,2009.0,Role-Playing,Sega,0.08,0.11,0.62,0.06,0.88
2579,2581,The Sims 2: Castaway,PSP,2007.0,Simulation,Electronic Arts,0.08,0.46,0.0,0.25,0.8
3186,3188,SingStar Queen,PS2,2009.0,Misc,Sony Computer Entertainment,0.08,0.12,0.0,0.44,0.63
3503,3505,Top Spin 3,PS3,2008.0,Action,Take-Two Interactive,0.08,0.37,0.0,0.12,0.57
3703,3705,Sonic & All-Stars Racing Transformed,PS3,2012.0,Racing,Sega,0.08,0.33,0.01,0.11,0.54


## For the top-selling game of all time, how many standard deviations above/below the mean are its sales for North America?

In [8]:
# Find the top-selling game by its worldwide sales rather than assuming it is
# the first row of the file; idxmax() returns that row's index label.
top_seller_label = df['Global_Sales'].idxmax()
data_row = df.loc[top_seller_label, 'NA_Sales']

# Mean and (sample) standard deviation of North American sales over all games.
mean_na = df['NA_Sales'].mean()
std_NA = df['NA_Sales'].std()

# z-score: how many standard deviations the top seller's NA sales lie from the mean.
na_std = (data_row - mean_na) / std_NA
na_std

50.47898767479108

## The Nintendo Wii seems to have outdone itself with games. How does its average number of sales compare with all of the other platforms?

In [9]:
# Group rows by "is this a Wii title?" and average Global_Sales per group.
# The result is a Series labelled by the booleans False (other platforms)
# and True (Wii).
nintendo_wii = df.groupby(df["Platform"] == "Wii")["Global_Sales"].mean()
# Look the two means up by their boolean labels. Integer keys such as
# nintendo_wii[0] only work through pandas' deprecated positional fallback.
# Tuple order is unchanged: (all other platforms, Wii).
num_of_sales = (nintendo_wii.loc[False], nintendo_wii.loc[True])
num_of_sales

(0.5233896418516336, 0.6994037735849057)

## Come up with 3 more questions that can be answered with this data set.

1. What is the average of Japanese (JP) video game sales?

In [10]:
# Arithmetic mean of the JP_Sales column across every row in the data set.
mean_jp = df.loc[:, 'JP_Sales'].mean()
mean_jp

0.077781660441017

2. What is the genre of the lowest-selling game?

In [11]:
# Take the maximum of the Genre column directly. The earlier form,
# df[['Genre']].max()[0], indexed a Series labelled 'Genre' with the integer 0
# and so depended on pandas' deprecated positional fallback.
# NOTE(review): max() on a text column yields the alphabetically last label;
# no sales column is consulted here. The value is kept because the notebook's
# test cell pins 'Strategy' -- confirm whether the lowest-selling game's genre
# was intended instead.
genre_max = df['Genre'].max()
genre_max

'Strategy'

3. What is the maximum North American sales figure for a single game?

In [12]:
# Largest single value found in the NA_Sales column.
na_max_sales = df.loc[:, "NA_Sales"].max()
na_max_sales

41.49

In [13]:
def test():
    """Check the answers computed by the earlier cells against known values.

    Reads the notebook-level result variables (most_common_publisher,
    most_common_platform, most_common_genre, top_twenty_highest_grossing_games,
    na_median_sales, na_std, num_of_sales, mean_jp, genre_max, na_max_sales).

    Raises:
        AssertionError: if any result differs from its expected value.
    """
    # Local import so this cell does not depend on the notebook's import cell.
    import math

    def assert_equal(actual,expected):
        """Require exact equality; used for labels and strings."""
        assert actual == expected, f"Expected {expected} but got {actual}"

    def assert_close(actual, expected):
        """Require equality up to floating-point rounding error.

        Computed statistics can differ in the last bits between pandas/numpy
        versions, so exact == on them is fragile.
        """
        assert math.isclose(actual, expected, rel_tol=1e-9), f"Expected {expected} but got {actual}"

    assert_equal(most_common_publisher, 'Electronic Arts')
    assert_equal(most_common_platform, 'DS')
    assert_equal(most_common_genre, 'Action')
    assert_equal(top_twenty_highest_grossing_games.iloc[0], "Wii Sports")
    assert_equal(top_twenty_highest_grossing_games.iloc[19], "Brain Age: Train Your Brain in Minutes a Day")
    assert_close(na_median_sales, 0.08)
    assert_close(na_std,50.47898767479108)
    assert_close(num_of_sales[0], 0.5233896418516336)
    assert_close(num_of_sales[1], 0.6994037735849057)
    assert_close(mean_jp,0.077781660441017)
    assert_equal(genre_max,'Strategy')
    assert_close(na_max_sales,41.49)
    print("Success!!!")

test()

Success!!!
