# **Video Games Sales Visualization**

# Inspecting the Data
**Getting the CSV file from the Kaggle website**

In [None]:
import os

# List every file found under the Kaggle input directory
for folder, _subdirs, files in os.walk('/kaggle/input'):
    for file_name in files:
        print(os.path.join(folder, file_name))

**Importing libraries that will be used in this notebook**

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load the video game sales dataset and preview its first five rows
df = pd.read_csv('/kaggle/input/videogamesales/vgsales.csv')
df.head(5)

**Information about each field:**
* Rank - Ranking of overall sales
* Name - The game's name
* Platform - Platform of the game's release (e.g. PC, PS4, etc.)
* Year - Year of the game's release
* Genre - Genre of the game
* Publisher - Publisher of the game
* NA_Sales - Sales in North America (in millions)
* EU_Sales - Sales in Europe (in millions)
* JP_Sales - Sales in Japan (in millions)
* Other_Sales - Sales in the rest of the world (in millions)
* Global_Sales - Total worldwide sales (in millions)

In [None]:
# Call info() on the DataFrame to get an overall picture of the data
# (its columns, their dtypes and how many non-null values each one holds)
df.info()

In [None]:
# Check the shape of the DataFrame: a (number of rows, number of columns) tuple
df.shape

The `shape` property gives the dimensions of this DataFrame: there are 16598 data entries (rows) and 11 columns.

In [None]:
# Count the missing (NaN) values in every column
df.isnull().sum()

# The Growth in number of game releases between 1980 and 2020

In [None]:
# Number of entries per release year, counted on the 'Rank' column
game_released_by_year = df.groupby('Year')['Rank'].count()
game_released_by_year.head(5)

In [None]:
# Line graph of how the number of released games rises and falls from year to year
plt.figure(figsize=(12, 8))
release_ax = sns.lineplot(data=game_released_by_year, linewidth=3)

release_ax.set_title("Number of Game Released per year from 1980 to 2020", fontsize=15)
release_ax.set_xlabel("Year", fontsize=12)
release_ax.set_ylabel("Count", fontsize=12)
release_ax.grid()
plt.show()

In [None]:
# Peak of the release curve: find the year first, then read its count
max_year = game_released_by_year.idxmax()
max_number = game_released_by_year.loc[max_year]
max_year, max_number

**Analysis:**
* The dataset was last updated in 2016 (five years before this notebook was written). The line graph from 2016 onward is almost flat, which reflects the lack of data for that period rather than a real trend.
* The number of video games released per year followed an upward trend until 2009, the year with the highest number of releases (1432 in total). From that year onward there was a steep decline in the number of new releases, apart from a slight increase around 2014 before the downward trend resumed in 2015.

# Sales across regions

In [None]:
# Total revenue by region
regional_columns = df.columns[-5:-1]  # NA_Sales column to Other_Sales column
region_sales = (
    df[regional_columns]
    .sum(axis=0)
    .rename_axis('Region')
    .reset_index(name='Sales')
)
region_sales

In [None]:
# Bar chart of the total revenue collected in each region
plt.figure(figsize=(12, 8))
revenue_ax = sns.barplot(data=region_sales, x='Region', y='Sales', palette='viridis')
revenue_ax.set_title("Revenue by Region", fontsize=15)
revenue_ax.set_ylabel("Sales (in millions)", fontsize=12)
plt.show()

In [None]:
# Pie chart of each region's share of the total sales
pie_options = dict(
    autopct="%1.1f%%",
    figsize=(12, 8),
    fontsize=12,
    cmap="Pastel1",
    startangle=180,
    explode=[0.02] * 4,  # one small offset per region wedge
    labels=region_sales['Region'].to_list(),
)
region_sales.plot.pie(y='Sales', **pie_options)
plt.title("Distribution of Sales (in millions)", fontsize=15)
plt.ylabel("")
plt.legend(loc='lower left')
plt.show()

**Analysis:**
* Around fifty percent of the revenue from selling video games came from the North American region, followed by the EU and Japan.
* A little over a quarter of the revenue came from the European region.

# The Growth of Video Game Sales in different regions

In [None]:
# Yearly sales totals for every region
sales_list = list(df.columns[-5:-1])  # regional sales columns; global sales is excluded
sales_by_year = df.groupby('Year')[sales_list].sum()
sales_by_year.head()

In [None]:
# Line graph showing the growth of video game sales in different regions

sns.set_style("darkgrid")
plt.figure(figsize=(12, 8))
growth_ax = sns.lineplot(data=sales_by_year, palette='mako_r', linewidth=3)
growth_ax.set_title("Growth in Game sold across regions", fontsize=15)
growth_ax.set_xlabel("Year", fontsize=12)
growth_ax.set_ylabel("Sales (in millions)", fontsize=12)
plt.show()

# Popular Platform in all regions

In [None]:
# Total sales of every platform across regions, ordered by global sales.
# The sales columns are selected by name before summing, so the result does not
# depend on column positions or on which columns pandas decides to aggregate.
platform_sales = (
    df.groupby('Platform')[['NA_Sales', 'EU_Sales', 'JP_Sales', 'Other_Sales', 'Global_Sales']]
    .sum()
    .sort_values(by='Global_Sales', ascending=False)
)
platform_sales.head()

In [None]:
# Top five platforms by sales in each of the four regions
figure, axes = plt.subplots(nrows=2, ncols=2, sharey=True, figsize=(15, 10))

sns.set_style('whitegrid')

# (sales column, y-axis label, colour palette) for each panel, in reading order
region_panels = [
    ('NA_Sales', 'NA Sales (in millions)', 'viridis'),
    ('EU_Sales', 'EU Sales (in millions)', 'magma'),
    ('JP_Sales', 'JP Sales (in millions)', 'Pastel1'),
    ('Other_Sales', 'Other Sales (in millions)', 'PuBuGn_r'),
]

for panel, (column, ylabel, palette) in zip(axes.flat, region_panels):
    # Take labels and heights from the same sorted Series so that every bar is
    # labelled with the platform its value belongs to.
    top_five = platform_sales[column].sort_values(ascending=False).head(5)
    sns.barplot(ax=panel, x=top_five.index, y=top_five.values, palette=palette)
    panel.set_ylabel(ylabel, fontsize=12)

In [None]:
# Top five platforms by global sales
# Labels and bar heights are read from one Series so they always refer to the same platforms
top_global_platforms = platform_sales['Global_Sales'].sort_values(ascending=False).head(5)
plt.figure(figsize=(12, 8))
sns.barplot(x=top_global_platforms.index, y=top_global_platforms.values, palette='magma')
plt.ylabel("Global Sales (in millions)", fontsize=12)
plt.show()

# Overall Publisher Global Sales

In [None]:
# Ten publishers with the highest all-time global sales
publisher_totals = df.groupby('Publisher')['Global_Sales'].sum()
market_share = publisher_totals.sort_values(ascending=False).head(10).reset_index()
market_share

In [None]:
plt.figure(figsize=(12, 8))
ax = sns.barplot(data=market_share, x="Global_Sales", y="Publisher", palette='Set2')

# Write each publisher's total halfway along its bar, vertically centred
for bar in ax.patches:
    bar_length = bar.get_width()
    label_y = bar.get_y() + bar.get_height() / 2
    ax.text(bar_length - bar_length / 2, label_y, f'{bar_length:1.2f}',
            ha='left', va='center', fontsize=12)

ax.set_xlabel("Global Sales (in millions)", fontsize=12)
ax.set_ylabel("Publisher", fontsize=12)
plt.show()

# Publisher Global Sales in the Last Decade

In [None]:
# Last decade: games released in 2010 and onward
plt.figure(figsize=(12, 8))
# `>=` keeps the year 2010 itself, matching "2010 and onward". Only Global_Sales
# is summed, so no other column is aggregated along the way.
publisher_2010 = (
    df[df['Year'] >= 2010]
    .groupby('Publisher')[['Global_Sales']]
    .sum()
    .sort_values(by='Global_Sales', ascending=False)
    .head(10)
)
ax = sns.barplot(x='Global_Sales', y=publisher_2010.index, data=publisher_2010, palette='pastel')

# Write each publisher's total halfway along its bar, vertically centred
for p in ax.patches:
    width = p.get_width()
    ax.text(width - width / 2, p.get_y() + p.get_height() / 2, '{:1.2f}'.format(width),
            ha='left', va='center', fontsize=12)

plt.xlabel("Global Sales (in millions)", fontsize=12)
plt.ylabel("Publisher", fontsize=12)
plt.show()

**Analysis:**
* Overall, games published by Nintendo had the most sales. However, in the last decade, sales of games published by Electronic Arts exceeded those of Nintendo.

# Electronic Arts

In [None]:
# Genre distribution of the games published by Electronic Arts
ea_game = df.loc[df['Publisher'] == 'Electronic Arts'].sort_values(by='Global_Sales', ascending=False)
ea_genre_order = ea_game['Genre'].value_counts().index  # most frequent genre first
plt.figure(figsize=(12, 8))
sns.countplot(data=ea_game, x='Genre', order=ea_genre_order, palette='Set3')
plt.xticks(rotation=45)
plt.show()

In [None]:
# EA's ten highest-selling entries.
# The same title can appear once per platform, and a seaborn bar plot merges rows
# that share an x value into a single aggregated bar. Labelling each bar with the
# game name and its platform keeps every entry as its own bar.
ea_top10 = ea_game.head(10)  # relies on ea_game being sorted by Global_Sales, descending
ea_top10_labels = (ea_top10['Name'] + ' (' + ea_top10['Platform'] + ')').to_list()

plt.figure(figsize=(12, 8))
sns.barplot(x=ea_top10_labels, y=ea_top10['Global_Sales'].to_list(), palette='deep')
plt.xticks(rotation=90)
plt.xlabel("Game (Platform)", fontsize=12)
plt.ylabel("Global Sales (in millions)", fontsize=12)
plt.show()

# Top 10 Globally Sold Game

In [None]:
# The ten rows with the highest global sales
top_game_sales_gb = df.sort_values('Global_Sales', ascending=False).iloc[:10]
top_game_sales_gb

In [None]:
# Bar chart of the ten best-selling games worldwide
plt.figure(figsize=(12, 8))
g = sns.barplot(data=top_game_sales_gb, x='Name', y='Global_Sales', palette='magma')
g.set_title("Top 10 Games Sold Globally", fontsize=15)
plt.xticks(rotation=45)

plt.show()

# Top 10 Game Genre

In [None]:
# Top 10 genres by number of entries
# `order` is limited to the ten most frequent genres so the chart matches its title;
# without the limit every genre in the dataset would be drawn.
top10_genres = df['Genre'].value_counts().head(10).index
plt.figure(figsize=(12, 8))
sns.countplot(x='Genre', data=df, order=top10_genres)
plt.xticks(rotation=45)
plt.title("Top 10 Game Genre", fontsize=15);

In [None]:
# Yearly global sales of the five most common genres
# Only Global_Sales is summed per (Year, Genre); aggregating every column first
# would also try to "sum" text columns such as Name or Publisher.
year_genre_gb_sales = df.groupby(['Year', 'Genre'])['Global_Sales'].sum().to_frame()

# Move Genre out of the index so it can be filtered and used as the plot hue
top_year_sales = year_genre_gb_sales.reset_index(level='Genre')
# The five genres are picked by number of entries (count of 'Rank'), not by sales
top_genre = df.groupby('Genre').count().sort_values(by='Rank', ascending=False).head(5)
top5_year_sales = top_year_sales[top_year_sales['Genre'].isin(top_genre.index.to_list())]
top5_year_sales.head()

In [None]:
# Line graph of yearly global sales, one line per selected genre
plt.figure(figsize=(12, 8))

genre_line_options = dict(hue='Genre', style='Genre', markers=True,
                          palette="magma", linewidth=3)
sns.lineplot(data=top5_year_sales, x='Year', y='Global_Sales', **genre_line_options);

# Wii Platform

In [None]:
# All games released on the Wii platform
wii_platform = df.loc[df['Platform'].eq("Wii")]
wii_platform.head()

In [None]:
# Bar chart ranking the genres on the Wii platform by number of entries
plt.figure(figsize=(12, 8))
wii_genre_order = wii_platform['Genre'].value_counts().index
ax = sns.countplot(data=wii_platform, x='Genre', order=wii_genre_order, palette='viridis')

# Label every bar with its percentage of all Wii entries
for bar in ax.patches:
    share_text = '{:.2f}%'.format(bar.get_height() / wii_platform.shape[0] * 100)
    ax.annotate(share_text, (bar.get_x() + 0.1, bar.get_height() + 5), fontsize=12)

ax.set_title("Ranking Popular Game Genre released on Wii Platform", fontsize=15)
plt.xticks(rotation=45);

In [None]:
# First five games whose name contains "Wii"
wii_name_mask = df['Name'].str.contains("Wii")
wii_game = df[wii_name_mask]
wii_game.head(5)

# Pokemon ◓

In [None]:
# Games whose name contains "Pokemon", and how many of them fall into each genre
pokemon = df.loc[df['Name'].str.contains("Pokemon")]
pokemon_genre = (
    pokemon.groupby('Genre')['Rank']
    .count()
    .sort_values(ascending=False)
    .to_frame(name='Count')
)
pokemon_genre.head()

In [None]:
# Pie chart of the Pokemon genre distribution
# `explode` needs exactly one offset per wedge, so its length follows the number
# of genres instead of being hard-coded.
pokemon_genre.plot.pie(y='Count', autopct="%1.1f%%", figsize=(12, 8),
                       explode=[0.05] * len(pokemon_genre), cmap='Pastel1',
                       fontsize=12, textprops={'color': "black"})
plt.title("Pokemon Genre Distribution", fontsize=15, fontname="fantasy")
plt.ylabel("")
plt.legend(loc="upper right")
plt.tight_layout()

In [None]:
# First five Pokemon rows; these are the best sellers only if df is ordered
# by sales rank — TODO confirm
pokemon.head(5)

In [None]:
pokemon_sales = pokemon.groupby('Genre').sum().sort_values(by='Global_Sales',ascending=False)
pokemon_sales

In [None]:
# Top five Pokemon genres by sales in each of the four regions
figure, axes = plt.subplots(nrows=2, ncols=2, sharey=True, figsize=(15, 10))

sns.set_style('whitegrid')

# (sales column, y-axis label, colour palette) for each panel, in reading order
pokemon_region_panels = [
    ('NA_Sales', 'NA Sales (in millions)', 'viridis'),
    ('EU_Sales', 'EU Sales (in millions)', 'magma'),
    ('JP_Sales', 'JP Sales (in millions)', 'Pastel1'),
    ('Other_Sales', 'Other Sales (in millions)', 'PuBuGn_r'),
]

for panel, (column, ylabel, palette) in zip(axes.flat, pokemon_region_panels):
    # Take labels and heights from the same sorted Series so that every bar is
    # labelled with the genre its value belongs to.
    top_genres = pokemon_sales[column].sort_values(ascending=False).head(5)
    sns.barplot(ax=panel, x=top_genres.index, y=top_genres.values, palette=palette)
    panel.set_ylabel(ylabel, fontsize=12)

# Per-panel names (NA, EU, JP, Other) used when the bars are annotated
ax1, ax2, ax3, ax4 = axes.flat

# Helper that adds a sales label on top of every bar of a bar chart
def addingSales(ax):
    """Annotate each bar in ``ax.patches`` with its height, formatted to two decimals.

    The text is placed slightly to the right of the bar's left edge and slightly
    above its top.
    """
    for bar in ax.patches:
        bar_height = bar.get_height()
        label_position = (bar.get_x()+0.2, bar_height+0.2)
        ax.annotate('{:.2f}'.format(bar_height), label_position, fontsize=12)
            
# Put the sales value above every bar in each of the four regional panels
for region_ax in (ax1, ax2, ax3, ax4):
    addingSales(region_ax)

**Analysis:**
* Regardless of the region, Pokemon games listed under the Role-Playing genre outperformed the other genres in terms of revenue.


In [None]:
# Pokemon sales per year in each region.
# The regional sales columns are selected by name, so the result holds only the
# four series to plot regardless of how pandas treats the text columns.
pokemon_growth = pokemon.groupby('Year')[['NA_Sales', 'EU_Sales', 'JP_Sales', 'Other_Sales']].sum()
pokemon_growth.head()

In [None]:
# Line graph of Pokemon sales per year, one line per region
plt.figure(figsize=(12, 8))
g = sns.lineplot(data=pokemon_growth, linewidth=3, palette='magma')

g.set_ylabel("Sales (in millions)", fontsize=12)

# Put a labelled tick on every second year from 1997 to 2015
year_list = list(range(1997, 2016, 2))
g.set_xticks(year_list)
g.set_xticklabels(year_list);