In [16]:
import pandas as pd

import matplotlib.pyplot as plt

# Load the best-selling game consoles dataset from the data folder.
csv_path = "../data/best_selling_game_consoles.csv"
df = pd.read_csv(csv_path)

# Quick structural overview: dimensions, column names, first rows, dtypes.
column_names = df.columns.tolist()
print("Shape:", df.shape)
print("Columns:", column_names)
print(df.head(5))
df.info()

Shape: (51, 7)
Columns: ['Console Name', 'Type', 'Company', 'Released Year', 'Discontinuation Year', 'Units sold (million)', 'Remarks']
      Console Name      Type   Company  Released Year  Discontinuation Year  \
0    PlayStation 2      Home      Sony           2000                  2013   
1      Nintendo DS  Handheld  Nintendo           2004                  2013   
2  Nintendo Switch    Hybrid  Nintendo           2017                     0   
3         Game Boy  Handheld  Nintendo           1989                  2003   
4   Game Boy Color  Handheld  Nintendo           1998                  2003   

   Units sold (million)                                            Remarks  
0                155.00           Final sales are greater than 155 million  
1                154.02                                                NaN  
2                122.55                                                NaN  
3                 64.42  The Game Boy (1989) and the Game Boy Color (19...  
4   

In [17]:
# Count the NaN entries in each column to see where data is missing.
missing_per_column = df.isna().sum()
print("Missing values per column:\n", missing_per_column)

Missing values per column:
 Console Name             0
Type                     0
Company                  0
Released Year            0
Discontinuation Year     0
Units sold (million)     0
Remarks                 35
dtype: int64


Looking at the data, the columns we can focus on are Console Name,
Type, Company, Released Year, Discontinuation Year, and Units sold (million).

In [18]:
# Drop the free-text "Remarks" column: it is mostly empty and is not part of
# the analysis. Because `df` is rebound to the result, re-running this cell
# would otherwise raise KeyError (the column is already gone);
# errors="ignore" makes the cell safe to run more than once.
df = df.drop(columns=["Remarks"], errors="ignore")

In [19]:
# List every distinct console name present in the dataset.
console_names = df["Console Name"].unique()
print("Unique consoles", console_names)

Unique consoles ['PlayStation 2' 'Nintendo DS' 'Nintendo Switch' 'Game Boy'
 'Game Boy Color' 'PlayStation 4' 'PlayStation' 'Wii' 'PlayStation 3'
 'Xbox 360' 'Game Boy Advance' 'PlayStation Portable' 'Nintendo 3DS'
 'NES/Famicom' 'Xbox One' 'SNES/Super Famicom' 'Game & Watch'
 'Nintendo 64' 'Sega Genesis/Mega Drive' 'PlayStation 5' 'Atari 2600'
 'Xbox' 'GameCube' 'Xbox Series X/S' 'Wii U' 'PlayStation Vita'
 'Sega Master System' 'Sega Game Gear' 'PC Engine/TurboGrafx-16'
 'Sega Saturn' 'Dreamcast' 'Master System(Brazilian variants)'
 'Dendy(Famiclone)' 'Super NES Classic Edition' 'Famicom Disk System'
 'NES Classic Edition' 'WonderSwan' 'Sega Pico' 'Color TV-Game'
 'Intellivision' 'Mega Drive(Brazilian variants)' 'N-Gage' 'Sega CD'
 'ColecoVision' 'Magnavox Odyssey' 'PC Engine CD-ROM' 'Atari 7800'
 'Atari Lynx' 'Philips CD-i' 'Telstar' 'Atari 5200']


In [20]:
# Count the distinct console names.
n_unique_consoles = df["Console Name"].nunique()
print("Number of unique consoles:", n_unique_consoles)

Number of unique consoles: 51


In [21]:
# Let's rename "Units sold (million)" to the identifier-friendly "UnitSoldsMillion".
sales_column_map = {"Units sold (million)": "UnitSoldsMillion"}
df = df.rename(columns=sales_column_map)

In [22]:
# Exploratory analysis
# The five consoles with the highest unit sales, showing only the key columns.
top5_columns = ["Console Name", "Company", "UnitSoldsMillion"]
top5 = df.nlargest(n=5, columns="UnitSoldsMillion").loc[:, top5_columns]
print(top5)

      Console Name   Company  UnitSoldsMillion
0    PlayStation 2      Sony            155.00
1      Nintendo DS  Nintendo            154.02
2  Nintendo Switch  Nintendo            122.55
5    PlayStation 4      Sony            117.20
6      PlayStation      Sony            102.49


# The insights from the above
- These five dominate the all-time charts. Notably, Sony's PlayStation 2 is the
  best-selling console ever, slightly ahead of the handheld Nintendo DS.
- The Switch is approaching the Nintendo DS in units sold (122.55 vs. 154.02 million)
  over a similar timeframe, despite being newer and more experimental.

In [23]:
# Pull the full record of the single best-selling console:
# idxmax gives the row label of the maximum, .loc fetches that row.
best_row_label = df["UnitSoldsMillion"].idxmax()
best = df.loc[best_row_label]
print(best)

Console Name            PlayStation 2
Type                             Home
Company                          Sony
Released Year                    2000
Discontinuation Year             2013
UnitSoldsMillion                155.0
Name: 0, dtype: object


In [24]:
# Total units sold per company, largest first.
sales_grouped_by_company = df.groupby("Company")["UnitSoldsMillion"]
companes_sales = sales_grouped_by_company.sum().sort_values(ascending=False)
print(companes_sales)

Company
Nintendo                   883.11
Sony                       589.09
Microsoft                  185.00
Sega                        78.40
Atari                       33.00
Tectoy                      11.00
NEC/Hudson Soft[note 6]     10.00
Micro Genius                 6.00
Bandai                       3.50
Coleco                       3.00
Mattel                       3.00
Nokia                        3.00
Magnavox/Philips             2.00
NEC                          1.92
Philips                      1.00
Name: UnitSoldsMillion, dtype: float64


# Insights
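- Nintendo dominates total sales compared to all other console makers.
- Nintendo, Sony, and Microsoft are the only companies with more than 100 million total units sold; Nintendo leads Sony, the other Japanese company among them, by almost 300 million units.
- Nintendo also has more consoles in the dataset than the runners-up (17, versus 7 for Sony and 4 for Microsoft), which contributes to its higher total.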

In [26]:
# Home vs. handheld console sales: total units sold per console type, largest first.
sales_grouped_by_type = df.groupby("Type")["UnitSoldsMillion"]
type_sales = sales_grouped_by_type.sum().sort_values(ascending=False)
print(type_sales)

Type
Home                   1090.50
Handheld                578.47
Hybrid                  122.55
Dedicated                12.84
Home console add-on       8.66
Name: UnitSoldsMillion, dtype: float64


# Insight
- Home consoles such as the PlayStation and Xbox have the highest combined sales, nearly double the handheld total (1090.50 vs. 578.47 million).

In [31]:
# Average units sold per console, grouped by release decade.
# Floor-dividing the year by 10 and scaling back gives the decade start (e.g. 2004 -> 2000).
df["Decade"] = 10 * (df["Released Year"] // 10)
mean_by_decade = df.groupby("Decade")["UnitSoldsMillion"].mean().reset_index()
decade_sales = mean_by_decade.sort_values(by="UnitSoldsMillion", ascending=False)
print(decade_sales)

   Decade  UnitSoldsMillion
3    2000         79.430000
4    2010         51.448750
5    2020         24.250000
2    1990         21.286923
1    1980         17.564286
0    1970          9.000000


In [32]:
# Company consistency: mean units sold per console for each company.
mean_by_company = df.groupby("Company")["UnitSoldsMillion"].mean()
avg_sales = mean_by_company.reset_index()
print(avg_sales)

                    Company  UnitSoldsMillion
0                     Atari          8.250000
1                    Bandai          3.500000
2                    Coleco          1.500000
3          Magnavox/Philips          2.000000
4                    Mattel          3.000000
5              Micro Genius          6.000000
6                 Microsoft         46.250000
7                       NEC          1.920000
8   NEC/Hudson Soft[note 6]         10.000000
9                  Nintendo         51.947647
10                    Nokia          3.000000
11                  Philips          1.000000
12                     Sega         11.200000
13                     Sony         84.155714
14                   Tectoy          5.500000


# Insight
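- Sony looks the most consistent, with the highest average sales per console (about 84 million); Nintendo is more mixed (about 52 million across many more consoles); and Microsoft sold less overall (about 46 million per console). Note that averages alone do not measure consistency; the spread per company (e.g. standard deviation) would be needed to confirm this.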

# Key Takeaways
- Top console: PlayStation 2 (155 million units)
- Market Share: Nintendo
- By Type: Home consoles sold the most overall
- Trend: Consoles released since 2000 sold more on average than earlier ones, with the 2000s the strongest decade
- Company patterns: Sony is the most consistent, Nintendo is more mixed with highs and lows, and Microsoft is
consistent but with lower sales overall