In [30]:
import pandas as pd

import matplotlib.pyplot as plt

# Load the best-selling consoles dataset from the data folder
csv_path = "../data/best_selling_game_consoles.csv"
df = pd.read_csv(csv_path)

# Quick overview: dimensions, column names, the first five rows (head's default),
# and finally the per-column dtypes / non-null counts
print("Shape:", df.shape)
print("Columns:", list(df.columns))
print(df.head())
df.info()

Shape: (51, 7)
Columns: ['Console Name', 'Type', 'Company', 'Released Year', 'Discontinuation Year', 'Units sold (million)', 'Remarks']
      Console Name      Type   Company  Released Year  Discontinuation Year  \
0    PlayStation 2      Home      Sony           2000                  2013   
1      Nintendo DS  Handheld  Nintendo           2004                  2013   
2  Nintendo Switch    Hybrid  Nintendo           2017                     0   
3         Game Boy  Handheld  Nintendo           1989                  2003   
4   Game Boy Color  Handheld  Nintendo           1998                  2003   

   Units sold (million)                                            Remarks  
0                155.00           Final sales are greater than 155 million  
1                154.02                                                NaN  
2                122.55                                                NaN  
3                 64.42  The Game Boy (1989) and the Game Boy Color (19...  
4   

In [15]:
# Count the NaN entries in each column.
# NOTE(review): isna() only detects NaN. The head() preview shows a
# Discontinuation Year of 0 for the Nintendo Switch, presumably a placeholder
# for "not discontinued" -- confirm; such values are not counted here.
missing_per_column = df.isna().sum()
print("Missing values per column:\n", missing_per_column)

Missing values per column:
 Console Name             0
Type                     0
Company                  0
Released Year            0
Discontinuation Year     0
Units sold (million)     0
Remarks                 35
dtype: int64


Looking at the data, the columns we can focus on are Console Name,
Type, Company, Released Year, Discontinuation Year, and Units sold (million).

In [16]:
# Drop the free-text Remarks column: it is NaN for 35 of the 51 rows and is not
# among the columns this analysis focuses on.
# errors="ignore" keeps the cell safe to re-run: df is reassigned here, so on a
# second execution "Remarks" is already gone and drop() would otherwise raise
# KeyError.
df = df.drop(columns=["Remarks"], errors="ignore")

In [19]:
# List every distinct console name in the dataset
console_names = df["Console Name"].unique()
print("Unique consoles", console_names)

Unique consoles ['PlayStation 2' 'Nintendo DS' 'Nintendo Switch' 'Game Boy'
 'Game Boy Color' 'PlayStation 4' 'PlayStation' 'Wii' 'PlayStation 3'
 'Xbox 360' 'Game Boy Advance' 'PlayStation Portable' 'Nintendo 3DS'
 'NES/Famicom' 'Xbox One' 'SNES/Super Famicom' 'Game & Watch'
 'Nintendo 64' 'Sega Genesis/Mega Drive' 'PlayStation 5' 'Atari 2600'
 'Xbox' 'GameCube' 'Xbox Series X/S' 'Wii U' 'PlayStation Vita'
 'Sega Master System' 'Sega Game Gear' 'PC Engine/TurboGrafx-16'
 'Sega Saturn' 'Dreamcast' 'Master System(Brazilian variants)'
 'Dendy(Famiclone)' 'Super NES Classic Edition' 'Famicom Disk System'
 'NES Classic Edition' 'WonderSwan' 'Sega Pico' 'Color TV-Game'
 'Intellivision' 'Mega Drive(Brazilian variants)' 'N-Gage' 'Sega CD'
 'ColecoVision' 'Magnavox Odyssey' 'PC Engine CD-ROM' 'Atari 7800'
 'Atari Lynx' 'Philips CD-i' 'Telstar' 'Atari 5200']


In [21]:
# Count the distinct console names
n_unique_consoles = df["Console Name"].nunique()
print("Number of unique consoles:", n_unique_consoles)

Number of unique consoles: 51


In [22]:
# Give the sales column a shorter name without spaces or parentheses:
# "Units sold (million)" -> "UnitSoldsMillion"
sales_col_renames = {"Units sold (million)": "UnitSoldsMillion"}
df = df.rename(columns=sales_col_renames)

In [27]:
# Exploratory analysis
# Five consoles with the highest unit sales (in millions), keeping only the
# columns needed to identify each console and its maker
summary_cols = ["Console Name", "Company", "UnitSoldsMillion"]
top5 = df[summary_cols].nlargest(n=5, columns="UnitSoldsMillion")
print(top5)

      Console Name   Company  UnitSoldsMillion
0    PlayStation 2      Sony            155.00
1      Nintendo DS  Nintendo            154.02
2  Nintendo Switch  Nintendo            122.55
5    PlayStation 4      Sony            117.20
6      PlayStation      Sony            102.49


# The insights from the above
- These five dominate the all-time charts. Notably, Sony's PlayStation 2 is the
  best-selling console ever, slightly ahead of the handheld Nintendo DS.
- The Switch has sold nearly as many units as the Nintendo DS so far, within a
  similar timeframe, despite being a newer and more experimental console.