# Video Games Released in 2022
Dataset can be found @: https://www.kaggle.com/datasets/mattop/video-games-released-in-2022

## Import Libraries & Dataset

In [20]:
import numpy as np
import pandas as pd
import plotly.express as px
from matplotlib import pyplot as plt

In [21]:
# View dataset
vg = pd.read_csv('video-games-2022.csv') 
vg

Unnamed: 0,Month,Day,Title,Platform(s),Genre(s),Developer(s),Publisher(s)
0,JANUARY,3,Freddi Fish 3: The Case of the Stolen Conch Shell,NS,Adventure,Humongous Entertainment,UFO Interactive Games
1,JANUARY,3,Putt-Putt Travels Through Time,NS,Adventure,Humongous Entertainment,UFO Interactive Games
2,JANUARY,4,Deep Rock Galactic,"PS4, PS5",First-person shooter,Ghost Ship Games,Coffee Stain Publishing
3,JANUARY,4,The Pedestrian,"XBO, XSX","Puzzle, platform",Skookum Arts LLC,Skookum Arts LLC
4,JANUARY,6,Demon Gaze Extra,"NS, PS4",Role-playing,Cattle Call,Clouded Leopard Entertainment
...,...,...,...,...,...,...,...
982,DECEMBER,22,Uta no Prince-sama All Star After Secret,NS,Visual novel,Nippon Ichi Software,Broccoli
983,DECEMBER,22,Valkyrie Profile: Lenneth,"PS4, PS5",Role-playing,tri-Ace,Square Enix
984,DECEMBER,22,Naraka: Bladepoint,XBO,"Action-adventure, battle royale",Thunder Fire Universe X Studio,NetEase Games
985,DECEMBER,23,Sports Story,NS,"Adventure, sports",Sidebar Games,Sidebar Games


## Initial Exploring / Light Cleaning

In [22]:
# Check Nulls
vg.info() 

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 987 entries, 0 to 986
Data columns (total 7 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   Month         987 non-null    object
 1   Day           987 non-null    int64 
 2   Title         987 non-null    object
 3   Platform(s)   987 non-null    object
 4   Genre(s)      987 non-null    object
 5   Developer(s)  987 non-null    object
 6   Publisher(s)  987 non-null    object
dtypes: int64(1), object(6)
memory usage: 54.1+ KB


In [23]:
# Not clean
vg['Genre(s)'].unique() 

array(['Adventure', 'First-person shooter', 'Puzzle, platform',
       'Role-playing', 'Puzzle', 'Train simulator', 'Action role-playing',
       'Digital collectible card game', 'Sandbox, adventure',
       'Fighting, party', 'Rhythm', 'Scrolling shooter',
       'Role-playing, dungeon crawler', 'Action-adventure',
       'Platform, roguelike', "Beat 'em up",
       'Digital collectible card game, strategy',
       'Action, roguelite, platform', 'Adventure, rhythm',
       'Tactical role-playing', 'Stealth', 'Photography',
       'Action role-playing, sports', 'Tactical shooter', 'Sports',
       'Adventure, visual novel', 'Puzzle, adventure',
       'Visual novel, role-playing, social deduction', 'Visual novel',
       'Platform', 'Simulation', 'Racing', 'Government simulation',
       'Action', 'Survival, tactical role-playing', 'Action, platform',
       'Adventure, casual', 'Action role-playing, metroidvania',
       'Puzzle-platform', 'Roguelite, first-person shooter',
       'Ac

In [24]:
# Also not clean
vg['Platform(s)'].unique() 

array(['NS', 'PS4, PS5', 'XBO, XSX', 'NS, PS4', 'Win', 'iOS, Droid',
       'PS4, XBO', 'Win, XBO, XSX', 'PS5', 'Win, NS, PS4, PS5, XBO, XSX',
       'Win, Mac, Lin', 'Win, PS4, PS5, XBO, XSX', 'Win, Mac, XBO, XSX',
       'Win, NS, PS4, PS5, XBO', 'Win, PS4, PS5, XBO, XSX, Stadia',
       'Win, NS, PS4, XBO, Stadia', 'XBO', 'Win, Mac', 'NS, PS4, PS5',
       'PS4, PS5, XBO, XSX', 'Stadia', 'PS4', 'NS, PS4, PS5, XBO, XSX',
       'Win, PS4, XBO, Stadia', 'NS, PS4, XBO', 'Win, PS4, PS5',
       'Win, Mac, Lin, NS, PS4, PS5, XBO, XSX, Atari VCS',
       'Win, NS, PS4, XBO', 'Win, NS, PS4, XBO, XSX', 'PS5, XSX',
       'Win, NS, PS4, PS5, XBO, XSX, Stadia', 'Win, PS4, PS5, XSX',
       'Win, NS', 'Win, NS, PS4', 'Win, NS, PS4, PS5', 'Win, iOS, Droid',
       'Win, NS,[c] PS4[c]', 'Win, Lin', 'Win, PS4, XBO', 'XSX', 'Arcade',
       'NS, Atari VCS', '3DS, Win, NS, PS4', 'Win, PS5',
       'Win, Mac, NS, XBO', 'PS4, XBO, XSX',
       'Win, Mac, Lin, NS, PS4, PS5, XBO, XSX', 'NS, XBO, XSX',


### Clean Columns / Create Separate Dataframes

In [25]:
# Create dataframe with Platform column separated and clean the messy values
import re

# Define a regular expression to match platform names
platform_regex = re.compile(r'[A-Za-z0-9\[\]]+')

# Apply the regular expression to the "Platform(s)" column and explode the resulting dataframe
vg_plat = vg.assign(Platform=vg['Platform(s)'].str.findall(platform_regex).apply
                    (lambda x: [s.replace('[c]', '').replace('WW', '').replace('JP', '')
                    .replace('EU', '').strip() for s in x])).explode('Platform')

# Drop the original "Platform(s)" column and rename the new "Platform" column
vg_plat = vg_plat.drop('Platform(s)', axis=1).rename(columns={'Platform': 'Platform(s)'})

# Get rid of null values
vg_plat['Platform(s)'].replace('', np.nan, inplace=True)  
vg_plat.dropna(subset=['Platform(s)'], inplace=True)

In [26]:
# View platforms dataframe
vg_plat 

Unnamed: 0,Month,Day,Title,Genre(s),Developer(s),Publisher(s),Platform(s)
0,JANUARY,3,Freddi Fish 3: The Case of the Stolen Conch Shell,Adventure,Humongous Entertainment,UFO Interactive Games,NS
1,JANUARY,3,Putt-Putt Travels Through Time,Adventure,Humongous Entertainment,UFO Interactive Games,NS
2,JANUARY,4,Deep Rock Galactic,First-person shooter,Ghost Ship Games,Coffee Stain Publishing,PS4
2,JANUARY,4,Deep Rock Galactic,First-person shooter,Ghost Ship Games,Coffee Stain Publishing,PS5
3,JANUARY,4,The Pedestrian,"Puzzle, platform",Skookum Arts LLC,Skookum Arts LLC,XBO
...,...,...,...,...,...,...,...
983,DECEMBER,22,Valkyrie Profile: Lenneth,Role-playing,tri-Ace,Square Enix,PS5
984,DECEMBER,22,Naraka: Bladepoint,"Action-adventure, battle royale",Thunder Fire Universe X Studio,NetEase Games,XBO
985,DECEMBER,23,Sports Story,"Adventure, sports",Sidebar Games,Sidebar Games,NS
986,DECEMBER,28,Hyper Gunsport,Action,Necrosoft Games,Necrosoft Games,PS4


In [27]:
# View each platform
print(vg_plat['Platform(s)'].unique()) 

['NS' 'PS4' 'PS5' 'XBO' 'XSX' 'Win' 'iOS' 'Droid' 'Mac' 'Lin' 'Stadia'
 'Atari' 'VCS' 'Arcade' '3DS']


In [28]:
# Create Dataframe with all values separated
vg_all = vg_plat.copy()
vg_all = pd.concat([vg_all, vg_plat['Genre(s)'].str.split(',', expand=True, )], axis=1) 
vg_all = vg_all.melt(id_vars=['Month', 'Day', 'Title', 'Platform(s)'],
                    value_vars=range(4), value_name='Genre')
vg_all = vg_all[vg_all['Genre'].notna()]

vg_all.head(12)

Unnamed: 0,Month,Day,Title,Platform(s),variable,Genre
0,JANUARY,3,Freddi Fish 3: The Case of the Stolen Conch Shell,NS,0,Adventure
1,JANUARY,3,Putt-Putt Travels Through Time,NS,0,Adventure
2,JANUARY,4,Deep Rock Galactic,PS4,0,First-person shooter
3,JANUARY,4,Deep Rock Galactic,PS5,0,First-person shooter
4,JANUARY,4,The Pedestrian,XBO,0,Puzzle
5,JANUARY,4,The Pedestrian,XSX,0,Puzzle
6,JANUARY,6,Demon Gaze Extra,NS,0,Role-playing
7,JANUARY,6,Demon Gaze Extra,PS4,0,Role-playing
8,JANUARY,6,QuickSpot,NS,0,Puzzle
9,JANUARY,10,Japanese Rail Sim: Journey to Kyoto,NS,0,Train simulator


In [29]:
# Create Genre Dataframe
vg_genre = vg.copy()
vg_genre = pd.concat([vg_genre, vg['Genre(s)'].str.split(',', expand=True, )], axis=1) 
vg_genre = vg_genre.melt(id_vars=['Month', 'Day', 'Title', 'Platform(s)'],
                    value_vars=range(4), value_name='Genre')
vg_genre = vg_genre[vg_genre['Genre'].notna()]
#vg_genre

In [30]:
# Clean Genre column (Couldn't figure out a way to effectively mass clean, any suggestions are welcome)
vg['Genre(s)'] = vg['Genre(s)'].str.strip()
vg['Genre(s)'] = vg['Genre(s)'].replace('action', 'Action')
vg['Genre(s)'] = vg['Genre(s)'].replace('adventure', 'Aventure')  
#print(vg_all['Genre'].unique())

## Exploratory Data Analysis

### Amount of Video Games Released Per Month


In [31]:
# Show total games released in 2022
print(f'Total Games Released: {vg.Title.count()}') 

# Show games released per month
vg.Month.value_counts() 

Total Games Released: 987


JUNE         127
JULY         112
SEPTEMBER    110
AUGUST        95
MAY           83
OCTOBER       76
FEBRUARY      74
MARCH         71
NOVEMBER      70
APRIL         63
JANUARY       57
DECEMBER      49
Name: Month, dtype: int64

In [32]:
#Graph
px.histogram(vg, x='Month', 
            title='Number of Games Released per Month').update_layout(   
            yaxis_title='Games Released') 

*Observations*
- A majority of games were released in the middle of the year (most in june). The Least games were released in january and December(the beginning of the year and the very end).

### Amount of Video Games Released by Day

In [33]:
# Graph
px.histogram(vg, x='Day',
            nbins=31,
            title='Number of Games Released by Day').update_layout(  
            bargap=0.07, yaxis_title='Games Released',
            xaxis = dict(
                tickmode = 'linear',
                tick0 = 1,
                dtick = 1)
            ) 

### Amount of Video Games Released Per Platform


In [34]:
# Show number of games released per platform in 2022
vg_plat['Platform(s)'].value_counts()

NS        492
Win       490
PS4       407
PS5       319
XBO       304
XSX       284
iOS        54
Mac        50
Stadia     46
Droid      45
Lin        32
Atari       4
VCS         4
Arcade      1
3DS         1
Name: Platform(s), dtype: int64

In [35]:
# Graph above info
px.histogram(vg_plat, y='Platform(s)',
             title='Amount of Games Released Per Platform').update_layout(
            xaxis_title='Games Released').update_yaxes(
            categoryorder='total ascending')

In [36]:
# Graph number of games released per month by platform
px.histogram(vg_plat, x='Month', 
            color='Platform(s)', 
            title='Amount of Games Released Per Month by Platform').update_layout(  
            yaxis_title='Games Released') 

*Observations*
- For the most part the older gen version of consoles(example: PS4 is older than PS5) released more games than the newer generation per month.
    This happened more with playstation than with xbox.
- Windows and Nintendo Switch released the most games per month, with windows only behind by 2 games overall.

### Number of games Released by Genre

In [37]:
vg_genre['Genre'].value_counts()

Adventure              98
Action role-playing    85
Role-playing           77
Action-adventure       64
Visual novel           62
                       ..
Narrative adventure     1
Deck-building           1
Brawler                 1
Action game             1
 role-playing game      1
Name: Genre, Length: 168, dtype: int64

In [38]:
px.histogram(vg_genre, x='Genre').update_xaxes(categoryorder='total descending')

*Observations*
- A Majority of games released are some form of adventure or role-playing.