In [87]:
import matplotlib.pyplot as plt
import pandas as pd
import scipy.stats as st
import seaborn as sns
from random import randrange
from scipy.stats import linregress
# Study data files
path = "Resources/backloggd_games.csv"

# Read the mouse data and the study results
game_df = pd.read_csv(path)

# Display the data table for preview
game_df.head()

Unnamed: 0.1,Unnamed: 0,Title,Release_Date,Developers,Summary,Platforms,Genres,Rating,Plays,Playing,Backlogs,Wishlist,Lists,Reviews
0,0,Elden Ring,"Feb 25, 2022","['FromSoftware', 'Bandai Namco Entertainment']","Elden Ring is a fantasy, action and open world...","['Windows PC', 'PlayStation 4', 'Xbox One', 'P...","['Adventure', 'RPG']",4.5,21K,4.1K,5.6K,5.5K,4.6K,3K
1,1,The Legend of Zelda: Breath of the Wild,"Mar 03, 2017","['Nintendo', 'Nintendo EPD Production Group No...",The Legend of Zelda: Breath of the Wild is the...,"['Wii U', 'Nintendo Switch']","['Adventure', 'Puzzle']",4.4,35K,3.1K,5.6K,3K,5.1K,3K
2,2,Hades,"Dec 07, 2018",['Supergiant Games'],A rogue-lite hack and slash dungeon crawler in...,"['Windows PC', 'Mac', 'PlayStation 4', 'Xbox O...","['Adventure', 'Brawler', 'Indie', 'RPG']",4.3,25K,3.5K,7.3K,4K,3.2K,2.1K
3,3,Hollow Knight,"Feb 24, 2017",['Team Cherry'],A 2D metroidvania with an emphasis on close co...,"['Windows PC', 'Mac', 'Linux', 'Nintendo Switch']","['Adventure', 'Indie', 'Platform']",4.4,25K,2.7K,9.6K,2.6K,3.4K,2.1K
4,4,Undertale,"Sep 15, 2015","['tobyfox', '8-4']","A small child falls into the Underground, wher...","['Windows PC', 'Mac', 'Linux', 'PlayStation 4'...","['Adventure', 'Indie', 'RPG', 'Turn Based Stra...",4.2,32K,728,5.7K,2.1K,3.9K,2.5K


In [88]:
# Oh the errors that be. So I ran into a weird issue when trying to count the platforms and genres, one that would occur if I did the .str.replace function to get rid of the "[,],'" first.
# I did a simple str.split function to turn the PLatforms and Genres column into a list using the comma (',').
game_df['Platforms'] = game_df['Platforms'].str.split(',')
game_df['Genres'] = game_df['Genres'].str.split(',')

In [89]:
# After converting those columns into a list, I could now continue with creating a code that can count the Platforms and Genres, then put them in new columns.
# I thought that maybe len(x) could work, it failed, then tried .apply len(x), which also failed.
# There was no specific function to accomplish what I needed, which reminded me of lambda, which I had previously looked up because the TA had given the group a lambda function before.
# After repeated trial and error I finally came up with the code below.
# ".apply" applies all the things in the () to the Platforms column. 
# "lambda x:" is used because "lambda", x, and colon, must be used in that order to start the lambda function.
# "len(x)" is used to count the x or elements within the cell.
platform_counts = game_df['Platforms'].apply(lambda x: len(x))
genre_counts = game_df['Genres'].apply(lambda x: len(x))

# Adding new columns for platform and genre counts through the use of brackets..
game_df['Platform_Count'] = platform_counts
game_df['Genre_Count'] = genre_counts

game_df.head()

Unnamed: 0.1,Unnamed: 0,Title,Release_Date,Developers,Summary,Platforms,Genres,Rating,Plays,Playing,Backlogs,Wishlist,Lists,Reviews,Platform_Count,Genre_Count
0,0,Elden Ring,"Feb 25, 2022","['FromSoftware', 'Bandai Namco Entertainment']","Elden Ring is a fantasy, action and open world...","[['Windows PC', 'PlayStation 4', 'Xbox One',...","[['Adventure', 'RPG']]",4.5,21K,4.1K,5.6K,5.5K,4.6K,3K,5,2
1,1,The Legend of Zelda: Breath of the Wild,"Mar 03, 2017","['Nintendo', 'Nintendo EPD Production Group No...",The Legend of Zelda: Breath of the Wild is the...,"[['Wii U', 'Nintendo Switch']]","[['Adventure', 'Puzzle']]",4.4,35K,3.1K,5.6K,3K,5.1K,3K,2,2
2,2,Hades,"Dec 07, 2018",['Supergiant Games'],A rogue-lite hack and slash dungeon crawler in...,"[['Windows PC', 'Mac', 'PlayStation 4', 'Xb...","[['Adventure', 'Brawler', 'Indie', 'RPG']]",4.3,25K,3.5K,7.3K,4K,3.2K,2.1K,7,4
3,3,Hollow Knight,"Feb 24, 2017",['Team Cherry'],A 2D metroidvania with an emphasis on close co...,"[['Windows PC', 'Mac', 'Linux', 'Nintendo S...","[['Adventure', 'Indie', 'Platform']]",4.4,25K,2.7K,9.6K,2.6K,3.4K,2.1K,4,3
4,4,Undertale,"Sep 15, 2015","['tobyfox', '8-4']","A small child falls into the Underground, wher...","[['Windows PC', 'Mac', 'Linux', 'PlayStatio...","[['Adventure', 'Indie', 'RPG', 'Turn Based ...",4.2,32K,728,5.7K,2.1K,3.9K,2.5K,7,4


In [90]:
# Now that the list was done, I need to convert back to a string so I could use str.replace (as was originally planned but would change) to get rid of the extra characters.
# I copied the code used for turning the developers column into a string and then altered it for Platforms and Genres.
game_df['Platforms'] = game_df['Platforms'].astype(str)
game_df['Genres'] = game_df['Genres'].astype(str)
game_df.head()

Unnamed: 0.1,Unnamed: 0,Title,Release_Date,Developers,Summary,Platforms,Genres,Rating,Plays,Playing,Backlogs,Wishlist,Lists,Reviews,Platform_Count,Genre_Count
0,0,Elden Ring,"Feb 25, 2022","['FromSoftware', 'Bandai Namco Entertainment']","Elden Ring is a fantasy, action and open world...","[""['Windows PC'"", "" 'PlayStation 4'"", "" 'Xbox ...","[""['Adventure'"", "" 'RPG']""]",4.5,21K,4.1K,5.6K,5.5K,4.6K,3K,5,2
1,1,The Legend of Zelda: Breath of the Wild,"Mar 03, 2017","['Nintendo', 'Nintendo EPD Production Group No...",The Legend of Zelda: Breath of the Wild is the...,"[""['Wii U'"", "" 'Nintendo Switch']""]","[""['Adventure'"", "" 'Puzzle']""]",4.4,35K,3.1K,5.6K,3K,5.1K,3K,2,2
2,2,Hades,"Dec 07, 2018",['Supergiant Games'],A rogue-lite hack and slash dungeon crawler in...,"[""['Windows PC'"", "" 'Mac'"", "" 'PlayStation 4'""...","[""['Adventure'"", "" 'Brawler'"", "" 'Indie'"", "" '...",4.3,25K,3.5K,7.3K,4K,3.2K,2.1K,7,4
3,3,Hollow Knight,"Feb 24, 2017",['Team Cherry'],A 2D metroidvania with an emphasis on close co...,"[""['Windows PC'"", "" 'Mac'"", "" 'Linux'"", "" 'Nin...","[""['Adventure'"", "" 'Indie'"", "" 'Platform']""]",4.4,25K,2.7K,9.6K,2.6K,3.4K,2.1K,4,3
4,4,Undertale,"Sep 15, 2015","['tobyfox', '8-4']","A small child falls into the Underground, wher...","[""['Windows PC'"", "" 'Mac'"", "" 'Linux'"", "" 'Pla...","[""['Adventure'"", "" 'Indie'"", "" 'RPG'"", "" 'Turn...",4.2,32K,728,5.7K,2.1K,3.9K,2.5K,7,4


In [92]:
# Originally was 8 lines of code to remove the unwanted characters but after playing around a bit, I got it lowered to two.
game_df['Platforms'] = game_df['Platforms'].str.replace("[","").str.replace("]","").str.replace("'","").str.replace('"','')
game_df['Genres'] = game_df['Genres'].str.replace("[","").str.replace("]","").str.replace("'","").str.replace('"','')
game_df.head()

Unnamed: 0.1,Unnamed: 0,Title,Release_Date,Developers,Summary,Platforms,Genres,Rating,Plays,Playing,Backlogs,Wishlist,Lists,Reviews,Platform_Count,Genre_Count
0,0,Elden Ring,"Feb 25, 2022","['FromSoftware', 'Bandai Namco Entertainment']","Elden Ring is a fantasy, action and open world...","Windows PC, PlayStation 4, Xbox One, PlaySt...","Adventure, RPG",4.5,21K,4.1K,5.6K,5.5K,4.6K,3K,5,2
1,1,The Legend of Zelda: Breath of the Wild,"Mar 03, 2017","['Nintendo', 'Nintendo EPD Production Group No...",The Legend of Zelda: Breath of the Wild is the...,"Wii U, Nintendo Switch","Adventure, Puzzle",4.4,35K,3.1K,5.6K,3K,5.1K,3K,2,2
2,2,Hades,"Dec 07, 2018",['Supergiant Games'],A rogue-lite hack and slash dungeon crawler in...,"Windows PC, Mac, PlayStation 4, Xbox One, ...","Adventure, Brawler, Indie, RPG",4.3,25K,3.5K,7.3K,4K,3.2K,2.1K,7,4
3,3,Hollow Knight,"Feb 24, 2017",['Team Cherry'],A 2D metroidvania with an emphasis on close co...,"Windows PC, Mac, Linux, Nintendo Switch","Adventure, Indie, Platform",4.4,25K,2.7K,9.6K,2.6K,3.4K,2.1K,4,3
4,4,Undertale,"Sep 15, 2015","['tobyfox', '8-4']","A small child falls into the Underground, wher...","Windows PC, Mac, Linux, PlayStation 4, Xbo...","Adventure, Indie, RPG, Turn Based Strategy",4.2,32K,728,5.7K,2.1K,3.9K,2.5K,7,4


In [76]:
game_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 60000 entries, 0 to 59999
Data columns (total 16 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Unnamed: 0      60000 non-null  int64  
 1   Title           60000 non-null  object 
 2   Release_Date    60000 non-null  object 
 3   Developers      60000 non-null  object 
 4   Summary         55046 non-null  object 
 5   Platforms       60000 non-null  object 
 6   Genres          60000 non-null  object 
 7   Rating          25405 non-null  float64
 8   Plays           60000 non-null  object 
 9   Playing         60000 non-null  object 
 10  Backlogs        60000 non-null  object 
 11  Wishlist        60000 non-null  object 
 12  Lists           60000 non-null  object 
 13  Reviews         60000 non-null  object 
 14  Platform_Count  60000 non-null  int64  
 15  Genre_Count     60000 non-null  int64  
dtypes: float64(1), int64(3), object(12)
memory usage: 7.3+ MB


In [77]:
game_df.Developers.nunique()

18356

In [78]:
game_df.Developers.value_counts()

Developers
[]                                        18091
['Konami']                                  470
['Nintendo']                                325
['Capcom']                                  318
['Sega']                                    185
                                          ...  
['NanoPiko Games', 'Plug In Digital']         1
['DevCubeStudio', 'Fibrum Limited']           1
['TeamFatBears']                              1
['Blueside', 'Microsoft Game Studios']        1
['Travian Games', 'Shadow Masters']           1
Name: count, Length: 18356, dtype: int64

In [79]:
game_df.Title.value_counts()

Title
Date A Live Twin Edition: Rio Reincarnation            24
Tube                                                   21
Yakiniku Bugyou                                        20
The Store is Closed                                    20
Nonogram Survival                                      19
                                                       ..
Spider-Man: Homecoming - Virtual Reality Experience     1
Hammerfight                                             1
Team USA Basketball                                     1
Syvalion                                                1
Orbit - Playing with Gravity                            1
Name: count, Length: 40985, dtype: int64

In [80]:
game_df.Title.nunique()

40985

In [81]:
game_df['Developers']=game_df['Developers'].str.split(',').str[0]
game_df.head()

Unnamed: 0.1,Unnamed: 0,Title,Release_Date,Developers,Summary,Platforms,Genres,Rating,Plays,Playing,Backlogs,Wishlist,Lists,Reviews,Platform_Count,Genre_Count
0,0,Elden Ring,"Feb 25, 2022",['FromSoftware',"Elden Ring is a fantasy, action and open world...","""Windows PC"", "" PlayStation 4"", "" Xbox One"", ""...","""Adventure"", "" RPG""",4.5,21K,4.1K,5.6K,5.5K,4.6K,3K,5,2
1,1,The Legend of Zelda: Breath of the Wild,"Mar 03, 2017",['Nintendo',The Legend of Zelda: Breath of the Wild is the...,"""Wii U"", "" Nintendo Switch""","""Adventure"", "" Puzzle""",4.4,35K,3.1K,5.6K,3K,5.1K,3K,2,2
2,2,Hades,"Dec 07, 2018",['Supergiant Games'],A rogue-lite hack and slash dungeon crawler in...,"""Windows PC"", "" Mac"", "" PlayStation 4"", "" Xbox...","""Adventure"", "" Brawler"", "" Indie"", "" RPG""",4.3,25K,3.5K,7.3K,4K,3.2K,2.1K,7,4
3,3,Hollow Knight,"Feb 24, 2017",['Team Cherry'],A 2D metroidvania with an emphasis on close co...,"""Windows PC"", "" Mac"", "" Linux"", "" Nintendo Swi...","""Adventure"", "" Indie"", "" Platform""",4.4,25K,2.7K,9.6K,2.6K,3.4K,2.1K,4,3
4,4,Undertale,"Sep 15, 2015",['tobyfox',"A small child falls into the Underground, wher...","""Windows PC"", "" Mac"", "" Linux"", "" PlayStation ...","""Adventure"", "" Indie"", "" RPG"", "" Turn Based St...",4.2,32K,728,5.7K,2.1K,3.9K,2.5K,7,4


In [82]:
game_df["Developers"] = game_df["Developers"].astype(str)
game_df.head()

Unnamed: 0.1,Unnamed: 0,Title,Release_Date,Developers,Summary,Platforms,Genres,Rating,Plays,Playing,Backlogs,Wishlist,Lists,Reviews,Platform_Count,Genre_Count
0,0,Elden Ring,"Feb 25, 2022",['FromSoftware',"Elden Ring is a fantasy, action and open world...","""Windows PC"", "" PlayStation 4"", "" Xbox One"", ""...","""Adventure"", "" RPG""",4.5,21K,4.1K,5.6K,5.5K,4.6K,3K,5,2
1,1,The Legend of Zelda: Breath of the Wild,"Mar 03, 2017",['Nintendo',The Legend of Zelda: Breath of the Wild is the...,"""Wii U"", "" Nintendo Switch""","""Adventure"", "" Puzzle""",4.4,35K,3.1K,5.6K,3K,5.1K,3K,2,2
2,2,Hades,"Dec 07, 2018",['Supergiant Games'],A rogue-lite hack and slash dungeon crawler in...,"""Windows PC"", "" Mac"", "" PlayStation 4"", "" Xbox...","""Adventure"", "" Brawler"", "" Indie"", "" RPG""",4.3,25K,3.5K,7.3K,4K,3.2K,2.1K,7,4
3,3,Hollow Knight,"Feb 24, 2017",['Team Cherry'],A 2D metroidvania with an emphasis on close co...,"""Windows PC"", "" Mac"", "" Linux"", "" Nintendo Swi...","""Adventure"", "" Indie"", "" Platform""",4.4,25K,2.7K,9.6K,2.6K,3.4K,2.1K,4,3
4,4,Undertale,"Sep 15, 2015",['tobyfox',"A small child falls into the Underground, wher...","""Windows PC"", "" Mac"", "" Linux"", "" PlayStation ...","""Adventure"", "" Indie"", "" RPG"", "" Turn Based St...",4.2,32K,728,5.7K,2.1K,3.9K,2.5K,7,4


In [83]:
game_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 60000 entries, 0 to 59999
Data columns (total 16 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Unnamed: 0      60000 non-null  int64  
 1   Title           60000 non-null  object 
 2   Release_Date    60000 non-null  object 
 3   Developers      60000 non-null  object 
 4   Summary         55046 non-null  object 
 5   Platforms       60000 non-null  object 
 6   Genres          60000 non-null  object 
 7   Rating          25405 non-null  float64
 8   Plays           60000 non-null  object 
 9   Playing         60000 non-null  object 
 10  Backlogs        60000 non-null  object 
 11  Wishlist        60000 non-null  object 
 12  Lists           60000 non-null  object 
 13  Reviews         60000 non-null  object 
 14  Platform_Count  60000 non-null  int64  
 15  Genre_Count     60000 non-null  int64  
dtypes: float64(1), int64(3), object(12)
memory usage: 7.3+ MB


In [84]:
game_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 60000 entries, 0 to 59999
Data columns (total 16 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Unnamed: 0      60000 non-null  int64  
 1   Title           60000 non-null  object 
 2   Release_Date    60000 non-null  object 
 3   Developers      60000 non-null  object 
 4   Summary         55046 non-null  object 
 5   Platforms       60000 non-null  object 
 6   Genres          60000 non-null  object 
 7   Rating          25405 non-null  float64
 8   Plays           60000 non-null  object 
 9   Playing         60000 non-null  object 
 10  Backlogs        60000 non-null  object 
 11  Wishlist        60000 non-null  object 
 12  Lists           60000 non-null  object 
 13  Reviews         60000 non-null  object 
 14  Platform_Count  60000 non-null  int64  
 15  Genre_Count     60000 non-null  int64  
dtypes: float64(1), int64(3), object(12)
memory usage: 7.3+ MB


In [85]:
game_df["Developers"] = game_df["Developers"].apply(lambda x: str(x).lstrip("['"))
game_df["Developers"] = game_df["Developers"].apply(lambda x: str(x).rstrip("]'"))

game_df.head()

Unnamed: 0.1,Unnamed: 0,Title,Release_Date,Developers,Summary,Platforms,Genres,Rating,Plays,Playing,Backlogs,Wishlist,Lists,Reviews,Platform_Count,Genre_Count
0,0,Elden Ring,"Feb 25, 2022",FromSoftware,"Elden Ring is a fantasy, action and open world...","""Windows PC"", "" PlayStation 4"", "" Xbox One"", ""...","""Adventure"", "" RPG""",4.5,21K,4.1K,5.6K,5.5K,4.6K,3K,5,2
1,1,The Legend of Zelda: Breath of the Wild,"Mar 03, 2017",Nintendo,The Legend of Zelda: Breath of the Wild is the...,"""Wii U"", "" Nintendo Switch""","""Adventure"", "" Puzzle""",4.4,35K,3.1K,5.6K,3K,5.1K,3K,2,2
2,2,Hades,"Dec 07, 2018",Supergiant Games,A rogue-lite hack and slash dungeon crawler in...,"""Windows PC"", "" Mac"", "" PlayStation 4"", "" Xbox...","""Adventure"", "" Brawler"", "" Indie"", "" RPG""",4.3,25K,3.5K,7.3K,4K,3.2K,2.1K,7,4
3,3,Hollow Knight,"Feb 24, 2017",Team Cherry,A 2D metroidvania with an emphasis on close co...,"""Windows PC"", "" Mac"", "" Linux"", "" Nintendo Swi...","""Adventure"", "" Indie"", "" Platform""",4.4,25K,2.7K,9.6K,2.6K,3.4K,2.1K,4,3
4,4,Undertale,"Sep 15, 2015",tobyfox,"A small child falls into the Underground, wher...","""Windows PC"", "" Mac"", "" Linux"", "" PlayStation ...","""Adventure"", "" Indie"", "" RPG"", "" Turn Based St...",4.2,32K,728,5.7K,2.1K,3.9K,2.5K,7,4


In [86]:
game_df.Developers.value_counts()

Developers
                     18091
Nintendo               831
Konami                 668
Sega                   510
Capcom                 462
                     ...  
PIxelssoftworks          1
carrotcake.studio        1
Slak Games               1
Smashball Labs           1
Michael Bonardi          1
Name: count, Length: 11329, dtype: int64