In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import regex as re

In [2]:
# Load the games feature table from CSV and preview the first rows.
GAMES_CSV = 'games-features.csv'
data = pd.read_csv(GAMES_CSV)
data.head(n=5)

Unnamed: 0,QueryID,ResponseID,QueryName,ResponseName,ReleaseDate,RequiredAge,DemoCount,DeveloperCount,DLCCount,Metacritic,...,LegalNotice,Reviews,SupportedLanguages,Website,PCMinReqsText,PCRecReqsText,LinuxMinReqsText,LinuxRecReqsText,MacMinReqsText,MacRecReqsText
0,10,10,Counter-Strike,Counter-Strike,Nov 1 2000,0,0,1,0,88,...,,,English French German Italian Spanish Simplifi...,,Minimum: 500 mhz processor 96mb ram 16mb video...,,Minimum: Linux Ubuntu 12.04 Dual-core from Int...,,Minimum: OS X Snow Leopard 10.6.3 1GB RAM 4GB...,
1,20,20,Team Fortress Classic,Team Fortress Classic,Apr 1 1999,0,0,1,0,0,...,,,English French German Italian Spanish,,Minimum: 500 mhz processor 96mb ram 16mb video...,,Minimum: Linux Ubuntu 12.04 Dual-core from Int...,,Minimum: OS X Snow Leopard 10.6.3 1GB RAM 4GB...,
2,30,30,Day of Defeat,Day of Defeat,May 1 2003,0,0,1,0,79,...,,,English French German Italian Spanish,http://www.dayofdefeat.com/,Minimum: 500 mhz processor 96mb ram 16mb video...,,Minimum: Linux Ubuntu 12.04 Dual-core from Int...,,Minimum: OS X Snow Leopard 10.6.3 1GB RAM 4GB...,
3,40,40,Deathmatch Classic,Deathmatch Classic,Jun 1 2001,0,0,1,0,0,...,,,English French German Italian Spanish,,Minimum: 500 mhz processor 96mb ram 16mb video...,,Minimum: Linux Ubuntu 12.04 Dual-core from Int...,,Minimum: OS X Snow Leopard 10.6.3 1GB RAM 4GB...,
4,50,50,Half-Life: Opposing Force,Half-Life: Opposing Force,Nov 1 1999,0,0,1,0,0,...,,,English French German Korean,,Minimum: 500 mhz processor 96mb ram 16mb video...,,Minimum: Linux Ubuntu 12.04 Dual-core from Int...,,Minimum: OS X Snow Leopard 10.6.3 1GB RAM 4GB...,


In [3]:
# For now, drop the columns that are not needed for this analysis.
# More columns can be dropped depending on what should be worked on.
drop_cols = ['ResponseID','ResponseName','DRMNotice', 'ExtUserAcctNotice', 'DemoCount',
             'LinuxRecReqsText','MacRecReqsText','MacMinReqsText','MovieCount',
             'PackageCount','SteamSpyOwnersVariance','SteamSpyPlayersVariance',
             'SupportEmail','SupportURL','Website','AboutText','Background','ShortDescrip',
             'DetailedDescrip','HeaderImage','LegalNotice','Reviews','PCMinReqsText',
             'PCRecReqsText','LinuxMinReqsText','PCReqsHaveMin','PCReqsHaveRec',
             'LinuxReqsHaveMin', 'LinuxReqsHaveRec','MacReqsHaveMin', 'MacReqsHaveRec']

# Rebind `data` to the reduced frame instead of mutating it in place:
# `inplace=True` brings no performance benefit and makes cell state harder to
# reason about. A missing label still raises KeyError, exactly as before.
data = data.drop(columns=drop_cols)

In [4]:
# Flag column -> label used in the combined 'Platforms' string.
# Dict order fixes the order in which labels appear in the output.
platform_labels = {
    'PlatformWindows': 'Windows',
    'PlatformLinux': 'Linux',
    'PlatformMac': 'Mac',
}

# Walk the flag columns in lockstep (one tuple of flags per row) instead of
# using DataFrame.iterrows(), which builds a full Series for every row.
# A label is kept whenever its flag is truthy, matching the original `if` chain.
platforms = [
    ', '.join(label for label, flag in zip(platform_labels.values(), flags) if flag)
    for flags in zip(*(data[column] for column in platform_labels))
]

data['Platforms'] = platforms

In [5]:
# Inspect the combined, comma-separated platform labels per row.
data['Platforms']

0        Windows, Linux, Mac
1        Windows, Linux, Mac
2        Windows, Linux, Mac
3        Windows, Linux, Mac
4        Windows, Linux, Mac
                ...         
13352                Windows
13353                Windows
13354                Windows
13355                Windows
13356           Windows, Mac
Name: Platforms, Length: 13357, dtype: object

In [6]:
# Flag column -> label used in the combined 'Categories' string.
# Dict order fixes the order in which labels appear in the output.
category_labels = {
    'CategorySinglePlayer': 'SinglePlayer',
    'CategoryMultiplayer': 'Multiplayer',
    'CategoryCoop': 'Coop',
    'CategoryMMO': 'MMO',
    'CategoryInAppPurchase': 'InAppPurchase',
    'CategoryIncludeSrcSDK': 'IncludeSrcSDK',
    'CategoryIncludeLevelEditor': 'IncludeLevelEditor',
    'CategoryVRSupport': 'VRSupport',
}

# Walk the flag columns in lockstep (one tuple of flags per row) instead of
# using DataFrame.iterrows(), which builds a full Series for every row.
# A label is kept whenever its flag is truthy, matching the original `if` chain.
categories = [
    ', '.join(label for label, flag in zip(category_labels.values(), flags) if flag)
    for flags in zip(*(data[column] for column in category_labels))
]

data['Categories'] = categories

In [7]:
# Inspect the combined, comma-separated category labels per row.
data['Categories']

0                            Multiplayer
1                            Multiplayer
2                            Multiplayer
3                            Multiplayer
4              SinglePlayer, Multiplayer
                      ...               
13352                       SinglePlayer
13353                       SinglePlayer
13354                       SinglePlayer
13355    SinglePlayer, Multiplayer, Coop
13356                       SinglePlayer
Name: Categories, Length: 13357, dtype: object

In [8]:
# Flag column -> label used in the combined 'Genres' string.
# Dict order fixes the order in which labels appear in the output.
genre_labels = {
    'GenreIsNonGame': 'NonGame',
    'GenreIsIndie': 'Indie',
    'GenreIsAction': 'Action',
    'GenreIsAdventure': 'Adventure',
    'GenreIsCasual': 'Casual',
    'GenreIsStrategy': 'Strategy',
    'GenreIsRPG': 'RPG',
    'GenreIsSimulation': 'Simulation',
    'GenreIsEarlyAccess': 'EarlyAccess',
    'GenreIsFreeToPlay': 'FreeToPlay',
    'GenreIsSports': 'Sports',
    'GenreIsRacing': 'Racing',
    'GenreIsMassivelyMultiplayer': 'MassivelyMultiplayer',
}

# Walk the flag columns in lockstep (one tuple of flags per row) instead of
# using DataFrame.iterrows(), which builds a full Series for every row.
# A label is kept whenever its flag is truthy, matching the original `if` chain.
genres = [
    ', '.join(label for label, flag in zip(genre_labels.values(), flags) if flag)
    for flags in zip(*(data[column] for column in genre_labels))
]

data['Genres'] = genres

In [9]:
# Inspect the combined, comma-separated genre labels per row.
data['Genres']

0                                    Action
1                                    Action
2                                    Action
3                                    Action
4                                    Action
                        ...                
13352                 Indie, Casual, Sports
13353                         Indie, Casual
13354      Indie, Action, Adventure, Casual
13355         Indie, Action, Casual, Sports
13356    Indie, Adventure, Casual, Strategy
Name: Genres, Length: 13357, dtype: object

In [10]:
# The per-flag platform/category/genre columns have been folded into the
# 'Platforms', 'Categories' and 'Genres' string columns, so drop them here
# together with 'QueryID'.
drop_columns = ['QueryID','PlatformWindows', 'PlatformLinux', 'PlatformMac','CategorySinglePlayer',
                'CategoryMultiplayer', 'CategoryCoop',
                'CategoryMMO', 'CategoryInAppPurchase', 'CategoryIncludeSrcSDK',
                'CategoryIncludeLevelEditor', 'CategoryVRSupport', 'GenreIsNonGame',
                'GenreIsIndie', 'GenreIsAction', 'GenreIsAdventure', 'GenreIsCasual',
                'GenreIsStrategy', 'GenreIsRPG', 'GenreIsSimulation',
                'GenreIsEarlyAccess', 'GenreIsFreeToPlay', 'GenreIsSports',
                'GenreIsRacing', 'GenreIsMassivelyMultiplayer']

# Rebind `data` to the reduced frame instead of mutating it in place:
# `inplace=True` brings no performance benefit and makes cell state harder to
# reason about. A missing label still raises KeyError, exactly as before.
data = data.drop(columns=drop_columns)

In [11]:
# List the columns that remain after both drop steps.
data.columns

Index(['QueryName', 'ReleaseDate', 'RequiredAge', 'DeveloperCount', 'DLCCount',
       'Metacritic', 'RecommendationCount', 'PublisherCount',
       'ScreenshotCount', 'SteamSpyOwners', 'SteamSpyPlayersEstimate',
       'AchievementCount', 'AchievementHighlightedCount', 'ControllerSupport',
       'IsFree', 'FreeVerAvail', 'PurchaseAvail', 'SubscriptionAvail',
       'PriceCurrency', 'PriceInitial', 'PriceFinal', 'SupportedLanguages',
       'Platforms', 'Categories', 'Genres'],
      dtype='object')

In [12]:
# Pull out the comma-separated genre strings as the first apriori input.
apriori_one = data.loc[:, 'Genres']
apriori_one

0                                    Action
1                                    Action
2                                    Action
3                                    Action
4                                    Action
                        ...                
13352                 Indie, Casual, Sports
13353                         Indie, Casual
13354      Indie, Action, Adventure, Casual
13355         Indie, Action, Casual, Sports
13356    Indie, Adventure, Casual, Strategy
Name: Genres, Length: 13357, dtype: object

In [13]:
# Wrap the genre strings in a single-column frame named 'Genres'.
apriori_df = apriori_one.to_frame(name='Genres')

In [14]:
# Spread each ', '-separated genre string over one column per genre; rows with
# fewer genres leave their remaining columns empty.
genre_strings = apriori_df['Genres']
genres_split = genre_strings.str.split(pat=', ', expand=True)

In [15]:
# Inspect the one-genre-per-column layout.
genres_split

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,Action,,,,,,,,,
1,Action,,,,,,,,,
2,Action,,,,,,,,,
3,Action,,,,,,,,,
4,Action,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...
13352,Indie,Casual,Sports,,,,,,,
13353,Indie,Casual,,,,,,,,
13354,Indie,Action,Adventure,Casual,,,,,,
13355,Indie,Action,Casual,Sports,,,,,,


In [16]:
# Pull out the comma-separated category strings as the second apriori input.
apriori_two = data.loc[:, 'Categories']
apriori_two

0                            Multiplayer
1                            Multiplayer
2                            Multiplayer
3                            Multiplayer
4              SinglePlayer, Multiplayer
                      ...               
13352                       SinglePlayer
13353                       SinglePlayer
13354                       SinglePlayer
13355    SinglePlayer, Multiplayer, Coop
13356                       SinglePlayer
Name: Categories, Length: 13357, dtype: object

In [17]:
# Wrap the category strings in a single-column frame named 'Categories'.
apriori_df1 = apriori_two.to_frame(name='Categories')

In [18]:
# Spread each ', '-separated category string over one column per category; rows
# with fewer categories leave their remaining columns empty.
category_strings = apriori_df1['Categories']
cat_split = category_strings.str.split(pat=', ', expand=True)

In [19]:
# Inspect the one-category-per-column layout.
cat_split

Unnamed: 0,0,1,2,3,4,5
0,Multiplayer,,,,,
1,Multiplayer,,,,,
2,Multiplayer,,,,,
3,Multiplayer,,,,,
4,SinglePlayer,Multiplayer,,,,
...,...,...,...,...,...,...
13352,SinglePlayer,,,,,
13353,SinglePlayer,,,,,
13354,SinglePlayer,,,,,
13355,SinglePlayer,Multiplayer,Coop,,,


In [20]:
# Place the genre columns and the category columns side by side, aligned on the
# row index. Both inputs carry integer column labels starting at 0, so the
# labels repeat in the combined frame.
final_df = pd.concat(objs=[genres_split, cat_split], axis='columns')

In [21]:
# Inspect the combined genre + category item columns.
final_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,0.1,1.1,2.1,3.1,4.1,5.1
0,Action,,,,,,,,,,Multiplayer,,,,,
1,Action,,,,,,,,,,Multiplayer,,,,,
2,Action,,,,,,,,,,Multiplayer,,,,,
3,Action,,,,,,,,,,Multiplayer,,,,,
4,Action,,,,,,,,,,SinglePlayer,Multiplayer,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13352,Indie,Casual,Sports,,,,,,,,SinglePlayer,,,,,
13353,Indie,Casual,,,,,,,,,SinglePlayer,,,,,
13354,Indie,Action,Adventure,Casual,,,,,,,SinglePlayer,,,,,
13355,Indie,Action,Casual,Sports,,,,,,,SinglePlayer,Multiplayer,Coop,,,


In [22]:
# str.split(expand=True) pads short rows with missing values (None), not with
# the literal string 'None', so replace('None', '') matched nothing and left the
# frame unchanged. fillna('') is what actually blanks the padding cells.
final_df = final_df.fillna('')

In [23]:
# Re-inspect the combined frame after the placeholder clean-up step.
final_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,0.1,1.1,2.1,3.1,4.1,5.1
0,Action,,,,,,,,,,Multiplayer,,,,,
1,Action,,,,,,,,,,Multiplayer,,,,,
2,Action,,,,,,,,,,Multiplayer,,,,,
3,Action,,,,,,,,,,Multiplayer,,,,,
4,Action,,,,,,,,,,SinglePlayer,Multiplayer,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13352,Indie,Casual,Sports,,,,,,,,SinglePlayer,,,,,
13353,Indie,Casual,,,,,,,,,SinglePlayer,,,,,
13354,Indie,Action,Adventure,Casual,,,,,,,SinglePlayer,,,,,
13355,Indie,Action,Casual,Sports,,,,,,,SinglePlayer,Multiplayer,Coop,,,


In [24]:
# Persist the combined item columns without the row index.
final_df.to_csv(path_or_buf='final.csv', index=False)

In [25]:
# Copy final.csv to output_file.csv line by line, discarding every empty field
# so the remaining values on each row are packed to the left.
with open('final.csv', 'r') as input_file, open('output_file.csv', 'w') as output_file:
    for raw_line in input_file:
        # Keep only the non-empty comma-separated fields of this line.
        kept_fields = [field for field in raw_line.strip().split(',') if field]
        output_file.write(','.join(kept_fields) + '\n')

In [27]:
# Read the compacted rows back into a DataFrame and display it. The header line
# keeps all of its labels (none are empty), while data rows carry only their
# non-empty values, so trailing columns come back as missing.
# NOTE(review): the displayed index ends well below the 13357 rows of `data`;
# presumably rows with no genre and no category became blank lines and were
# skipped by read_csv. Confirm this row loss is intended.
df = pd.read_csv('output_file.csv')
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,0.1,1.1,2.1,3.1,4.1,5.1
0,Action,Multiplayer,,,,,,,,,,,,,,
1,Action,Multiplayer,,,,,,,,,,,,,,
2,Action,Multiplayer,,,,,,,,,,,,,,
3,Action,Multiplayer,,,,,,,,,,,,,,
4,Action,SinglePlayer,Multiplayer,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12658,Indie,Casual,Sports,SinglePlayer,,,,,,,,,,,,
12659,Indie,Casual,SinglePlayer,,,,,,,,,,,,,
12660,Indie,Action,Adventure,Casual,SinglePlayer,,,,,,,,,,,
12661,Indie,Action,Casual,Sports,SinglePlayer,Multiplayer,Coop,,,,,,,,,
