# Import the libraries that are necesaries

In [1]:
import pandas as pd
from sklearn.impute import SimpleImputer

### Read the CSVs

In [2]:
game_data = pd.read_csv('../data/raw/games.csv')

In [3]:
game_data

Unnamed: 0,AppID,Name,Release date,Estimated owners,Peak CCU,Required age,Price,DLC count,About the game,Supported languages,...,Average playtime two weeks,Median playtime forever,Median playtime two weeks,Developers,Publishers,Categories,Genres,Tags,Screenshots,Movies
0,20200,Galactic Bowling,"Oct 21, 2008",0 - 20000,0,0,19.99,0,Galactic Bowling is an exaggerated and stylize...,['English'],...,0,0,0,Perpetual FX Creative,Perpetual FX Creative,"Single-player,Multi-player,Steam Achievements,...","Casual,Indie,Sports","Indie,Casual,Sports,Bowling",https://cdn.akamai.steamstatic.com/steam/apps/...,http://cdn.akamai.steamstatic.com/steam/apps/2...
1,655370,Train Bandit,"Oct 12, 2017",0 - 20000,0,0,0.99,0,THE LAW!! Looks to be a showdown atop a train....,"['English', 'French', 'Italian', 'German', 'Sp...",...,0,0,0,Rusty Moyher,Wild Rooster,"Single-player,Steam Achievements,Full controll...","Action,Indie","Indie,Action,Pixel Graphics,2D,Retro,Arcade,Sc...",https://cdn.akamai.steamstatic.com/steam/apps/...,http://cdn.akamai.steamstatic.com/steam/apps/2...
2,1732930,Jolt Project,"Nov 17, 2021",0 - 20000,0,0,4.99,0,Jolt Project: The army now has a new robotics ...,"['English', 'Portuguese - Brazil']",...,0,0,0,Campião Games,Campião Games,Single-player,"Action,Adventure,Indie,Strategy",,https://cdn.akamai.steamstatic.com/steam/apps/...,http://cdn.akamai.steamstatic.com/steam/apps/2...
3,1355720,Henosis™,"Jul 23, 2020",0 - 20000,0,0,5.99,0,HENOSIS™ is a mysterious 2D Platform Puzzler w...,"['English', 'French', 'Italian', 'German', 'Sp...",...,0,0,0,Odd Critter Games,Odd Critter Games,"Single-player,Full controller support","Adventure,Casual,Indie","2D Platformer,Atmospheric,Surreal,Mystery,Puzz...",https://cdn.akamai.steamstatic.com/steam/apps/...,http://cdn.akamai.steamstatic.com/steam/apps/2...
4,1139950,Two Weeks in Painland,"Feb 3, 2020",0 - 20000,0,0,0.00,0,ABOUT THE GAME Play as a hacker who has arrang...,"['English', 'Spanish - Spain']",...,0,0,0,Unusual Games,Unusual Games,"Single-player,Steam Achievements","Adventure,Indie","Indie,Adventure,Nudity,Violent,Sexual Content,...",https://cdn.akamai.steamstatic.com/steam/apps/...,http://cdn.akamai.steamstatic.com/steam/apps/2...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
85098,2669080,Mannerheim's Saloon Car,"Jan 2, 2024",0 - 0,0,0,0.00,0,Marshal Mannerheim’s Saloon Car is the train c...,"['English', 'Finnish']",...,0,0,0,Xamk Game Studios,"Sodan ja rauhan keskus Muisti, Päämajamuseo","Single-player,Tracked Controller Support,VR Only","Adventure,Simulation",,https://cdn.akamai.steamstatic.com/steam/apps/...,http://cdn.akamai.steamstatic.com/steam/apps/2...
85099,2736910,Beer Run,"Jan 3, 2024",0 - 0,0,0,0.00,0,Beer Run is an Indie game created to steal bee...,['English'],...,0,0,0,955 Games,955 Games,Single-player,"Casual,Indie",,https://cdn.akamai.steamstatic.com/steam/apps/...,http://cdn.akamai.steamstatic.com/steam/apps/2...
85100,2743220,My Friend The Spider,"Jan 4, 2024",0 - 0,0,0,0.00,0,A small 'horror' narrative game about isolatio...,['English'],...,0,0,0,MCA,MCA,Single-player,"Adventure,Simulation",,https://cdn.akamai.steamstatic.com/steam/apps/...,http://cdn.akamai.steamstatic.com/steam/apps/2...
85101,2293130,Path of Survivors,"Jan 8, 2024",0 - 0,0,0,3.99,0,Path of Survivors is a multi-class auto-battle...,['English'],...,0,0,0,Limited Input,Limited Input,"Single-player,Steam Achievements,Partial Contr...","Action,Casual,Indie,RPG,Simulation",,https://cdn.akamai.steamstatic.com/steam/apps/...,http://cdn.akamai.steamstatic.com/steam/apps/2...


Changes the nulls by the mean or Unknown for the categorical and numeric columns

In [4]:
# Convert to lowercase and replace spaces with underscores
game_data.columns = game_data.columns.str.lower().str.replace(' ', '_')

# Identify columns with missing values
columns_with_missing_values = game_data.columns[game_data.isnull().any()].tolist()

# Separate columns by data type
numerical_columns = game_data.select_dtypes(include=['number']).columns.tolist()
text_columns = game_data.select_dtypes(exclude=['number']).columns.tolist()

# Exclude 'appid' from imputation
numerical_columns.remove('appid')

# Impute numerical columns using SimpleImputer with mean strategy
numerical_imputer = SimpleImputer(strategy='mean')
game_data[numerical_columns] = numerical_imputer.fit_transform(game_data[numerical_columns])

# Impute text/categorical columns using a constant value
text_imputer = SimpleImputer(strategy='constant', fill_value='unknown')
game_data[text_columns] = text_imputer.fit_transform(game_data[text_columns])

In [5]:
game_data

Unnamed: 0,appid,name,release_date,estimated_owners,peak_ccu,required_age,price,dlc_count,about_the_game,supported_languages,...,average_playtime_two_weeks,median_playtime_forever,median_playtime_two_weeks,developers,publishers,categories,genres,tags,screenshots,movies
0,20200,Galactic Bowling,"Oct 21, 2008",0 - 20000,0.0,0.0,19.99,0.0,Galactic Bowling is an exaggerated and stylize...,['English'],...,0.0,0.0,0.0,Perpetual FX Creative,Perpetual FX Creative,"Single-player,Multi-player,Steam Achievements,...","Casual,Indie,Sports","Indie,Casual,Sports,Bowling",https://cdn.akamai.steamstatic.com/steam/apps/...,http://cdn.akamai.steamstatic.com/steam/apps/2...
1,655370,Train Bandit,"Oct 12, 2017",0 - 20000,0.0,0.0,0.99,0.0,THE LAW!! Looks to be a showdown atop a train....,"['English', 'French', 'Italian', 'German', 'Sp...",...,0.0,0.0,0.0,Rusty Moyher,Wild Rooster,"Single-player,Steam Achievements,Full controll...","Action,Indie","Indie,Action,Pixel Graphics,2D,Retro,Arcade,Sc...",https://cdn.akamai.steamstatic.com/steam/apps/...,http://cdn.akamai.steamstatic.com/steam/apps/2...
2,1732930,Jolt Project,"Nov 17, 2021",0 - 20000,0.0,0.0,4.99,0.0,Jolt Project: The army now has a new robotics ...,"['English', 'Portuguese - Brazil']",...,0.0,0.0,0.0,Campião Games,Campião Games,Single-player,"Action,Adventure,Indie,Strategy",unknown,https://cdn.akamai.steamstatic.com/steam/apps/...,http://cdn.akamai.steamstatic.com/steam/apps/2...
3,1355720,Henosis™,"Jul 23, 2020",0 - 20000,0.0,0.0,5.99,0.0,HENOSIS™ is a mysterious 2D Platform Puzzler w...,"['English', 'French', 'Italian', 'German', 'Sp...",...,0.0,0.0,0.0,Odd Critter Games,Odd Critter Games,"Single-player,Full controller support","Adventure,Casual,Indie","2D Platformer,Atmospheric,Surreal,Mystery,Puzz...",https://cdn.akamai.steamstatic.com/steam/apps/...,http://cdn.akamai.steamstatic.com/steam/apps/2...
4,1139950,Two Weeks in Painland,"Feb 3, 2020",0 - 20000,0.0,0.0,0.00,0.0,ABOUT THE GAME Play as a hacker who has arrang...,"['English', 'Spanish - Spain']",...,0.0,0.0,0.0,Unusual Games,Unusual Games,"Single-player,Steam Achievements","Adventure,Indie","Indie,Adventure,Nudity,Violent,Sexual Content,...",https://cdn.akamai.steamstatic.com/steam/apps/...,http://cdn.akamai.steamstatic.com/steam/apps/2...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
85098,2669080,Mannerheim's Saloon Car,"Jan 2, 2024",0 - 0,0.0,0.0,0.00,0.0,Marshal Mannerheim’s Saloon Car is the train c...,"['English', 'Finnish']",...,0.0,0.0,0.0,Xamk Game Studios,"Sodan ja rauhan keskus Muisti, Päämajamuseo","Single-player,Tracked Controller Support,VR Only","Adventure,Simulation",unknown,https://cdn.akamai.steamstatic.com/steam/apps/...,http://cdn.akamai.steamstatic.com/steam/apps/2...
85099,2736910,Beer Run,"Jan 3, 2024",0 - 0,0.0,0.0,0.00,0.0,Beer Run is an Indie game created to steal bee...,['English'],...,0.0,0.0,0.0,955 Games,955 Games,Single-player,"Casual,Indie",unknown,https://cdn.akamai.steamstatic.com/steam/apps/...,http://cdn.akamai.steamstatic.com/steam/apps/2...
85100,2743220,My Friend The Spider,"Jan 4, 2024",0 - 0,0.0,0.0,0.00,0.0,A small 'horror' narrative game about isolatio...,['English'],...,0.0,0.0,0.0,MCA,MCA,Single-player,"Adventure,Simulation",unknown,https://cdn.akamai.steamstatic.com/steam/apps/...,http://cdn.akamai.steamstatic.com/steam/apps/2...
85101,2293130,Path of Survivors,"Jan 8, 2024",0 - 0,0.0,0.0,3.99,0.0,Path of Survivors is a multi-class auto-battle...,['English'],...,0.0,0.0,0.0,Limited Input,Limited Input,"Single-player,Steam Achievements,Partial Contr...","Action,Casual,Indie,RPG,Simulation",unknown,https://cdn.akamai.steamstatic.com/steam/apps/...,http://cdn.akamai.steamstatic.com/steam/apps/2...


Remove the brackets

In [6]:
# Columns to process
columns_to_process = ['supported_languages', 'full_audio_languages']

# Function to remove brackets from strings
def remove_brackets(s):
    if isinstance(s, str):
        return s.replace('[', '').replace(']', '').replace("'", '')
    else:
        return s

# Apply the function to each column
for col in columns_to_process:
    game_data[col] = game_data[col].apply(remove_brackets)
  
# Convert 'release_date' column to datetime format
game_data['release_date'] = pd.to_datetime(game_data['release_date'], format='%b %d, %Y', errors='coerce')

# Assuming df is your DataFrame and 'estimated_owners' is the column
game_data['estimated_owners'] = game_data['estimated_owners'].apply(lambda x: sum(map(int, x.split('-'))) / 2 if isinstance(x, str) else x)


In [7]:
game_data

Unnamed: 0,appid,name,release_date,estimated_owners,peak_ccu,required_age,price,dlc_count,about_the_game,supported_languages,...,average_playtime_two_weeks,median_playtime_forever,median_playtime_two_weeks,developers,publishers,categories,genres,tags,screenshots,movies
0,20200,Galactic Bowling,2008-10-21,10000.0,0.0,0.0,19.99,0.0,Galactic Bowling is an exaggerated and stylize...,English,...,0.0,0.0,0.0,Perpetual FX Creative,Perpetual FX Creative,"Single-player,Multi-player,Steam Achievements,...","Casual,Indie,Sports","Indie,Casual,Sports,Bowling",https://cdn.akamai.steamstatic.com/steam/apps/...,http://cdn.akamai.steamstatic.com/steam/apps/2...
1,655370,Train Bandit,2017-10-12,10000.0,0.0,0.0,0.99,0.0,THE LAW!! Looks to be a showdown atop a train....,"English, French, Italian, German, Spanish - Sp...",...,0.0,0.0,0.0,Rusty Moyher,Wild Rooster,"Single-player,Steam Achievements,Full controll...","Action,Indie","Indie,Action,Pixel Graphics,2D,Retro,Arcade,Sc...",https://cdn.akamai.steamstatic.com/steam/apps/...,http://cdn.akamai.steamstatic.com/steam/apps/2...
2,1732930,Jolt Project,2021-11-17,10000.0,0.0,0.0,4.99,0.0,Jolt Project: The army now has a new robotics ...,"English, Portuguese - Brazil",...,0.0,0.0,0.0,Campião Games,Campião Games,Single-player,"Action,Adventure,Indie,Strategy",unknown,https://cdn.akamai.steamstatic.com/steam/apps/...,http://cdn.akamai.steamstatic.com/steam/apps/2...
3,1355720,Henosis™,2020-07-23,10000.0,0.0,0.0,5.99,0.0,HENOSIS™ is a mysterious 2D Platform Puzzler w...,"English, French, Italian, German, Spanish - Sp...",...,0.0,0.0,0.0,Odd Critter Games,Odd Critter Games,"Single-player,Full controller support","Adventure,Casual,Indie","2D Platformer,Atmospheric,Surreal,Mystery,Puzz...",https://cdn.akamai.steamstatic.com/steam/apps/...,http://cdn.akamai.steamstatic.com/steam/apps/2...
4,1139950,Two Weeks in Painland,2020-02-03,10000.0,0.0,0.0,0.00,0.0,ABOUT THE GAME Play as a hacker who has arrang...,"English, Spanish - Spain",...,0.0,0.0,0.0,Unusual Games,Unusual Games,"Single-player,Steam Achievements","Adventure,Indie","Indie,Adventure,Nudity,Violent,Sexual Content,...",https://cdn.akamai.steamstatic.com/steam/apps/...,http://cdn.akamai.steamstatic.com/steam/apps/2...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
85098,2669080,Mannerheim's Saloon Car,2024-01-02,0.0,0.0,0.0,0.00,0.0,Marshal Mannerheim’s Saloon Car is the train c...,"English, Finnish",...,0.0,0.0,0.0,Xamk Game Studios,"Sodan ja rauhan keskus Muisti, Päämajamuseo","Single-player,Tracked Controller Support,VR Only","Adventure,Simulation",unknown,https://cdn.akamai.steamstatic.com/steam/apps/...,http://cdn.akamai.steamstatic.com/steam/apps/2...
85099,2736910,Beer Run,2024-01-03,0.0,0.0,0.0,0.00,0.0,Beer Run is an Indie game created to steal bee...,English,...,0.0,0.0,0.0,955 Games,955 Games,Single-player,"Casual,Indie",unknown,https://cdn.akamai.steamstatic.com/steam/apps/...,http://cdn.akamai.steamstatic.com/steam/apps/2...
85100,2743220,My Friend The Spider,2024-01-04,0.0,0.0,0.0,0.00,0.0,A small 'horror' narrative game about isolatio...,English,...,0.0,0.0,0.0,MCA,MCA,Single-player,"Adventure,Simulation",unknown,https://cdn.akamai.steamstatic.com/steam/apps/...,http://cdn.akamai.steamstatic.com/steam/apps/2...
85101,2293130,Path of Survivors,2024-01-08,0.0,0.0,0.0,3.99,0.0,Path of Survivors is a multi-class auto-battle...,English,...,0.0,0.0,0.0,Limited Input,Limited Input,"Single-player,Steam Achievements,Partial Contr...","Action,Casual,Indie,RPG,Simulation",unknown,https://cdn.akamai.steamstatic.com/steam/apps/...,http://cdn.akamai.steamstatic.com/steam/apps/2...


In [8]:
game_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 85103 entries, 0 to 85102
Data columns (total 39 columns):
 #   Column                      Non-Null Count  Dtype         
---  ------                      --------------  -----         
 0   appid                       85103 non-null  int64         
 1   name                        85103 non-null  object        
 2   release_date                84973 non-null  datetime64[ns]
 3   estimated_owners            85103 non-null  float64       
 4   peak_ccu                    85103 non-null  float64       
 5   required_age                85103 non-null  float64       
 6   price                       85103 non-null  float64       
 7   dlc_count                   85103 non-null  float64       
 8   about_the_game              85103 non-null  object        
 9   supported_languages         85103 non-null  object        
 10  full_audio_languages        85103 non-null  object        
 11  reviews                     85103 non-null  object    

### Part dedicated to separate the data for MySQL

Specify the file paths for each CSV file & Create separate DataFrames for each table

In [9]:
# Game table
df_game = game_data[['appid', 'name', 'release_date', 'estimated_owners', 'peak_ccu',
              'required_age', 'price', 'dlc_count', 'about_the_game',
              'reviews', 'header_image', 'website', 'support_url', 'support_email']]    
df_game.to_csv('../data/clean/game.csv', index=False)

In [10]:
# Platform Table
df_languages = game_data[['appid', 'supported_languages', 'full_audio_languages']]
df_languages.to_csv('../data/clean/languages.csv', index=False)

In [11]:
# Platform Table
df_platform = game_data[['appid', 'windows', 'mac', 'linux']]
df_platform.to_csv('../data/clean/platform.csv', index=False)

In [12]:
# Metacritic Table
df_metacritic = game_data[['appid', 'metacritic_score', 'metacritic_url']]
df_metacritic.to_csv('../data/clean/metacritic.csv', index=False)

In [13]:
# Feedback Table
df_user_feedback = game_data[['appid', 'user_score', 'positive', 'negative', 'score_rank',
                       'achievements', 'recommendations']]
df_user_feedback.to_csv('../data/clean/user_feedback.csv', index=False)

In [14]:
# Playtime Table
df_playtime = game_data[['appid', 'average_playtime_forever', 'average_playtime_two_weeks',
                  'median_playtime_forever', 'median_playtime_two_weeks']]
df_playtime.to_csv('../data/clean/playtime.csv', index=False)

In [15]:
# Developer Table
df_development = game_data[['appid', 'developers', 'publishers']]
df_development.to_csv('../data/clean/development.csv', index=False)

In [16]:
# Categories Table
df_categorization = game_data[['appid', 'categories', 'genres', 'tags']]
df_categorization.to_csv('../data/clean/categorization.csv', index=False)

In [17]:
# Media Table
df_media = game_data[['appid', 'screenshots', 'movies']]
df_media.to_csv('../data/clean/media.csv', index=False)