## Load dataset

In [None]:
import pandas as pd 
import numpy as np


# Read the raw sales CSV from the working directory into the frame that
# every later cell operates on.
raw_csv_path = 'raw_videogame_sales.csv'
df = pd.read_csv(raw_csv_path)

## Initial Data Inspection


In [None]:
# Quick structural overview of the freshly loaded frame.
# A bare expression is only rendered by Jupyter when it is the LAST line of a
# cell, so the shape has to be printed explicitly or it is silently discarded.
print(df.shape)      # (row count, column count)
df.info()            # prints its per-column summary to stdout
df.isnull().sum()    # missing-value count per column; shown as the cell output


## Standardize column names and drop unnecessary columns

In [None]:
# Normalise the header text (lower-case, no surrounding whitespace) so the
# columns can be addressed by predictable names in the cells below.
normalized_headers = df.columns.str.lower().str.strip()
df.columns = normalized_headers

# Review-score and rating fields are removed from the frame.
unused_columns = [
    'critic_score',
    'critic_count',
    'user_score',
    'user_count',
    'rating',
]
df = df.drop(columns=unused_columns)


## String cleaning, data type conversions and handling missing values

In [None]:
# name: trim whitespace, replace "/" with " & ", title-case; rows with no
# name are dropped.
df['name'] = (
    df['name']
    .str.strip()
    .str.replace('/', ' & ', regex=False)  # plain literal; no regex needed
    .str.title()
)
# Reassign instead of inplace=True, consistent with the other dropna calls.
df = df.dropna(subset=['name'])

# year_of_release: unparseable values become <NA> (errors='coerce'), the
# column is stored as nullable Int64, and rows left without a year are dropped.
df['year_of_release'] = pd.to_numeric(df['year_of_release'], errors='coerce').astype('Int64')
df = df.dropna(subset=['year_of_release'])

# genre: hyphens -> spaces, trim, title-case. Rows with a missing genre are kept.
# regex=False makes the literal match explicit regardless of the pandas default.
df['genre'] = df['genre'].str.replace('-', ' ', regex=False).str.strip().str.title()

# publisher: same normalisation as genre; rows with no publisher are dropped.
df['publisher'] = df['publisher'].str.replace('-', ' ', regex=False).str.strip().str.title()
df = df.dropna(subset=['publisher'])

# developer: fill gaps with a placeholder.
# The result is assigned back to the column: calling fillna(..., inplace=True)
# on df['developer'] acts on a temporary selection, which warns on pandas 2.x
# and does not modify df at all under Copy-on-Write.
df['developer'] = df['developer'].fillna('Unknown')

## Categorical mapping for platform names

In [None]:
# Lookup table: platform code as it appears in the 'platform' column ->
# full display name.
platform_map = {
    # Nintendo
    'Wii': 'Nintendo Wii',
    'NES': 'Nintendo Entertainment System',
    'GB': 'Game Boy',
    'DS': 'Nintendo DS',
    'SNES': 'Super Nintendo Entertainment System',
    'GBA': 'Game Boy Advance',
    '3DS': 'Nintendo 3DS',
    'N64': 'Nintendo 64',
    'WiiU': 'Nintendo Wii U',
    'GC': 'GameCube',
    # PlayStation
    'PS': 'PlayStation',
    'PS2': 'PlayStation 2',
    'PS3': 'PlayStation 3',
    'PS4': 'PlayStation 4',
    'PSP': 'PlayStation Portable',
    'PSV': 'PlayStation Vita',
    # Xbox
    'X360': 'Xbox 360',
    'XB': 'Xbox',
    'XOne': 'Xbox One',
    # Sega
    'GEN': 'Sega Genesis',
    'DC': 'Sega Dreamcast',
    'SAT': 'Sega Saturn',
    'SCD': 'Sega CD',
    'GG': 'Sega Game Gear',
    # Everything else
    'PC': 'Personal Computer',
    '2600': 'Atari 2600',
    'NG': 'Neo Geo',
    'TG16': 'TurboGrafx-16',
    'WS': 'WonderSwan',
    '3DO': '3DO Interactive Multiplayer',
    'PCFX': 'PC-FX',
}

# Translate each code; any value without an entry in the table comes back as
# NaN from .map(), in which case the original value is kept unchanged.
full_names = df['platform'].map(platform_map)
df['platform'] = full_names.where(full_names.notna(), df['platform'])


## Final cleaned dataset export


In [None]:

# Write the cleaned frame to disk; index=False leaves the row index out of
# the file.
cleaned_csv_path = 'cleaned_videogame_sales.csv'
df.to_csv(cleaned_csv_path, index=False)