In [1]:
## Standard Imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
## Importing the OS and JSON Modules
import os,json

In [2]:
SI = pd.read_csv('superhero_info - superhero_info.csv')
SI.head()

Unnamed: 0,Hero|Publisher,Gender,Race,Alignment,Hair color,Eye color,Skin color,Measurements
0,A-Bomb|Marvel Comics,Male,Human,good,No Hair,yellow,Unknown,"{'Height': '203.0 cm', 'Weight': '441.0 kg'}"
1,Abe Sapien|Dark Horse Comics,Male,Icthyo Sapien,good,No Hair,blue,blue,"{'Height': '191.0 cm', 'Weight': '65.0 kg'}"
2,Abin Sur|DC Comics,Male,Ungaran,good,No Hair,blue,red,"{'Height': '185.0 cm', 'Weight': '90.0 kg'}"
3,Abomination|Marvel Comics,Male,Human / Radiation,bad,No Hair,green,Unknown,"{'Height': '203.0 cm', 'Weight': '441.0 kg'}"
4,Absorbing Man|Marvel Comics,Male,Human,bad,No Hair,blue,Unknown,"{'Height': '193.0 cm', 'Weight': '122.0 kg'}"


In [3]:
SP = pd.read_csv('superhero_powers - superhero_powers.csv')
SP.head()

Unnamed: 0,hero_names,Powers
0,3-D Man,"Agility,Super Strength,Stamina,Super Speed"
1,A-Bomb,"Accelerated Healing,Durability,Longevity,Super..."
2,Abe Sapien,"Agility,Accelerated Healing,Cold Resistance,Du..."
3,Abin Sur,Lantern Power Ring
4,Abomination,"Accelerated Healing,Intelligence,Super Strengt..."


I. Clean the files and combine them into one final DataFrame.

In [4]:
# Exploring existing format for Hero|Publisher with a few examples
SI['Hero|Publisher'].head(2)

0            A-Bomb|Marvel Comics
1    Abe Sapien|Dark Horse Comics
Name: Hero|Publisher, dtype: object

In [5]:
## To split, add expand=True
SI['Hero|Publisher'].str.split('|',expand=True)

Unnamed: 0,0,1
0,A-Bomb,Marvel Comics
1,Abe Sapien,Dark Horse Comics
2,Abin Sur,DC Comics
3,Abomination,Marvel Comics
4,Absorbing Man,Marvel Comics
...,...,...
458,Yellowjacket,Marvel Comics
459,Yellowjacket II,Marvel Comics
460,Yoda,George Lucas
461,Zatanna,DC Comics


In [6]:
## save the 2 new columns into the dataframe
SI[['Hero','Publisher']] = SI['Hero|Publisher'].str.split('|',expand=True)
SI.head(2)

Unnamed: 0,Hero|Publisher,Gender,Race,Alignment,Hair color,Eye color,Skin color,Measurements,Hero,Publisher
0,A-Bomb|Marvel Comics,Male,Human,good,No Hair,yellow,Unknown,"{'Height': '203.0 cm', 'Weight': '441.0 kg'}",A-Bomb,Marvel Comics
1,Abe Sapien|Dark Horse Comics,Male,Icthyo Sapien,good,No Hair,blue,blue,"{'Height': '191.0 cm', 'Weight': '65.0 kg'}",Abe Sapien,Dark Horse Comics


In [7]:
## drop the original column 
SI = SI.drop(columns=['Hero|Publisher'])
SI.head(2)

Unnamed: 0,Gender,Race,Alignment,Hair color,Eye color,Skin color,Measurements,Hero,Publisher
0,Male,Human,good,No Hair,yellow,Unknown,"{'Height': '203.0 cm', 'Weight': '441.0 kg'}",A-Bomb,Marvel Comics
1,Male,Icthyo Sapien,good,No Hair,blue,blue,"{'Height': '191.0 cm', 'Weight': '65.0 kg'}",Abe Sapien,Dark Horse Comics


In [8]:
## examining a single value from the measurements col
mesmt = SI.loc[0,"Measurements"]
print(type(mesmt))
mesmt

<class 'str'>


"{'Height': '203.0 cm', 'Weight': '441.0 kg'}"

In [9]:
import json
json.loads(mesmt)

JSONDecodeError: Expecting property name enclosed in double quotes: line 1 column 2 (char 1)

In [16]:
#To fix this, we can replace our single quotes with double quotes before we use json.loads
mesmt = mesmt.replace("'",'"')
mesmt

'{"Height": "203.0 cm", "Weight": "441.0 kg"}'

In [17]:
## now we can use json.loads
fixed_mesmt = json.loads(mesmt)
print(type(fixed_mesmt))
fixed_mesmt

<class 'dict'>


{'Height': '203.0 cm', 'Weight': '441.0 kg'}

In [18]:
fixed_mesmt_2 = fixed_mesmt.replace("'",'')
fixed_mesmt_2

AttributeError: 'dict' object has no attribute 'replace'

In [19]:
#We can apply this transformation to the entire column by combining:
#Using .str.replace to replace single quotes.
#Using .apply with json.loads to convert all rows simultaneously   

## use .str.replace to replace all single quotes
SI['Measurements'] = SI['Measurements'].str.replace("'",'"')
## Apply the json.loads to the full column
SI['Measurements'] = SI['Measurements'].apply(json.loads)
SI['Measurements'].head()

0    {'Height': '203.0 cm', 'Weight': '441.0 kg'}
1     {'Height': '191.0 cm', 'Weight': '65.0 kg'}
2     {'Height': '185.0 cm', 'Weight': '90.0 kg'}
3    {'Height': '203.0 cm', 'Weight': '441.0 kg'}
4    {'Height': '193.0 cm', 'Weight': '122.0 kg'}
Name: Measurements, dtype: object

In [20]:
## check a single value after transformation
test_mesmt = SI.loc[0, 'Measurements']
print(type(test_mesmt))
test_mesmt

<class 'dict'>


{'Height': '203.0 cm', 'Weight': '441.0 kg'}

In [21]:
#Unpack a column of Measurements into separate columns
hei_wei = SI['Measurements'].apply(pd.Series)
hei_wei

Unnamed: 0,Height,Weight
0,203.0 cm,441.0 kg
1,191.0 cm,65.0 kg
2,185.0 cm,90.0 kg
3,203.0 cm,441.0 kg
4,193.0 cm,122.0 kg
...,...,...
458,183.0 cm,83.0 kg
459,165.0 cm,52.0 kg
460,66.0 cm,17.0 kg
461,170.0 cm,57.0 kg


In [22]:
# concat hei_wei with original dataframe
SI = pd.concat((SI, hei_wei), axis = 1)
SI.head(2)

Unnamed: 0,Gender,Race,Alignment,Hair color,Eye color,Skin color,Measurements,Hero,Publisher,Height,Weight
0,Male,Human,good,No Hair,yellow,Unknown,"{'Height': '203.0 cm', 'Weight': '441.0 kg'}",A-Bomb,Marvel Comics,203.0 cm,441.0 kg
1,Male,Icthyo Sapien,good,No Hair,blue,blue,"{'Height': '191.0 cm', 'Weight': '65.0 kg'}",Abe Sapien,Dark Horse Comics,191.0 cm,65.0 kg


In [23]:
#drop the original Measurements column.
SI = SI.drop(columns=['Measurements'])

In [24]:
SI.head()

Unnamed: 0,Gender,Race,Alignment,Hair color,Eye color,Skin color,Hero,Publisher,Height,Weight
0,Male,Human,good,No Hair,yellow,Unknown,A-Bomb,Marvel Comics,203.0 cm,441.0 kg
1,Male,Icthyo Sapien,good,No Hair,blue,blue,Abe Sapien,Dark Horse Comics,191.0 cm,65.0 kg
2,Male,Ungaran,good,No Hair,blue,red,Abin Sur,DC Comics,185.0 cm,90.0 kg
3,Male,Human / Radiation,bad,No Hair,green,Unknown,Abomination,Marvel Comics,203.0 cm,441.0 kg
4,Male,Human,bad,No Hair,blue,Unknown,Absorbing Man,Marvel Comics,193.0 cm,122.0 kg


In [34]:
#one-hot-encoded columns for every power that appears in the dataset.
SP['Powers'].value_counts()

Intelligence                                                                                                                                                                                                                                                         8
Durability,Super Strength                                                                                                                                                                                                                                            5
Agility,Stealth,Marksmanship,Weapons Master,Stamina                                                                                                                                                                                                                  4
Marksmanship                                                                                                                                                                                                       

In [35]:
## showing the lists are really strings
SP.loc[2,'Powers']

'Agility,Accelerated Healing,Cold Resistance,Durability,Underwater breathing,Marksmanship,Weapons Master,Longevity,Intelligence,Super Strength,Telepathy,Stamina,Immortality,Reflexes,Enhanced Sight,Sub-Mariner'

In [36]:
## To split, add expand=True
SP['PowerSplit'] = SP['Powers'].str.split(',',expand=False)

In [37]:
## exploding the column of lists
exploded = SP.explode('PowerSplit')
exploded

Unnamed: 0,hero_names,Powers,"Agility,Super Strength,Stamina,Super Speed","Accelerated Healing,Durability,Longevity,Super Strength,Stamina,Camouflage,Self-Sustenance","Agility,Accelerated Healing,Cold Resistance,Durability,Underwater breathing,Marksmanship,Weapons Master,Longevity,Intelligence,Super Strength,Telepathy,Stamina,Immortality,Reflexes,Enhanced Sight,Sub-Mariner",Lantern Power Ring,"Accelerated Healing,Intelligence,Super Strength,Stamina,Super Speed,Invulnerability,Animation,Super Breath","Dimensional Awareness,Flight,Intelligence,Super Strength,Size Changing,Super Speed,Teleportation,Magic,Dimensional Travel,Immortality,Invulnerability,Molecular Manipulation,Energy Manipulation,Power Cosmic","Cold Resistance,Durability,Energy Absorption,Super Strength,Invulnerability,Elemental Transmogrification,Fire Resistance,Natural Armor,Molecular Manipulation,Heat Resistance,Matter Absorption","Accelerated Healing,Immortality,Regeneration",...,"Accelerated Healing,Durability,Flight,Marksmanship,Weapons Master,Longevity,Intelligence,Super Strength,Telepathy,Stamina,Super Speed,Animal Oriented Powers,Weapon-based Powers,Enhanced Senses,Dimensional Travel,Enhanced Memory,Reflexes,Force Fields,Fire Resistance,Enhanced Hearing,Hypnokinesis,Enhanced Smell,Vision - Telescopic,Toxin and Disease Resistance,Magic Resistance,Vision - Microscopic,Vision - Night,Vision - Infrared,Vision - X-Ray,Vision - Thermal","Agility,Accelerated Healing,Durability,Stealth,Marksmanship,Longevity,Super Strength,Stamina,Jump,Reflexes,Enhanced Hearing,Enhanced Sight,Natural Weapons,Enhanced Smell,Vision - Telescopic,Toxin and Disease Resistance,Vision - Night","Flight,Telepathy,Astral Travel,Teleportation,Telekinesis,Phasing,Astral Projection,Psionic Powers,Mind Control,Intangibility,Illusions","Size Changing,Animal Oriented Powers","Flight,Energy Blasts,Size Changing","Cold Resistance,Durability,Longevity,Super Strength,Cryokinesis,Immortality","Agility,Stealth,Danger Sense,Marksmanship,Weapons Master,Longevity,Intelligence,Telepathy,Energy Blasts,Stamina,Super Speed,Telekinesis,Jump,Reflexes,Force Fields,Empathy,Precognition,Cloaking,The Force","Cryokinesis,Telepathy,Magic,Fire Control,Probability Manipulation,Water Control,Terrakinesis,Weather Control","Super Speed,Intangibility,Time Travel,Time Manipulation",PowerSplit
0,3-D Man,"Agility,Super Strength,Stamina,Super Speed",True,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,Agility
0,3-D Man,"Agility,Super Strength,Stamina,Super Speed",True,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,Super Strength
0,3-D Man,"Agility,Super Strength,Stamina,Super Speed",True,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,Stamina
0,3-D Man,"Agility,Super Strength,Stamina,Super Speed",True,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,Super Speed
1,A-Bomb,"Accelerated Healing,Durability,Longevity,Super...",False,True,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,Accelerated Healing
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
665,Zatanna,"Cryokinesis,Telepathy,Magic,Fire Control,Proba...",False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,True,False,Weather Control
666,Zoom,"Super Speed,Intangibility,Time Travel,Time Man...",False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,True,Super Speed
666,Zoom,"Super Speed,Intangibility,Time Travel,Time Man...",False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,True,Intangibility
666,Zoom,"Super Speed,Intangibility,Time Travel,Time Man...",False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,True,Time Travel


In [38]:
## saving the unique values from the exploded column
cols_to_make = exploded['PowerSplit'].dropna().unique()
cols_to_make

array(['Agility', 'Super Strength', 'Stamina', 'Super Speed',
       'Accelerated Healing', 'Durability', 'Longevity', 'Camouflage',
       'Self-Sustenance', 'Cold Resistance', 'Underwater breathing',
       'Marksmanship', 'Weapons Master', 'Intelligence', 'Telepathy',
       'Immortality', 'Reflexes', 'Enhanced Sight', 'Sub-Mariner',
       'Lantern Power Ring', 'Invulnerability', 'Animation',
       'Super Breath', 'Dimensional Awareness', 'Flight', 'Size Changing',
       'Teleportation', 'Magic', 'Dimensional Travel',
       'Molecular Manipulation', 'Energy Manipulation', 'Power Cosmic',
       'Energy Absorption', 'Elemental Transmogrification',
       'Fire Resistance', 'Natural Armor', 'Heat Resistance',
       'Matter Absorption', 'Regeneration', 'Stealth', 'Power Suit',
       'Energy Blasts', 'Energy Beams', 'Heat Generation', 'Danger Sense',
       'Phasing', 'Force Fields', 'Hypnokinesis', 'Invisibility',
       'Enhanced Senses', 'Jump', 'Shapeshifting', 'Elasticity',
 

In [40]:
for col in cols_to_make:
    SP[col] = SP['Powers'].str.contains(col)
SP.head()


  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] =

  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] =

  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] =

  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] =

  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)
  SP[col] = SP['Powers'].str.contains(col)


Unnamed: 0,hero_names,Powers,"Agility,Super Strength,Stamina,Super Speed","Accelerated Healing,Durability,Longevity,Super Strength,Stamina,Camouflage,Self-Sustenance","Agility,Accelerated Healing,Cold Resistance,Durability,Underwater breathing,Marksmanship,Weapons Master,Longevity,Intelligence,Super Strength,Telepathy,Stamina,Immortality,Reflexes,Enhanced Sight,Sub-Mariner",Lantern Power Ring,"Accelerated Healing,Intelligence,Super Strength,Stamina,Super Speed,Invulnerability,Animation,Super Breath","Dimensional Awareness,Flight,Intelligence,Super Strength,Size Changing,Super Speed,Teleportation,Magic,Dimensional Travel,Immortality,Invulnerability,Molecular Manipulation,Energy Manipulation,Power Cosmic","Cold Resistance,Durability,Energy Absorption,Super Strength,Invulnerability,Elemental Transmogrification,Fire Resistance,Natural Armor,Molecular Manipulation,Heat Resistance,Matter Absorption","Accelerated Healing,Immortality,Regeneration",...,Weather Control,Omnipresent,Omniscient,Hair Manipulation,Nova Force,Odin Force,Phoenix Force,Intuitive aptitude,Melting,Changing Armor
0,3-D Man,"Agility,Super Strength,Stamina,Super Speed",True,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
1,A-Bomb,"Accelerated Healing,Durability,Longevity,Super...",False,True,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2,Abe Sapien,"Agility,Accelerated Healing,Cold Resistance,Du...",False,False,True,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
3,Abin Sur,Lantern Power Ring,False,False,False,True,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
4,Abomination,"Accelerated Healing,Intelligence,Super Strengt...",False,False,False,False,True,False,False,False,...,False,False,False,False,False,False,False,False,False,False
