# Applying Advanced Transformations (Core)

#### Import Library

In [12]:
## standard imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
## Importing the OS and JSON Modules
import os,json

#### Loading dataframes

In [13]:
sh_info = pd.read_csv("data/superhero_infosuperhero_info.csv")

In [40]:
sh_powers = pd.read_csv("data/superhero_powers - superhero_powers.csv")

#### Reviewing Super Hero Info df

In [15]:
sh_info.head()

Unnamed: 0,Hero|Publisher,Gender,Race,Alignment,Hair color,Eye color,Skin color,Measurements
0,A-Bomb|Marvel Comics,Male,Human,good,No Hair,yellow,Unknown,"{'Height': '203.0 cm', 'Weight': '441.0 kg'}"
1,Abe Sapien|Dark Horse Comics,Male,Icthyo Sapien,good,No Hair,blue,blue,"{'Height': '191.0 cm', 'Weight': '65.0 kg'}"
2,Abin Sur|DC Comics,Male,Ungaran,good,No Hair,blue,red,"{'Height': '185.0 cm', 'Weight': '90.0 kg'}"
3,Abomination|Marvel Comics,Male,Human / Radiation,bad,No Hair,green,Unknown,"{'Height': '203.0 cm', 'Weight': '441.0 kg'}"
4,Absorbing Man|Marvel Comics,Male,Human,bad,No Hair,blue,Unknown,"{'Height': '193.0 cm', 'Weight': '122.0 kg'}"


#### Focusing on Hero|Publisher Column

In [16]:
sh_info['Hero|Publisher']

0               A-Bomb|Marvel Comics
1       Abe Sapien|Dark Horse Comics
2                 Abin Sur|DC Comics
3          Abomination|Marvel Comics
4        Absorbing Man|Marvel Comics
                   ...              
458       Yellowjacket|Marvel Comics
459    Yellowjacket II|Marvel Comics
460                Yoda|George Lucas
461                Zatanna|DC Comics
462                   Zoom|DC Comics
Name: Hero|Publisher, Length: 463, dtype: object

In [17]:
sh_info['Hero|Publisher'].str.split('|',expand=True)

Unnamed: 0,0,1
0,A-Bomb,Marvel Comics
1,Abe Sapien,Dark Horse Comics
2,Abin Sur,DC Comics
3,Abomination,Marvel Comics
4,Absorbing Man,Marvel Comics
...,...,...
458,Yellowjacket,Marvel Comics
459,Yellowjacket II,Marvel Comics
460,Yoda,George Lucas
461,Zatanna,DC Comics


#### Transforming Column

In [18]:
sh_info[['Hero','Publisher']] = sh_info['Hero|Publisher'].str.split('|',expand=True)
sh_info.head(2)

Unnamed: 0,Hero|Publisher,Gender,Race,Alignment,Hair color,Eye color,Skin color,Measurements,Hero,Publisher
0,A-Bomb|Marvel Comics,Male,Human,good,No Hair,yellow,Unknown,"{'Height': '203.0 cm', 'Weight': '441.0 kg'}",A-Bomb,Marvel Comics
1,Abe Sapien|Dark Horse Comics,Male,Icthyo Sapien,good,No Hair,blue,blue,"{'Height': '191.0 cm', 'Weight': '65.0 kg'}",Abe Sapien,Dark Horse Comics


#### Dropping Column

In [19]:
## drop the original column
sh_info = sh_info.drop(columns=['Hero|Publisher'])

In [20]:
sh_info.head()

Unnamed: 0,Gender,Race,Alignment,Hair color,Eye color,Skin color,Measurements,Hero,Publisher
0,Male,Human,good,No Hair,yellow,Unknown,"{'Height': '203.0 cm', 'Weight': '441.0 kg'}",A-Bomb,Marvel Comics
1,Male,Icthyo Sapien,good,No Hair,blue,blue,"{'Height': '191.0 cm', 'Weight': '65.0 kg'}",Abe Sapien,Dark Horse Comics
2,Male,Ungaran,good,No Hair,blue,red,"{'Height': '185.0 cm', 'Weight': '90.0 kg'}",Abin Sur,DC Comics
3,Male,Human / Radiation,bad,No Hair,green,Unknown,"{'Height': '203.0 cm', 'Weight': '441.0 kg'}",Abomination,Marvel Comics
4,Male,Human,bad,No Hair,blue,Unknown,"{'Height': '193.0 cm', 'Weight': '122.0 kg'}",Absorbing Man,Marvel Comics


#### Focusing on Measurements Column

In [21]:
## examining a single value from the coordinates col
htwt = sh_info.loc[0,"Measurements"]
print(type(htwt))
htwt


<class 'str'>


"{'Height': '203.0 cm', 'Weight': '441.0 kg'}"

#### Formatting Column

In [22]:
## use .str.replace to replace all single quotes
sh_info['Measurements'] = sh_info['Measurements'].str.replace("'",'"')
## Apply the json.loads to the full column
sh_info['Measurements'] = sh_info['Measurements'].apply(json.loads)
sh_info['Measurements'].head()



0    {'Height': '203.0 cm', 'Weight': '441.0 kg'}
1     {'Height': '191.0 cm', 'Weight': '65.0 kg'}
2     {'Height': '185.0 cm', 'Weight': '90.0 kg'}
3    {'Height': '203.0 cm', 'Weight': '441.0 kg'}
4    {'Height': '193.0 cm', 'Weight': '122.0 kg'}
Name: Measurements, dtype: object

#### Unpacking Dictionaries

In [23]:
ht_wt = sh_info['Measurements'].apply(pd.Series)
ht_wt

Unnamed: 0,Height,Weight
0,203.0 cm,441.0 kg
1,191.0 cm,65.0 kg
2,185.0 cm,90.0 kg
3,203.0 cm,441.0 kg
4,193.0 cm,122.0 kg
...,...,...
458,183.0 cm,83.0 kg
459,165.0 cm,52.0 kg
460,66.0 cm,17.0 kg
461,170.0 cm,57.0 kg


#### Adding to DataFrame

In [24]:
sh_info =pd.concat((sh_info, ht_wt), axis = 1)
sh_info.head(2)

Unnamed: 0,Gender,Race,Alignment,Hair color,Eye color,Skin color,Measurements,Hero,Publisher,Height,Weight
0,Male,Human,good,No Hair,yellow,Unknown,"{'Height': '203.0 cm', 'Weight': '441.0 kg'}",A-Bomb,Marvel Comics,203.0 cm,441.0 kg
1,Male,Icthyo Sapien,good,No Hair,blue,blue,"{'Height': '191.0 cm', 'Weight': '65.0 kg'}",Abe Sapien,Dark Horse Comics,191.0 cm,65.0 kg


#### Dropping Column

In [25]:
sh_info = sh_info.drop(columns=['Measurements'])


In [26]:
sh_info.head(2)

Unnamed: 0,Gender,Race,Alignment,Hair color,Eye color,Skin color,Hero,Publisher,Height,Weight
0,Male,Human,good,No Hair,yellow,Unknown,A-Bomb,Marvel Comics,203.0 cm,441.0 kg
1,Male,Icthyo Sapien,good,No Hair,blue,blue,Abe Sapien,Dark Horse Comics,191.0 cm,65.0 kg



#### Unique values

In [27]:
sh_info['Weight'].unique()

array(['441.0 kg', '65.0 kg', '90.0 kg', '122.0 kg', '88.0 kg', '81.0 kg',
       '104.0 kg', '108.0 kg', '169.0 kg', '173.0 kg', '101.0 kg',
       '68.0 kg', '57.0 kg', '54.0 kg', '83.0 kg', '86.0 kg', '358.0 kg',
       '135.0 kg', '106.0 kg', '146.0 kg', '63.0 kg', '98.0 kg',
       '270.0 kg', '59.0 kg', '50.0 kg', '126.0 kg', '67.0 kg',
       '180.0 kg', '77.0 kg', '52.0 kg', '61.0 kg', '95.0 kg', '79.0 kg',
       '133.0 kg', '181.0 kg', '216.0 kg', '71.0 kg', '124.0 kg',
       '155.0 kg', '113.0 kg', '58.0 kg', '92.0 kg', '97.0 kg', '56.0 kg',
       '230.0 kg', '495.0 kg', '55.0 kg', '99.0 kg', '158.0 kg',
       '74.0 kg', '116.0 kg', '170.0 kg', '70.0 kg', '225.0 kg',
       '817.0 kg', '27.0 kg', '91.0 kg', '178.0 kg', '383.0 kg',
       '171.0 kg', '187.0 kg', '132.0 kg', '89.0 kg', '110.0 kg',
       '412.0 kg', '306.0 kg', '80.0 kg', '203.0 kg', '96.0 kg',
       '18.0 kg', '45.0 kg', '167.0 kg', '16.0 kg', '630.0 kg',
       '268.0 kg', '62.0 kg', '115.0 kg', '4.0 kg'

In [28]:
sh_info['Height'].unique()

array(['203.0 cm', '191.0 cm', '185.0 cm', '193.0 cm', '178.0 cm',
       '188.0 cm', '180.0 cm', '244.0 cm', '257.0 cm', '183.0 cm',
       '165.0 cm', '163.0 cm', '211.0 cm', '229.0 cm', '213.0 cm',
       '175.0 cm', '173.0 cm', '198.0 cm', '168.0 cm', '170.0 cm',
       '201.0 cm', '218.0 cm', '196.0 cm', '157.0 cm', '226.0 cm',
       '267.0 cm', '122.0 cm', '975.0 cm', '142.0 cm', '876.0 cm',
       '62.5 cm', '701.0 cm', '259.0 cm', '155.0 cm', '71.0 cm',
       '287.0 cm', '234.0 cm', '64.0 cm', '366.0 cm', '206.0 cm',
       '305.0 cm', '137.0 cm', '279.0 cm', '15.2 cm', '160.0 cm',
       '66.0 cm'], dtype=object)

#### Finding and removing duplicat rows

In [29]:
#find number of duplicat rows
sh_info.duplicated().sum()


0

#### Finding Missing Values

In [30]:
# Display the total number of missing values
print(f'There are {sh_info.isna().sum().sum()} missing values.')

There are 0 missing values.


#### Reviewing Super Hero Powers df

In [41]:
sh_powers.head()

Unnamed: 0,hero_names,Powers
0,3-D Man,"Agility,Super Strength,Stamina,Super Speed"
1,A-Bomb,"Accelerated Healing,Durability,Longevity,Super..."
2,Abe Sapien,"Agility,Accelerated Healing,Cold Resistance,Du..."
3,Abin Sur,Lantern Power Ring
4,Abomination,"Accelerated Healing,Intelligence,Super Strengt..."


#### Columns that contain lists

In [56]:
sh_powers['Powers']

0             Agility,Super Strength,Stamina,Super Speed
1      Accelerated Healing,Durability,Longevity,Super...
2      Agility,Accelerated Healing,Cold Resistance,Du...
3                                     Lantern Power Ring
4      Accelerated Healing,Intelligence,Super Strengt...
                             ...                        
662                   Flight,Energy Blasts,Size Changing
663    Cold Resistance,Durability,Longevity,Super Str...
664    Agility,Stealth,Danger Sense,Marksmanship,Weap...
665    Cryokinesis,Telepathy,Magic,Fire Control,Proba...
666    Super Speed,Intangibility,Time Travel,Time Man...
Name: Powers, Length: 667, dtype: object

In [74]:
# Assuming your DataFrame is named 'sh_powers' and the column is named 'Powers'
# Convert the 'Powers' column to string type
sh_powers['Powers'] = sh_powers['Powers'].astype(str)



In [77]:
# Assuming your DataFrame is named 'sh_powers' and the column is named 'Powers'
sh_powers['Powers'] = sh_powers['Powers'].astype(str)

# Apply one-hot encoding using pandas get_dummies() and apply(pd.Series)
one_hot_encoded = sh_powers['Powers'].apply(lambda x: pd.Series(x)).stack().str.get_dummies().sum(level=0)

# Concatenate the original DataFrame with the encoded DataFrame
result_df = pd.concat([sh_powers, one_hot_encoded], axis=1)



          hero_names                                             Powers  \
0            3-D Man         Agility,Super Strength,Stamina,Super Speed   
1             A-Bomb  Accelerated Healing,Durability,Longevity,Super...   
2         Abe Sapien  Agility,Accelerated Healing,Cold Resistance,Du...   
3           Abin Sur                                 Lantern Power Ring   
4        Abomination  Accelerated Healing,Intelligence,Super Strengt...   
..               ...                                                ...   
662  Yellowjacket II                 Flight,Energy Blasts,Size Changing   
663             Ymir  Cold Resistance,Durability,Longevity,Super Str...   
664             Yoda  Agility,Stealth,Danger Sense,Marksmanship,Weap...   
665          Zatanna  Cryokinesis,Telepathy,Magic,Fire Control,Proba...   
666             Zoom  Super Speed,Intangibility,Time Travel,Time Man...   

                                          powers_split  Accelerated Healing  \
0           Agility,

  one_hot_encoded = sh_powers['Powers'].apply(lambda x: pd.Series(x)).stack().str.get_dummies().sum(level=0)


In [78]:
sh_powers['Powers']

0             Agility,Super Strength,Stamina,Super Speed
1      Accelerated Healing,Durability,Longevity,Super...
2      Agility,Accelerated Healing,Cold Resistance,Du...
3                                     Lantern Power Ring
4      Accelerated Healing,Intelligence,Super Strengt...
                             ...                        
662                   Flight,Energy Blasts,Size Changing
663    Cold Resistance,Durability,Longevity,Super Str...
664    Agility,Stealth,Danger Sense,Marksmanship,Weap...
665    Cryokinesis,Telepathy,Magic,Fire Control,Proba...
666    Super Speed,Intangibility,Time Travel,Time Man...
Name: Powers, Length: 667, dtype: object