In [14]:
import pandas as pd

# Load the datasets
info_df = pd.read_csv("DataFiles/superhero_info - superhero_info.csv")
powers_df = pd.read_csv("DataFiles/superhero_powers - superhero_powers.csv")

# Merge the datasets on the 'Hero' column
combined_df = pd.merge(info_df, powers_df, left_on='Hero|Publisher', right_on='hero_names', how='left')

# Drop redundant columns
combined_df.drop(['hero_names'], axis=1, inplace=True)

# Extract numeric values from 'Measurements' column and create 'Height' and 'Weight' columns
measurements_df = combined_df['Measurements'].str.extract('(?P<Height>\d+\.\d+|\d+) cm (?P<Weight>\d+\.\d+|\d+) kg')
combined_df = pd.concat([combined_df, measurements_df], axis=1)

# Convert 'Height' and 'Weight' to numeric
combined_df['Height'] = pd.to_numeric(combined_df['Height'])
combined_df['Weight'] = pd.to_numeric(combined_df['Weight'])

# Drop the original 'Measurements' column
combined_df.drop(['Measurements'], axis=1, inplace=True)

# Fill NaN values in powers columns with 0,
powers_list = powers_df['Powers'].str.get_dummies(sep=', ')
combined_df = pd.concat([combined_df, powers_list], axis=1)
combined_df.update(combined_df.filter(like='_power').fillna(0))

# Identify superheroes with Super Speed power
#its a bit confusing here since theres no super speed column
combined_df['Super Speed'] = combined_df['Powers'].apply(lambda x: 1 if 'Super Speed' in str(x) else 0)

# Calculate the average weight for superheroes with Super Speed and those without
super_speed_avg_weight = combined_df.loc[combined_df['Super Speed'] == 1, 'Weight'].mean()
non_super_speed_avg_weight = combined_df.loc[combined_df['Super Speed'] == 0, 'Weight'].mean()

print("Average weight of superheroes with Super Speed:", super_speed_avg_weight)
print("Average weight of superheroes without Super Speed:", non_super_speed_avg_weight)

# Calculate the average height of heroes for each publisher
average_height_by_publisher = combined_df.groupby('Hero|Publisher')['Height'].mean()
print("Average height of heroes by publisher:")
print(average_height_by_publisher)

Average weight of superheroes with Super Speed: nan
Average weight of superheroes without Super Speed: nan
Average height of heroes by publisher:
Hero|Publisher
A-Bomb|Marvel Comics            NaN
Abe Sapien|Dark Horse Comics    NaN
Abin Sur|DC Comics              NaN
Abomination|Marvel Comics       NaN
Absorbing Man|Marvel Comics     NaN
                                 ..
Yellowjacket II|Marvel Comics   NaN
Yellowjacket|Marvel Comics      NaN
Yoda|George Lucas               NaN
Zatanna|DC Comics               NaN
Zoom|DC Comics                  NaN
Name: Height, Length: 457, dtype: float64
