In [1]:
import requests
import pandas as pd
import re


In [2]:
# Version-pinned (0.3.0) snapshot of the superhero API, served via jsDelivr.
url = 'https://cdn.jsdelivr.net/gh/akabab/superhero-api@0.3.0/api/all.json'

# A timeout keeps the cell from hanging indefinitely on a stalled connection.
response = requests.get(url, timeout=30)
# Fail loudly on an HTTP error instead of trying to JSON-decode an error page.
response.raise_for_status()

# Decode the JSON payload and hand it straight to the DataFrame constructor.
all_superheroes_data = response.json()
df = pd.DataFrame(all_superheroes_data)

In [3]:
# Drop incomplete records and the nested columns this notebook does not use.
# - reset_index keeps the row labels contiguous (0..n-1), so frames that later
#   cells build positionally from these rows still line up with `df` when they
#   are concatenated side by side.
# - errors='ignore' lets the cell be re-run after the columns are already gone.
df = (
    df.dropna()
      .drop(columns=['work', 'connections', 'images'], errors='ignore')
      .reset_index(drop=True)
)

In [4]:
# Each of these columns is expected to hold one dict per row; json_normalize
# expands the dict keys into separate columns. The values are passed as a
# plain list and the result is re-labelled with df's own index, so the
# side-by-side concat below pairs every expanded row with the record it came
# from even when df's index has gaps (e.g. after rows were dropped). Without
# this, the expanded frames may carry a fresh 0..n-1 index and concat(axis=1),
# which aligns on index labels, would silently pair the wrong rows.
powerstats_df = pd.json_normalize(df['powerstats'].tolist()).set_axis(df.index, axis=0)

appearance_df = pd.json_normalize(df['appearance'].tolist()).set_axis(df.index, axis=0)

biography_df = pd.json_normalize(df['biography'].tolist()).set_axis(df.index, axis=0)

combined_df = pd.concat(
    [df[['id', 'name', 'slug']], powerstats_df, appearance_df, biography_df],
    axis=1,
)

In [5]:
# Keep only the rows that have a value in every column.
combined_df = combined_df.dropna(axis='index', how='any')

In [6]:
# The six individual power-stat columns that make up the overall score.
skills_columns = ['intelligence', 'strength', 'speed', 'durability', 'power', 'combat']

# Row-wise total of the six stats.
overall_ps = combined_df[skills_columns].sum(axis=1)

if 'Overall PS' in combined_df.columns:
    # Re-running the cell: refresh the existing column instead of letting
    # DataFrame.insert raise because the column already exists.
    combined_df['Overall PS'] = overall_ps
else:
    # Place the total directly after the last stat column rather than at a
    # hard-coded position, so the layout survives column changes upstream.
    insert_at = combined_df.columns.get_loc(skills_columns[-1]) + 1
    combined_df.insert(insert_at, 'Overall PS', overall_ps)

In [7]:
# Show the assembled frame: identifiers, power stats (with the 'Overall PS'
# total), appearance and biography columns.
combined_df

Unnamed: 0,id,name,slug,intelligence,strength,speed,durability,power,combat,Overall PS,...,weight,eyeColor,hairColor,fullName,alterEgos,aliases,placeOfBirth,firstAppearance,publisher,alignment
0,1,A-Bomb,1-a-bomb,38,100,17,80,24,64,323,...,"[980 lb, 441 kg]",Yellow,No Hair,Richard Milhouse Jones,No alter egos found.,[Rick Jones],"Scarsdale, Arizona","Hulk Vol 2 #2 (April, 2008) (as A-Bomb)",Marvel Comics,good
1,2,Abe Sapien,2-abe-sapien,88,28,35,65,100,85,401,...,"[145 lb, 65 kg]",Blue,No Hair,Abraham Sapien,No alter egos found.,"[Langdon Everett Caul, Abraham Sapien, Langdon...",-,Hellboy: Seed of Destruction (1993),Dark Horse Comics,good
2,3,Abin Sur,3-abin-sur,50,90,53,64,99,65,421,...,"[200 lb, 90 kg]",Blue,No Hair,,No alter egos found.,[Lagzia],Ungara,"Showcase #22 (October, 1959)",DC Comics,good
3,4,Abomination,4-abomination,63,80,53,90,62,95,443,...,"[980 lb, 441 kg]",Green,No Hair,Emil Blonsky,No alter egos found.,"[Agent R-7, Ravager of Worlds]","Zagreb, Yugoslavia",Tales to Astonish #90,Marvel Comics,bad
4,5,Abraxas,5-abraxas,88,63,83,100,100,55,489,...,"[- lb, 0 kg]",Blue,Black,Abraxas,No alter egos found.,[-],Within Eternity,Fantastic Four Annual #2001,Marvel Comics,bad
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
557,726,Yellowjacket,726-yellowjacket,88,10,12,28,12,14,164,...,"[185 lb, 83 kg]",Blue,Blond,Hank Pym,"Ant-Man, Giant-Man, Goliath, Wasp II",[Hank Pym],"Elmsford, New York","(as Pym) TALES TO ASTONISH #27, (as Ant-Man) T...",Ant-Man,good
558,727,Yellowjacket II,727-yellowjacket-ii,50,10,35,28,31,28,182,...,"[115 lb, 52 kg]",Blue,Strawberry Blond,Rita DeMara,No alter egos found.,[-],-,Avengers #264,Marvel Comics,good
559,728,Ymir,728-ymir,50,100,27,100,98,28,403,...,"[- lb, 0 kg]",White,No Hair,Ymir,No alter egos found.,[Aurgelmir],Niffleheim,JOURNEY INTO MYSTERY #97,Marvel Comics,good
560,729,Yoda,729-yoda,88,52,33,25,100,90,388,...,"[38 lb, 17 kg]",Brown,White,Yoda,No alter egos found.,[-],-,Star Wars: Episode V - The Empire Strikes Back...,George Lucas,good


In [8]:
# Split the combined frame into four topic tables that all share the 'id' key.
# Each table is an explicit copy: later cells rename columns and assign new
# values on these frames, and doing that on a bare slice of combined_df
# triggers SettingWithCopyWarning and leaves it unclear which object changed.
df_id_name = combined_df[['id', 'name']].copy()

df_powerstats = combined_df[
    ['id', 'intelligence', 'strength', 'speed', 'durability', 'power', 'combat', 'Overall PS']
].copy()

df_appearance = combined_df[['id', 'gender', 'race', 'height', 'weight']].copy()

df_biography = combined_df[['id', 'firstAppearance', 'publisher', 'alignment']].copy()

In [9]:
# Capitalize the power-stats headers. str.capitalize also lower-cases every
# character after the first one, so 'Overall PS' ends up as 'Overall ps'.
df_powerstats.columns = df_powerstats.columns.map(str.capitalize)
df_powerstats

Unnamed: 0,Id,Intelligence,Strength,Speed,Durability,Power,Combat,Overall ps
0,1,38,100,17,80,24,64,323
1,2,88,28,35,65,100,85,401
2,3,50,90,53,64,99,65,421
3,4,63,80,53,90,62,95,443
4,5,88,63,83,100,100,55,489
...,...,...,...,...,...,...,...,...
557,726,88,10,12,28,12,14,164
558,727,50,10,35,28,31,28,182
559,728,50,100,27,100,98,28,403
560,729,88,52,33,25,100,90,388


In [None]:
# Capitalize the appearance headers ('height' -> 'Height', 'weight' -> 'Weight', ...).
df_appearance.columns = df_appearance.columns.map(str.capitalize)

def extract_numeric_with_units(text):
    """Return the first "<number><unit>" fragment found in ``text``.

    Parameters
    ----------
    text : str, list or tuple of str, or None
        A single measurement string (e.g. ``'203 cm'``) or a sequence of
        alternative representations (e.g. ``['980 lb', '441 kg']``).
        Sequence items are examined in order.

    Returns
    -------
    str
        The first match of digits (optionally with ``.``/``,`` separated
        groups such as ``62.5`` or ``1,000``) followed by optional whitespace
        and a run of ASCII letters. Returns ``''`` when nothing matches or
        when ``text`` is ``None``, NaN or any other non-text value.
    """
    if isinstance(text, str):
        candidates = [text]
    elif isinstance(text, (list, tuple)):
        candidates = text
    else:
        # None, NaN and other non-text values carry no measurement.
        return ''

    for candidate in candidates:
        if not isinstance(candidate, str):
            continue
        # Allowing '.'/',' groups keeps '62.5 meters' intact; a bare \d+ would
        # skip the integer part and report '5 meters'.
        match = re.search(r'\d+(?:[.,]\d+)*\s*[a-zA-Z]+', candidate)
        if match:
            return match.group()
    return ''

# Reduce both measurement columns to a single "<number> <unit>" string each.
for measure in ('Height', 'Weight'):
    df_appearance.loc[:, measure] = df_appearance[measure].apply(extract_numeric_with_units)

In [None]:
# Show the appearance frame to check the Height/Weight values.
df_appearance

In [None]:
# Capitalize the biography headers. str.capitalize lower-cases the rest of each
# label, so 'firstAppearance' becomes 'Firstappearance'.
df_biography.columns = df_biography.columns.map(str.capitalize)

In [None]:
# Build an independent frame whose 'Firstappearance' header is spelled
# 'First_appearance'; rename() without inplace returns a new DataFrame.
df_biography_copy = df_biography.rename(columns={'Firstappearance': 'First_appearance'})

In [None]:
# From here on df_biography refers to the renamed copy. Both names point at
# the same object, so in-place changes made through one are visible via the other.
df_biography = df_biography_copy

In [None]:
# Show the biography frame to check the renamed headers.
df_biography

In [None]:
def extract_year(appearance):
    """Return the first standalone four-digit number in ``appearance``.

    Parameters
    ----------
    appearance : str or None
        Free-text description of a first appearance, e.g.
        ``'Hulk Vol 2 #2 (April, 2008) (as A-Bomb)'``.

    Returns
    -------
    str or None
        The matched four digits as a string (``'2008'``), or ``None`` when the
        text has no such number or the input is not a string (None, NaN, ...).

    Notes
    -----
    Any isolated four-digit number qualifies, so a four-digit issue number
    such as ``'#2001'`` is returned as well.
    """
    if not isinstance(appearance, str):
        # Missing values would otherwise make re.search raise TypeError.
        return None

    match = re.search(r'\b\d{4}\b', appearance)
    return match.group() if match else None

# Derive the 'Year' column only when the source column is present; otherwise
# just report that it is missing.
if 'First_appearance' not in df_biography.columns:
    print("not found")
else:
    first_appearance = df_biography['First_appearance']
    df_biography['Year'] = first_appearance.apply(extract_year)

In [None]:
# Remove the raw first-appearance text in place, then show the result.
df_biography.drop('First_appearance', axis=1, inplace=True)
df_biography

In [None]:
#df_id_name.to_csv('id_name.csv', index=False)
#df_powerstats.to_csv('powerstats.csv', index=False)
#df_appearance.to_csv('appearance.csv', index=False)
#df_biography.to_csv('biography.csv', index=False)

In [None]:
# Re-map the extracted year '2099' to '1993', then list the distinct years.
# NOTE(review): presumably '2099' was picked up from a title rather than a real
# publication date — confirm that '1993' is the intended correction.
year_fixes = {'2099': '1993'}
df_biography['Year'] = df_biography['Year'].replace(year_fixes)
df_biography['Year'].unique()