# FBI Wanted

[FBI Wanted API](https://www.fbi.gov/wanted/api) provides information on individuals currently considered as most wanted criminals. API returns JSON data with a nested structure. To work with this data, it must be parsed.

In [1]:
import requests
from json import loads
import pandas as pd
import time


In [2]:
#create dataframe placeholder
fbi_wanted_df = pd.DataFrame({
    "title" : pd.Series(dtype="str")
    , "description" : pd.Series(dtype="str")
    , "subject" : pd.Series(dtype="str")
    , "status" : pd.Series(dtype="str")
    , "caution" : pd.Series(dtype="str")
    , "aliaces" : pd.Series(dtype="str")
    , "reward_text" : pd.Series(dtype="str")
    , "publication" : pd.Series(dtype="datetime64[ns]")
    , "sex" : pd.Series(dtype="str")
    , "race" : pd.Series(dtype="str")
    , "hair" : pd.Series(dtype="str")
    , "height_max" : pd.Series(dtype="float")
    , "nationality" : pd.Series(dtype="str")
    , "age_range" : pd.Series(dtype="str")
    , "weight" : pd.Series(dtype="str")
    , "dates_of_birth_used" : pd.Series(dtype="str")
    , "place_of_birth" : pd.Series(dtype="str")
    , "languages" : pd.Series(dtype="str")
    , "warning_message" : pd.Series(dtype="str")
    , "occupations" : pd.Series(dtype="str")
    , "complexion" : pd.Series(dtype="str")
})

In [3]:
#create API request
def get_fbi_wanted(page) -> dict:

    content = None

    headers = {
        "Accept" : "application/json"
        , "Connection" : "keep-alive"
        , "Host" : "api.fbi.gov"
        , "User-Agent" : "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.6.1 Safari/605.1.15"
        , "Accept-Language" : "en-GB,en;q=0.9"
    }

    res = requests.get(f"https://api.fbi.gov/wanted/v1/list?page={page}", headers=headers)
    try:
        content = loads(res.content)
    except Exception as ex:
        print(res.content)

    return content

In [4]:
#parse the API response and create pandas dataframe
def append_df(content: dict, df: pd.DataFrame) -> pd.DataFrame:
    title_list = []
    description_list = []
    subject_list = []
    status_list = []
    caution_list = []
    reward_text_list = []
    publication_list = []
    aliases_list = []
    sex_list = []
    race_list = []
    nationality_list = []
    age_range_list = []
    weight_list = []
    dates_of_birth_used_list = []
    hair_list = []
    height_max_list = []
    place_of_birth_list = []
    languages_list = []
    warning_message_list = []
    occupations_list = []
    complexion_list = []

    for i in content["items"]:
        title_list.append(i["title"])
        description_list.append(i["description"])
        status_list.append(i["status"])
        caution_list.append(i["caution"])
        reward_text_list.append(i["reward_text"])
        sex_list.append(i["sex"])
        publication_list.append(i["publication"])
        race_list.append(i["race"])
        nationality_list.append(i["nationality"])
        age_range_list.append(i["age_range"])
        weight_list.append(i["weight"])
        hair_list.append(i["hair"])
        height_max_list.append(i["height_max"])
        place_of_birth_list.append(i["place_of_birth"])
        warning_message_list.append(i["warning_message"])
        complexion_list.append(i["complexion"])

        if i["subjects"] is not None:
            if len(i["subjects"]) > 1:
                subject = ""
                for s in i["subjects"]:
                    subject += s + ", "
                subject_list.append(subject[:-2])
            elif len(i["subjects"]) == 1:
                subject_list.append(i["subjects"][0])
            else:
                subject_list.append(None)
        else:
            subject_list.append(None)

        if i["aliases"] is not None:
            if len(i["aliases"]) > 1:
                aliaces = ""
                for a in i["aliases"]:
                    aliaces += a + ", "
                aliases_list.append(aliaces[:-2])
            elif len(i["aliases"]) == 1:
                aliases_list.append(i["aliases"][0])
            else:
                aliases_list.append(None)
        else:
            aliases_list.append(None)

        if i["dates_of_birth_used"] is not None:
            if len(i["dates_of_birth_used"]) > 1:
                dbo = ""
                for s in i["dates_of_birth_used"]:
                    dbo += s + ", "
                dates_of_birth_used_list.append(dbo[:-2])
            elif len(i["dates_of_birth_used"]) == 1:
                dates_of_birth_used_list.append(i["dates_of_birth_used"][0])
            else:
                dates_of_birth_used_list.append(None)
        else:
            dates_of_birth_used_list.append(None)
        
        if i["languages"] is not None:
            if len(i["languages"]) > 1:
                languages = ""
                for s in i["languages"]:
                    languages += s + ", "
                languages_list.append(languages[:-2])
            elif len(i["languages"]) == 1:
                languages_list.append(i["languages"][0])
            else:
                languages_list.append(None)
        else:
            languages_list.append(None)

        if i["occupations"] is not None:
            if len(i["occupations"]) > 1:
                occupations = ""
                for s in i["occupations"]:
                    occupations += s + ", "
                occupations_list.append(occupations[:-2])
            elif len(i["occupations"]) == 1:
                occupations_list.append(i["occupations"][0])
            else:
                occupations_list.append(None)
        else:
            occupations_list.append(None)


    #create new row
    new_row = pd.DataFrame({
        "title" : title_list
        , "description" : description_list
        , "subject" : subject_list
        , "status" : status_list
        , "caution" : caution_list
        , "aliaces" : aliases_list
        , "reward_text" : reward_text_list
        , "publication" : publication_list
        , "sex" : sex_list
        , "race" : race_list
        , "nationality" : nationality_list
        , "age_range" : age_range_list
        , "weight" : weight_list
        , "dates_of_birth_used" : dates_of_birth_used_list
        , "hair" : hair_list
        , "height_max" : height_max_list
        , "place_of_birth" : place_of_birth_list
        , "languages" : languages_list
        , "warning_message" : warning_message_list
        , "occupations" : occupations_list
        , "complexion" : complexion_list
    })

    df = pd.concat([df, new_row], ignore_index=True)

    return df

    

In [5]:
#run scripts to form dataframe
page = 1

fbi_wanted = get_fbi_wanted(page)

while fbi_wanted["total"] > page * len(fbi_wanted["items"]):

    #increment page
    page += 1

    #sleep to avoid security blocking
    time.sleep(60)
    
    #get data
    content = get_fbi_wanted(page)
    

    #insert data into dataframe
    fbi_wanted_df = append_df(content, fbi_wanted_df)


In [9]:
fbi_wanted_df.head()

Unnamed: 0,title,description,subject,status,caution,aliaces,reward_text,publication,sex,race,...,height_max,nationality,age_range,weight,dates_of_birth_used,place_of_birth,languages,warning_message,occupations,complexion
0,"JASON WILLIAM DIES - SAN DIEGO, CALIFORNIA","June 18, 1991\r\nSan Diego, California",ViCAP Missing Persons,na,,"William Diaz, Bill Diaz, Bill Dies, William Dies",,2018-08-07T12:35:00,Male,hispanic,...,71.0,,20 years old (at time of disappearance),130 to 160 pounds,1970.0,,,,,
1,"JENNIFER L. WILSON - DERBY, KANSAS","Missing since September 1, 2002",ViCAP Missing Persons,na,,,,2017-08-24T08:42:00,Female,white,...,69.0,,,110 - 125 pounds,,,,,,
2,"TONITA MICHELLE BROOKS - DURHAM, NORTH CAROLINA","August 26, 2019\r\nDurham, North Carolina",ViCAP Missing Persons,na,,,,2022-09-16T07:14:00,Female,black,...,64.0,,45 years old (at time of last contact),120 pounds,,,,,,
3,"JANE DOE - FREDERICK, MARYLAND","Frederick, Maryland\r\nJuly 31, 1991",ViCAP Unidentified Persons,na,,,,2010-10-26T19:50:00,Female,white,...,62.0,,17 to 25 years old,110 to 120 pounds,,,,,,
4,HATE CRIME,"Unknown Suspect\r\nSt. Louis, Missouri\r\nApri...",Seeking Information,na,,,"The FBI is offering a reward of up to $10,000 ...",2023-11-28T11:52:00,,,...,,,,,,,,SHOULD BE CONSIDERED ARMED AND DANGEROUS,,


In [6]:
#save data as csv file
fbi_wanted_df.to_csv("fbi_wanted.csv", index=False)