# Star Wars Data Analysis

## Section 1: Setup imports and dataframes

In [1]:
# Uncomment the following line to install aiohttp, which the async calls need.
# asyncio ships with Python's standard library, so it does not need to be installed.

#%pip install aiohttp

In [2]:
# Import necessary libraries for dataframes, HTTP requests, JSON, and charts

import pandas as pd
from prophet import Prophet
import datetime as dt
import numpy as np
import requests
import json
import matplotlib.pyplot as plt
import asyncio
import aiohttp

%matplotlib inline

In [3]:
# Build the endpoint URL of each category SWAPI provides from the shared API root

swapi_root = "https://swapi.dev/api"

films_url = f"{swapi_root}/films/"
people_url = f"{swapi_root}/people/"
planets_url = f"{swapi_root}/planets/"
species_url = f"{swapi_root}/species/"
starships_url = f"{swapi_root}/starships/"
vehicles_url = f"{swapi_root}/vehicles/"

In [4]:
# Work out how many result pages each category exposes through the API

def retrieve_pages(url, timeout=30):
    """Return the number of result pages available at a list endpoint.

    Sends one GET request to ``url`` and reads the ``count`` field of the JSON
    reply. The page count is ``count`` divided by 10 records per page, rounded up.

    Args:
        url: Endpoint to query.
        timeout: Seconds to wait for the server before giving up, so a stalled
            connection cannot hang the notebook.

    Returns:
        The number of pages as an int. When the reply carries no usable
        ``count`` value, 10 records are assumed, which yields a single page.
    """
    records_per_page = 10

    response = requests.get(url, timeout=timeout)
    data = response.json()

    try:
        total_records = int(data["count"])
    except (KeyError, TypeError, ValueError):
        # Only a missing or malformed count falls back to the default;
        # any other failure is allowed to surface instead of being hidden.
        total_records = records_per_page

    # Integer ceiling division: avoids the float round-trip of int(n / 10)
    return -(-total_records // records_per_page)

In [5]:
# Look up the page count of every category, then report the counts in a fixed order

(
    films_pages,
    people_pages,
    planets_pages,
    species_pages,
    starships_pages,
    vehicles_pages,
) = (
    retrieve_pages(endpoint)
    for endpoint in (films_url, people_url, planets_url, species_url, starships_url, vehicles_url)
)

for category, page_count in (
    ("films", films_pages),
    ("people", people_pages),
    ("planets", planets_pages),
    ("species", species_pages),
    ("starships", starships_pages),
    ("vehicles", vehicles_pages),
):
    print(f"The number of pages of {category} is {page_count}")

The number of pages of films is 1
The number of pages of people is 9
The number of pages of planets is 6
The number of pages of species is 4
The number of pages of starships is 4
The number of pages of vehicles is 4


In [6]:
# Fetch data from a URL asynchronously, which reduced response time by about 40%
# compared with fetching synchronously

async def fetch(url, session=None):
    """Return the decoded JSON body of a GET request to ``url``.

    Args:
        url: Address to request.
        session: Optional open ``aiohttp.ClientSession`` to reuse. When omitted,
            a temporary session is opened for this one request and closed
            afterwards.

    Raises:
        aiohttp.ClientResponseError: If the server answers with an HTTP error
            status, instead of handing an error payload back to the caller.
    """
    if session is None:
        async with aiohttp.ClientSession() as own_session:
            return await fetch(url, own_session)

    async with session.get(url) as response:
        response.raise_for_status()
        return await response.json()


# Call a URL once per page and collect the entries of each page's "results" property
async def assemble_json(url, pages):
    """Download pages 1..``pages`` of ``url`` and return their combined records.

    Args:
        url: List endpoint; each request appends ``?page=<n>`` to it.
        pages: Number of pages to request, starting at page 1.

    Returns:
        A list holding the items of every page's ``"results"`` list, in page order.
    """
    total_json = []
    print(f"Retrieving {pages} pages from {url}")
    # One session for the whole category, rather than a new session per page
    async with aiohttp.ClientSession() as session:
        for page in range(1, pages + 1):
            req_json = await fetch(f"{url}?page={page}", session)
            total_json.extend(req_json["results"])
    print(f"Found {len(total_json)} records at {url}")
    return total_json

In [7]:
# Call assemble_json with each SWAPI URL and number of pages

# Run the async tasks
results = await asyncio.gather(
    assemble_json(films_url, films_pages),
    assemble_json(people_url, people_pages),
    assemble_json(planets_url, planets_pages),
    assemble_json(species_url, species_pages),
    assemble_json(starships_url, starships_pages),
    assemble_json(vehicles_url, vehicles_pages),
)

film_data, people_data, planets_data, species_data, starships_data, vehicles_data = results


Retrieving 1 pages from https://swapi.dev/api/films/
Retrieving 9 pages from https://swapi.dev/api/people/
Retrieving 6 pages from https://swapi.dev/api/planets/
Retrieving 4 pages from https://swapi.dev/api/species/
Retrieving 4 pages from https://swapi.dev/api/starships/
Retrieving 4 pages from https://swapi.dev/api/vehicles/
Found 6 records at https://swapi.dev/api/films/
Found 36 records at https://swapi.dev/api/starships/
Found 39 records at https://swapi.dev/api/vehicles/
Found 37 records at https://swapi.dev/api/species/
Found 60 records at https://swapi.dev/api/planets/
Found 82 records at https://swapi.dev/api/people/


In [8]:
# Create one dataframe per category from the downloaded records
# Need to explode the arrays in films_df to get the data in a usable format
films_df = pd.DataFrame(film_data)
people_df = pd.DataFrame(people_data)
planets_df = pd.DataFrame(planets_data)
species_df = pd.DataFrame(species_data)
starships_df = pd.DataFrame(starships_data)
vehicles_df = pd.DataFrame(vehicles_data)

# Label each dataframe with its category name, then display its first 5 rows
for category_name, category_df in (
    ('films', films_df),
    ('people', people_df),
    ('planets', planets_df),
    ('species', species_df),
    ('starships', starships_df),
    ('vehicles', vehicles_df),
):
    category_df.attrs['data'] = category_name
    display(category_df.head())

Unnamed: 0,title,episode_id,opening_crawl,director,producer,release_date,characters,planets,starships,vehicles,species,created,edited,url
0,A New Hope,4,It is a period of civil war.\r\nRebel spaceshi...,George Lucas,"Gary Kurtz, Rick McCallum",1977-05-25,"[https://swapi.dev/api/people/1/, https://swap...","[https://swapi.dev/api/planets/1/, https://swa...","[https://swapi.dev/api/starships/2/, https://s...","[https://swapi.dev/api/vehicles/4/, https://sw...","[https://swapi.dev/api/species/1/, https://swa...",2014-12-10T14:23:31.880000Z,2014-12-20T19:49:45.256000Z,https://swapi.dev/api/films/1/
1,The Empire Strikes Back,5,It is a dark time for the\r\nRebellion. Althou...,Irvin Kershner,"Gary Kurtz, Rick McCallum",1980-05-17,"[https://swapi.dev/api/people/1/, https://swap...","[https://swapi.dev/api/planets/4/, https://swa...","[https://swapi.dev/api/starships/3/, https://s...","[https://swapi.dev/api/vehicles/8/, https://sw...","[https://swapi.dev/api/species/1/, https://swa...",2014-12-12T11:26:24.656000Z,2014-12-15T13:07:53.386000Z,https://swapi.dev/api/films/2/
2,Return of the Jedi,6,Luke Skywalker has returned to\r\nhis home pla...,Richard Marquand,"Howard G. Kazanjian, George Lucas, Rick McCallum",1983-05-25,"[https://swapi.dev/api/people/1/, https://swap...","[https://swapi.dev/api/planets/1/, https://swa...","[https://swapi.dev/api/starships/2/, https://s...","[https://swapi.dev/api/vehicles/8/, https://sw...","[https://swapi.dev/api/species/1/, https://swa...",2014-12-18T10:39:33.255000Z,2014-12-20T09:48:37.462000Z,https://swapi.dev/api/films/3/
3,The Phantom Menace,1,Turmoil has engulfed the\r\nGalactic Republic....,George Lucas,Rick McCallum,1999-05-19,"[https://swapi.dev/api/people/2/, https://swap...","[https://swapi.dev/api/planets/1/, https://swa...","[https://swapi.dev/api/starships/31/, https://...","[https://swapi.dev/api/vehicles/33/, https://s...","[https://swapi.dev/api/species/1/, https://swa...",2014-12-19T16:52:55.740000Z,2014-12-20T10:54:07.216000Z,https://swapi.dev/api/films/4/
4,Attack of the Clones,2,There is unrest in the Galactic\r\nSenate. Sev...,George Lucas,Rick McCallum,2002-05-16,"[https://swapi.dev/api/people/2/, https://swap...","[https://swapi.dev/api/planets/1/, https://swa...","[https://swapi.dev/api/starships/21/, https://...","[https://swapi.dev/api/vehicles/4/, https://sw...","[https://swapi.dev/api/species/1/, https://swa...",2014-12-20T10:57:57.886000Z,2014-12-20T20:18:48.516000Z,https://swapi.dev/api/films/5/


Unnamed: 0,name,height,mass,hair_color,skin_color,eye_color,birth_year,gender,homeworld,films,species,vehicles,starships,created,edited,url
0,Luke Skywalker,172,77,blond,fair,blue,19BBY,male,https://swapi.dev/api/planets/1/,"[https://swapi.dev/api/films/1/, https://swapi...",[],"[https://swapi.dev/api/vehicles/14/, https://s...","[https://swapi.dev/api/starships/12/, https://...",2014-12-09T13:50:51.644000Z,2014-12-20T21:17:56.891000Z,https://swapi.dev/api/people/1/
1,C-3PO,167,75,,gold,yellow,112BBY,,https://swapi.dev/api/planets/1/,"[https://swapi.dev/api/films/1/, https://swapi...",[https://swapi.dev/api/species/2/],[],[],2014-12-10T15:10:51.357000Z,2014-12-20T21:17:50.309000Z,https://swapi.dev/api/people/2/
2,R2-D2,96,32,,"white, blue",red,33BBY,,https://swapi.dev/api/planets/8/,"[https://swapi.dev/api/films/1/, https://swapi...",[https://swapi.dev/api/species/2/],[],[],2014-12-10T15:11:50.376000Z,2014-12-20T21:17:50.311000Z,https://swapi.dev/api/people/3/
3,Darth Vader,202,136,none,white,yellow,41.9BBY,male,https://swapi.dev/api/planets/1/,"[https://swapi.dev/api/films/1/, https://swapi...",[],[],[https://swapi.dev/api/starships/13/],2014-12-10T15:18:20.704000Z,2014-12-20T21:17:50.313000Z,https://swapi.dev/api/people/4/
4,Leia Organa,150,49,brown,light,brown,19BBY,female,https://swapi.dev/api/planets/2/,"[https://swapi.dev/api/films/1/, https://swapi...",[],[https://swapi.dev/api/vehicles/30/],[],2014-12-10T15:20:09.791000Z,2014-12-20T21:17:50.315000Z,https://swapi.dev/api/people/5/


Unnamed: 0,name,rotation_period,orbital_period,diameter,climate,gravity,terrain,surface_water,population,residents,films,created,edited,url
0,Tatooine,23,304,10465,arid,1 standard,desert,1,200000,"[https://swapi.dev/api/people/1/, https://swap...","[https://swapi.dev/api/films/1/, https://swapi...",2014-12-09T13:50:49.641000Z,2014-12-20T20:58:18.411000Z,https://swapi.dev/api/planets/1/
1,Alderaan,24,364,12500,temperate,1 standard,"grasslands, mountains",40,2000000000,"[https://swapi.dev/api/people/5/, https://swap...","[https://swapi.dev/api/films/1/, https://swapi...",2014-12-10T11:35:48.479000Z,2014-12-20T20:58:18.420000Z,https://swapi.dev/api/planets/2/
2,Yavin IV,24,4818,10200,"temperate, tropical",1 standard,"jungle, rainforests",8,1000,[],[https://swapi.dev/api/films/1/],2014-12-10T11:37:19.144000Z,2014-12-20T20:58:18.421000Z,https://swapi.dev/api/planets/3/
3,Hoth,23,549,7200,frozen,1.1 standard,"tundra, ice caves, mountain ranges",100,unknown,[],[https://swapi.dev/api/films/2/],2014-12-10T11:39:13.934000Z,2014-12-20T20:58:18.423000Z,https://swapi.dev/api/planets/4/
4,Dagobah,23,341,8900,murky,,"swamp, jungles",8,unknown,[],"[https://swapi.dev/api/films/2/, https://swapi...",2014-12-10T11:42:22.590000Z,2014-12-20T20:58:18.425000Z,https://swapi.dev/api/planets/5/


Unnamed: 0,name,classification,designation,average_height,skin_colors,hair_colors,eye_colors,average_lifespan,homeworld,language,people,films,created,edited,url
0,Human,mammal,sentient,180.0,"caucasian, black, asian, hispanic","blonde, brown, black, red","brown, blue, green, hazel, grey, amber",120,https://swapi.dev/api/planets/9/,Galactic Basic,"[https://swapi.dev/api/people/66/, https://swa...","[https://swapi.dev/api/films/1/, https://swapi...",2014-12-10T13:52:11.567000Z,2014-12-20T21:36:42.136000Z,https://swapi.dev/api/species/1/
1,Droid,artificial,sentient,,,,,indefinite,,,"[https://swapi.dev/api/people/2/, https://swap...","[https://swapi.dev/api/films/1/, https://swapi...",2014-12-10T15:16:16.259000Z,2014-12-20T21:36:42.139000Z,https://swapi.dev/api/species/2/
2,Wookie,mammal,sentient,210.0,gray,"black, brown","blue, green, yellow, brown, golden, red",400,https://swapi.dev/api/planets/14/,Shyriiwook,"[https://swapi.dev/api/people/13/, https://swa...","[https://swapi.dev/api/films/1/, https://swapi...",2014-12-10T16:44:31.486000Z,2014-12-20T21:36:42.142000Z,https://swapi.dev/api/species/3/
3,Rodian,sentient,reptilian,170.0,"green, blue",,black,unknown,https://swapi.dev/api/planets/23/,Galatic Basic,[https://swapi.dev/api/people/15/],[https://swapi.dev/api/films/1/],2014-12-10T17:05:26.471000Z,2014-12-20T21:36:42.144000Z,https://swapi.dev/api/species/4/
4,Hutt,gastropod,sentient,300.0,"green, brown, tan",,"yellow, red",1000,https://swapi.dev/api/planets/24/,Huttese,[https://swapi.dev/api/people/16/],"[https://swapi.dev/api/films/1/, https://swapi...",2014-12-10T17:12:50.410000Z,2014-12-20T21:36:42.146000Z,https://swapi.dev/api/species/5/


Unnamed: 0,name,model,manufacturer,cost_in_credits,length,max_atmosphering_speed,crew,passengers,cargo_capacity,consumables,hyperdrive_rating,MGLT,starship_class,pilots,films,created,edited,url
0,CR90 corvette,CR90 corvette,Corellian Engineering Corporation,3500000,150.0,950.0,30-165,600.0,3000000,1 year,2.0,60,corvette,[],"[https://swapi.dev/api/films/1/, https://swapi...",2014-12-10T14:20:33.369000Z,2014-12-20T21:23:49.867000Z,https://swapi.dev/api/starships/2/
1,Star Destroyer,Imperial I-class Star Destroyer,Kuat Drive Yards,150000000,1600.0,975.0,47060,,36000000,2 years,2.0,60,Star Destroyer,[],"[https://swapi.dev/api/films/1/, https://swapi...",2014-12-10T15:08:19.848000Z,2014-12-20T21:23:49.870000Z,https://swapi.dev/api/starships/3/
2,Sentinel-class landing craft,Sentinel-class landing craft,"Sienar Fleet Systems, Cyngus Spaceworks",240000,38.0,1000.0,5,75.0,180000,1 month,1.0,70,landing craft,[],[https://swapi.dev/api/films/1/],2014-12-10T15:48:00.586000Z,2014-12-20T21:23:49.873000Z,https://swapi.dev/api/starships/5/
3,Death Star,DS-1 Orbital Battle Station,"Imperial Department of Military Research, Sien...",1000000000000,120000.0,,342953,843342.0,1000000000000,3 years,4.0,10,Deep Space Mobile Battlestation,[],[https://swapi.dev/api/films/1/],2014-12-10T16:36:50.509000Z,2014-12-20T21:26:24.783000Z,https://swapi.dev/api/starships/9/
4,Millennium Falcon,YT-1300 light freighter,Corellian Engineering Corporation,100000,34.37,1050.0,4,6.0,100000,2 months,0.5,75,Light freighter,"[https://swapi.dev/api/people/13/, https://swa...","[https://swapi.dev/api/films/1/, https://swapi...",2014-12-10T16:59:45.094000Z,2014-12-20T21:23:49.880000Z,https://swapi.dev/api/starships/10/


Unnamed: 0,name,model,manufacturer,cost_in_credits,length,max_atmosphering_speed,crew,passengers,cargo_capacity,consumables,vehicle_class,pilots,films,created,edited,url
0,Sand Crawler,Digger Crawler,Corellia Mining Corporation,150000,36.8,30,46,30,50000,2 months,wheeled,[],"[https://swapi.dev/api/films/1/, https://swapi...",2014-12-10T15:36:25.724000Z,2014-12-20T21:30:21.661000Z,https://swapi.dev/api/vehicles/4/
1,T-16 skyhopper,T-16 skyhopper,Incom Corporation,14500,10.4,1200,1,1,50,0,repulsorcraft,[],[https://swapi.dev/api/films/1/],2014-12-10T16:01:52.434000Z,2014-12-20T21:30:21.665000Z,https://swapi.dev/api/vehicles/6/
2,X-34 landspeeder,X-34 landspeeder,SoroSuub Corporation,10550,3.4,250,1,1,5,unknown,repulsorcraft,[],[https://swapi.dev/api/films/1/],2014-12-10T16:13:52.586000Z,2014-12-20T21:30:21.668000Z,https://swapi.dev/api/vehicles/7/
3,TIE/LN starfighter,Twin Ion Engine/Ln Starfighter,Sienar Fleet Systems,unknown,6.4,1200,1,0,65,2 days,starfighter,[],"[https://swapi.dev/api/films/1/, https://swapi...",2014-12-10T16:33:52.860000Z,2014-12-20T21:30:21.670000Z,https://swapi.dev/api/vehicles/8/
4,Snowspeeder,t-47 airspeeder,Incom corporation,unknown,4.5,650,2,0,10,none,airspeeder,"[https://swapi.dev/api/people/1/, https://swap...",[https://swapi.dev/api/films/2/],2014-12-15T12:22:12Z,2014-12-20T21:30:21.672000Z,https://swapi.dev/api/vehicles/14/


In [9]:
# Clean up the gender column: "male"/"female" become "Male"/"Female" and all other
# entries become "Non-Binary". Matching ignores case, so re-running this cell leaves
# already-cleaned values unchanged instead of turning them all into "Non-Binary".
gender_labels = {"male": "Male", "female": "Female"}
people_df['gender'] = people_df['gender'].map(
    lambda value: gender_labels.get(str(value).lower(), "Non-Binary")
)
display(people_df.head())

Unnamed: 0,name,height,mass,hair_color,skin_color,eye_color,birth_year,gender,homeworld,films,species,vehicles,starships,created,edited,url
0,Luke Skywalker,172,77,blond,fair,blue,19BBY,Male,https://swapi.dev/api/planets/1/,"[https://swapi.dev/api/films/1/, https://swapi...",[],"[https://swapi.dev/api/vehicles/14/, https://s...","[https://swapi.dev/api/starships/12/, https://...",2014-12-09T13:50:51.644000Z,2014-12-20T21:17:56.891000Z,https://swapi.dev/api/people/1/
1,C-3PO,167,75,,gold,yellow,112BBY,Non-Binary,https://swapi.dev/api/planets/1/,"[https://swapi.dev/api/films/1/, https://swapi...",[https://swapi.dev/api/species/2/],[],[],2014-12-10T15:10:51.357000Z,2014-12-20T21:17:50.309000Z,https://swapi.dev/api/people/2/
2,R2-D2,96,32,,"white, blue",red,33BBY,Non-Binary,https://swapi.dev/api/planets/8/,"[https://swapi.dev/api/films/1/, https://swapi...",[https://swapi.dev/api/species/2/],[],[],2014-12-10T15:11:50.376000Z,2014-12-20T21:17:50.311000Z,https://swapi.dev/api/people/3/
3,Darth Vader,202,136,none,white,yellow,41.9BBY,Male,https://swapi.dev/api/planets/1/,"[https://swapi.dev/api/films/1/, https://swapi...",[],[],[https://swapi.dev/api/starships/13/],2014-12-10T15:18:20.704000Z,2014-12-20T21:17:50.313000Z,https://swapi.dev/api/people/4/
4,Leia Organa,150,49,brown,light,brown,19BBY,Female,https://swapi.dev/api/planets/2/,"[https://swapi.dev/api/films/1/, https://swapi...",[],[https://swapi.dev/api/vehicles/30/],[],2014-12-10T15:20:09.791000Z,2014-12-20T21:17:50.315000Z,https://swapi.dev/api/people/5/


In [10]:
# Convert the mass and height columns from object type to int type.
for measurement in ("mass", "height"):
    # Remove commas (the jabba exception) so the text can be parsed as a number
    raw_text = people_df[measurement].astype(str).str.replace(",", "")
    people_df[measurement] = (
        pd.to_numeric(raw_text, errors="coerce")  # entries that are not numbers become NaN
        .fillna(0)  # ...and are then stored as 0
        .astype(int)
    )
people_df.head(5)

Unnamed: 0,name,height,mass,hair_color,skin_color,eye_color,birth_year,gender,homeworld,films,species,vehicles,starships,created,edited,url
0,Luke Skywalker,172,77,blond,fair,blue,19BBY,Male,https://swapi.dev/api/planets/1/,"[https://swapi.dev/api/films/1/, https://swapi...",[],"[https://swapi.dev/api/vehicles/14/, https://s...","[https://swapi.dev/api/starships/12/, https://...",2014-12-09T13:50:51.644000Z,2014-12-20T21:17:56.891000Z,https://swapi.dev/api/people/1/
1,C-3PO,167,75,,gold,yellow,112BBY,Non-Binary,https://swapi.dev/api/planets/1/,"[https://swapi.dev/api/films/1/, https://swapi...",[https://swapi.dev/api/species/2/],[],[],2014-12-10T15:10:51.357000Z,2014-12-20T21:17:50.309000Z,https://swapi.dev/api/people/2/
2,R2-D2,96,32,,"white, blue",red,33BBY,Non-Binary,https://swapi.dev/api/planets/8/,"[https://swapi.dev/api/films/1/, https://swapi...",[https://swapi.dev/api/species/2/],[],[],2014-12-10T15:11:50.376000Z,2014-12-20T21:17:50.311000Z,https://swapi.dev/api/people/3/
3,Darth Vader,202,136,none,white,yellow,41.9BBY,Male,https://swapi.dev/api/planets/1/,"[https://swapi.dev/api/films/1/, https://swapi...",[],[],[https://swapi.dev/api/starships/13/],2014-12-10T15:18:20.704000Z,2014-12-20T21:17:50.313000Z,https://swapi.dev/api/people/4/
4,Leia Organa,150,49,brown,light,brown,19BBY,Female,https://swapi.dev/api/planets/2/,"[https://swapi.dev/api/films/1/, https://swapi...",[],[https://swapi.dev/api/vehicles/30/],[],2014-12-10T15:20:09.791000Z,2014-12-20T21:17:50.315000Z,https://swapi.dev/api/people/5/


In [11]:
# Report summary statistics of the converted mass and height columns
mass_values = people_df['mass']
height_values = people_df['height']

summary_lines = (
    ("mean", "mass", mass_values.mean().round(2)),
    ("max", "mass", mass_values.max()),
    ("min", "height", height_values.min()),  # original author's note: index 27: Arvel Crynyd.
    ("mean", "height", height_values.mean().round(2)),
    ("max", "height", height_values.max()),
)
for statistic, column, value in summary_lines:
    print(f"The {statistic} of {column} column is {value}")

The mean of mass column is 70.01
The max of mass column is 1358
The min of height column is 0
The mean of height column is 172.48
The max of height column is 264


In [12]:
# Categorize character mass into the ranges '0-50', '51-100', 'Over 100'

def categorize_mass(mass):
    """Return the label of the mass range that ``mass`` falls into.

    Args:
        mass: Numeric mass value.

    Returns:
        "0-50" for values up to and including 50, "51-100" for values above 50
        up to and including 100, and "Over 100" for anything larger.
    """
    if mass <= 50:
        return "0-50"
    # No lower bound here: a fractional value such as 50.5 belongs to the
    # middle range rather than falling through to "Over 100".
    if mass <= 100:
        return "51-100"
    return "Over 100"

# Label every character with the mass range that its mass falls into
mass_labels = people_df["mass"].map(categorize_mass)
people_df["mass range"] = mass_labels

# Show the DataFrame with the new column
display(people_df)


Unnamed: 0,name,height,mass,hair_color,skin_color,eye_color,birth_year,gender,homeworld,films,species,vehicles,starships,created,edited,url,mass range
0,Luke Skywalker,172,77,blond,fair,blue,19BBY,Male,https://swapi.dev/api/planets/1/,"[https://swapi.dev/api/films/1/, https://swapi...",[],"[https://swapi.dev/api/vehicles/14/, https://s...","[https://swapi.dev/api/starships/12/, https://...",2014-12-09T13:50:51.644000Z,2014-12-20T21:17:56.891000Z,https://swapi.dev/api/people/1/,51-100
1,C-3PO,167,75,,gold,yellow,112BBY,Non-Binary,https://swapi.dev/api/planets/1/,"[https://swapi.dev/api/films/1/, https://swapi...",[https://swapi.dev/api/species/2/],[],[],2014-12-10T15:10:51.357000Z,2014-12-20T21:17:50.309000Z,https://swapi.dev/api/people/2/,51-100
2,R2-D2,96,32,,"white, blue",red,33BBY,Non-Binary,https://swapi.dev/api/planets/8/,"[https://swapi.dev/api/films/1/, https://swapi...",[https://swapi.dev/api/species/2/],[],[],2014-12-10T15:11:50.376000Z,2014-12-20T21:17:50.311000Z,https://swapi.dev/api/people/3/,0-50
3,Darth Vader,202,136,none,white,yellow,41.9BBY,Male,https://swapi.dev/api/planets/1/,"[https://swapi.dev/api/films/1/, https://swapi...",[],[],[https://swapi.dev/api/starships/13/],2014-12-10T15:18:20.704000Z,2014-12-20T21:17:50.313000Z,https://swapi.dev/api/people/4/,Over 100
4,Leia Organa,150,49,brown,light,brown,19BBY,Female,https://swapi.dev/api/planets/2/,"[https://swapi.dev/api/films/1/, https://swapi...",[],[https://swapi.dev/api/vehicles/30/],[],2014-12-10T15:20:09.791000Z,2014-12-20T21:17:50.315000Z,https://swapi.dev/api/people/5/,0-50
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
77,Grievous,216,159,none,"brown, white","green, yellow",unknown,Male,https://swapi.dev/api/planets/59/,[https://swapi.dev/api/films/6/],[https://swapi.dev/api/species/36/],[https://swapi.dev/api/vehicles/60/],[https://swapi.dev/api/starships/74/],2014-12-20T19:43:53.348000Z,2014-12-20T21:17:50.488000Z,https://swapi.dev/api/people/79/,Over 100
78,Tarfful,234,136,brown,brown,blue,unknown,Male,https://swapi.dev/api/planets/14/,[https://swapi.dev/api/films/6/],[https://swapi.dev/api/species/3/],[],[],2014-12-20T19:46:34.209000Z,2014-12-20T21:17:50.491000Z,https://swapi.dev/api/people/80/,Over 100
79,Raymus Antilles,188,79,brown,light,brown,unknown,Male,https://swapi.dev/api/planets/2/,"[https://swapi.dev/api/films/1/, https://swapi...",[],[],[],2014-12-20T19:49:35.583000Z,2014-12-20T21:17:50.493000Z,https://swapi.dev/api/people/81/,51-100
80,Sly Moore,178,48,none,pale,white,unknown,Female,https://swapi.dev/api/planets/60/,"[https://swapi.dev/api/films/5/, https://swapi...",[],[],[],2014-12-20T20:18:37.619000Z,2014-12-20T21:17:50.496000Z,https://swapi.dev/api/people/82/,0-50


In [13]:
# Clean up species data that is stored as empty arrays

# Replace the species entry for "R4-P17" with the URL for the species "Droid" in the species_df.
# NOTE(review): the right-hand side is a one-element list, so this presumably relies on
# exactly one row being named "R4-P17" — confirm.
people_df.loc[people_df["name"] == "R4-P17", "species"] = ["https://swapi.dev/api/species/2/"]

# In people_df, replace every empty species list ([]) with a one-element list holding the URL
# for the species "Human" in the species_df. The replacement is built row by row with apply().
# Earlier attempt, kept for reference (assigned a single list directly):
# people_df.loc[people_df["species"].apply(len) == 0, "species"] = ["https://swapi.dev/api/species/1/"]
people_df.loc[people_df["species"].apply(len) == 0, "species"] = people_df.loc[
    people_df["species"].apply(len) == 0, "species"
].apply(lambda x: ["https://swapi.dev/api/species/1/"])

# Species should not be an array, so we extract the URL from the array with explode().
# NOTE(review): assumes no person lists more than one species; a longer list would make
# explode() return extra rows — TODO confirm.
people_df["species"] = people_df["species"].explode()

# Preview the first 20 rows to check the cleaned species column
people_df.head(20)

Unnamed: 0,name,height,mass,hair_color,skin_color,eye_color,birth_year,gender,homeworld,films,species,vehicles,starships,created,edited,url,mass range
0,Luke Skywalker,172,77,blond,fair,blue,19BBY,Male,https://swapi.dev/api/planets/1/,"[https://swapi.dev/api/films/1/, https://swapi...",https://swapi.dev/api/species/1/,"[https://swapi.dev/api/vehicles/14/, https://s...","[https://swapi.dev/api/starships/12/, https://...",2014-12-09T13:50:51.644000Z,2014-12-20T21:17:56.891000Z,https://swapi.dev/api/people/1/,51-100
1,C-3PO,167,75,,gold,yellow,112BBY,Non-Binary,https://swapi.dev/api/planets/1/,"[https://swapi.dev/api/films/1/, https://swapi...",https://swapi.dev/api/species/2/,[],[],2014-12-10T15:10:51.357000Z,2014-12-20T21:17:50.309000Z,https://swapi.dev/api/people/2/,51-100
2,R2-D2,96,32,,"white, blue",red,33BBY,Non-Binary,https://swapi.dev/api/planets/8/,"[https://swapi.dev/api/films/1/, https://swapi...",https://swapi.dev/api/species/2/,[],[],2014-12-10T15:11:50.376000Z,2014-12-20T21:17:50.311000Z,https://swapi.dev/api/people/3/,0-50
3,Darth Vader,202,136,none,white,yellow,41.9BBY,Male,https://swapi.dev/api/planets/1/,"[https://swapi.dev/api/films/1/, https://swapi...",https://swapi.dev/api/species/1/,[],[https://swapi.dev/api/starships/13/],2014-12-10T15:18:20.704000Z,2014-12-20T21:17:50.313000Z,https://swapi.dev/api/people/4/,Over 100
4,Leia Organa,150,49,brown,light,brown,19BBY,Female,https://swapi.dev/api/planets/2/,"[https://swapi.dev/api/films/1/, https://swapi...",https://swapi.dev/api/species/1/,[https://swapi.dev/api/vehicles/30/],[],2014-12-10T15:20:09.791000Z,2014-12-20T21:17:50.315000Z,https://swapi.dev/api/people/5/,0-50
5,Owen Lars,178,120,"brown, grey",light,blue,52BBY,Male,https://swapi.dev/api/planets/1/,"[https://swapi.dev/api/films/1/, https://swapi...",https://swapi.dev/api/species/1/,[],[],2014-12-10T15:52:14.024000Z,2014-12-20T21:17:50.317000Z,https://swapi.dev/api/people/6/,Over 100
6,Beru Whitesun lars,165,75,brown,light,blue,47BBY,Female,https://swapi.dev/api/planets/1/,"[https://swapi.dev/api/films/1/, https://swapi...",https://swapi.dev/api/species/1/,[],[],2014-12-10T15:53:41.121000Z,2014-12-20T21:17:50.319000Z,https://swapi.dev/api/people/7/,51-100
7,R5-D4,97,32,,"white, red",red,unknown,Non-Binary,https://swapi.dev/api/planets/1/,[https://swapi.dev/api/films/1/],https://swapi.dev/api/species/2/,[],[],2014-12-10T15:57:50.959000Z,2014-12-20T21:17:50.321000Z,https://swapi.dev/api/people/8/,0-50
8,Biggs Darklighter,183,84,black,light,brown,24BBY,Male,https://swapi.dev/api/planets/1/,[https://swapi.dev/api/films/1/],https://swapi.dev/api/species/1/,[],[https://swapi.dev/api/starships/12/],2014-12-10T15:59:50.509000Z,2014-12-20T21:17:50.323000Z,https://swapi.dev/api/people/9/,51-100
9,Obi-Wan Kenobi,182,77,"auburn, white",fair,blue-gray,57BBY,Male,https://swapi.dev/api/planets/20/,"[https://swapi.dev/api/films/1/, https://swapi...",https://swapi.dev/api/species/1/,[https://swapi.dev/api/vehicles/38/],"[https://swapi.dev/api/starships/48/, https://...",2014-12-10T16:16:29.192000Z,2014-12-20T21:17:50.325000Z,https://swapi.dev/api/people/10/,51-100


In [14]:
# Get all information from the other dataframes based on the person's homeworld, species, starships, and vehicles. 
# These columns are arrays of URLs that need to be exploded to get the data in a usable format.

# Look up the name of the planet in planets_df that matches a person's homeworld URL
def get_homeworld(homeworld_url):
    """Return the name of the first planets_df row whose url equals homeworld_url.

    Raises IndexError when no row matches.
    """
    matching_names = planets_df.loc[planets_df["url"] == homeworld_url, "name"]
    return matching_names.values[0]

# Look up the name of the species in species_df that matches a person's species URL
def get_species(species_url):
    """Return the name of the first species_df row whose url equals species_url.

    Raises IndexError when no row matches.
    """
    matching_names = species_df.loc[species_df["url"] == species_url, "name"]
    return matching_names.values[0]

# Translate a person's starship URLs into starship names using starships_df
def get_starships(starships_urls):
    """Return the starship name for each URL in starships_urls, in the same order.

    Each name comes from the first starships_df row whose url equals the URL.
    Raises IndexError when a URL has no matching row.
    """
    return [
        starships_df.loc[starships_df["url"] == starship_url, "name"].values[0]
        for starship_url in starships_urls
    ]

# Translate a person's vehicle URLs into vehicle names using vehicles_df
def get_vehicles(vehicles_urls):
    """Return the vehicle name for each URL in vehicles_urls, in the same order.

    Each name comes from the first vehicles_df row whose url equals the URL.
    Raises IndexError when a URL has no matching row.
    """
    return [
        vehicles_df.loc[vehicles_df["url"] == vehicle_url, "name"].values[0]
        for vehicle_url in vehicles_urls
    ]

# Look up one kind of information about a person by name
def get_person_info(person_name, information):
    """Return the requested information about the person named person_name.

    information selects both the people_df column to read and the lookup applied
    to its value: "homeworld", "species", "starships" or "vehicles". Any other
    value returns the string "Invalid information requested.".
    Raises IndexError when no people_df row has that name.
    """
    person_rows = people_df[people_df["name"] == person_name]

    if information not in ("homeworld", "species", "starships", "vehicles"):
        return "Invalid information requested."

    # Each supported request reads the column of the same name and resolves it
    # with the matching lookup function
    lookups = {
        "homeworld": get_homeworld,
        "species": get_species,
        "starships": get_starships,
        "vehicles": get_vehicles,
    }
    return lookups[information](person_rows[information].values[0])
    
# TEST
# Look up the homeworld, starships, vehicles, and species of one person in people_df
# The person's name is stored in the person variable
person = "Chewbacca"
homeworld, starships, vehicles, species = (
    get_person_info(person, requested)
    for requested in ("homeworld", "starships", "vehicles", "species")
)

print(f"{person} is from {homeworld}.")
print(f"{person} has piloted the following starships: {starships}")
print(f"{person} has driven the following vehicles: {vehicles}")
print(f"{person} is a {species}.")



Chewbacca is from Kashyyyk.
Chewbacca has piloted the following starships: ['Millennium Falcon', 'Imperial shuttle']
Chewbacca has driven the following vehicles: ['AT-ST']
Chewbacca is a Wookie.


In [15]:
# Clean up the classification column: "reptilian" and "sentient" become "reptile",
# and "mammals" becomes "mammal"
# NOTE(review): in the species preview earlier in this notebook, Rodian is the row whose
# classification is "sentient" (its designation is "reptilian"); this mapping relabels
# every species classified "sentient" — confirm Rodian is the only one.
classification_fixes = {"reptilian": "reptile", "sentient": "reptile", "mammals": "mammal"}
species_df["classification"] = species_df["classification"].replace(classification_fixes)
species_df["classification"].value_counts()

classification
mammal        17
amphibian      6
unknown        6
reptile        5
artificial     1
gastropod      1
insectoid      1
Name: count, dtype: int64

In [16]:
# Preview the first 10 species rows to check the cleaned classification column
species_df.head(10)

Unnamed: 0,name,classification,designation,average_height,skin_colors,hair_colors,eye_colors,average_lifespan,homeworld,language,people,films,created,edited,url
0,Human,mammal,sentient,180.0,"caucasian, black, asian, hispanic","blonde, brown, black, red","brown, blue, green, hazel, grey, amber",120,https://swapi.dev/api/planets/9/,Galactic Basic,"[https://swapi.dev/api/people/66/, https://swa...","[https://swapi.dev/api/films/1/, https://swapi...",2014-12-10T13:52:11.567000Z,2014-12-20T21:36:42.136000Z,https://swapi.dev/api/species/1/
1,Droid,artificial,sentient,,,,,indefinite,,,"[https://swapi.dev/api/people/2/, https://swap...","[https://swapi.dev/api/films/1/, https://swapi...",2014-12-10T15:16:16.259000Z,2014-12-20T21:36:42.139000Z,https://swapi.dev/api/species/2/
2,Wookie,mammal,sentient,210.0,gray,"black, brown","blue, green, yellow, brown, golden, red",400,https://swapi.dev/api/planets/14/,Shyriiwook,"[https://swapi.dev/api/people/13/, https://swa...","[https://swapi.dev/api/films/1/, https://swapi...",2014-12-10T16:44:31.486000Z,2014-12-20T21:36:42.142000Z,https://swapi.dev/api/species/3/
3,Rodian,reptile,reptilian,170.0,"green, blue",,black,unknown,https://swapi.dev/api/planets/23/,Galatic Basic,[https://swapi.dev/api/people/15/],[https://swapi.dev/api/films/1/],2014-12-10T17:05:26.471000Z,2014-12-20T21:36:42.144000Z,https://swapi.dev/api/species/4/
4,Hutt,gastropod,sentient,300.0,"green, brown, tan",,"yellow, red",1000,https://swapi.dev/api/planets/24/,Huttese,[https://swapi.dev/api/people/16/],"[https://swapi.dev/api/films/1/, https://swapi...",2014-12-10T17:12:50.410000Z,2014-12-20T21:36:42.146000Z,https://swapi.dev/api/species/5/
5,Yoda's species,mammal,sentient,66.0,"green, yellow","brown, white","brown, green, yellow",900,https://swapi.dev/api/planets/28/,Galactic basic,[https://swapi.dev/api/people/20/],"[https://swapi.dev/api/films/2/, https://swapi...",2014-12-15T12:27:22.877000Z,2014-12-20T21:36:42.148000Z,https://swapi.dev/api/species/6/
6,Trandoshan,reptile,sentient,200.0,"brown, green",none,"yellow, orange",unknown,https://swapi.dev/api/planets/29/,Dosh,[https://swapi.dev/api/people/24/],[https://swapi.dev/api/films/2/],2014-12-15T13:07:47.704000Z,2014-12-20T21:36:42.151000Z,https://swapi.dev/api/species/7/
7,Mon Calamari,amphibian,sentient,160.0,"red, blue, brown, magenta",none,yellow,unknown,https://swapi.dev/api/planets/31/,Mon Calamarian,[https://swapi.dev/api/people/27/],[https://swapi.dev/api/films/3/],2014-12-18T11:09:52.263000Z,2014-12-20T21:36:42.153000Z,https://swapi.dev/api/species/8/
8,Ewok,mammal,sentient,100.0,brown,"white, brown, black","orange, brown",unknown,https://swapi.dev/api/planets/7/,Ewokese,[https://swapi.dev/api/people/30/],[https://swapi.dev/api/films/3/],2014-12-18T11:22:00.285000Z,2014-12-20T21:36:42.155000Z,https://swapi.dev/api/species/9/
9,Sullustan,mammal,sentient,180.0,pale,none,black,unknown,https://swapi.dev/api/planets/33/,Sullutese,[https://swapi.dev/api/people/31/],[https://swapi.dev/api/films/3/],2014-12-18T11:26:20.103000Z,2014-12-20T21:36:42.157000Z,https://swapi.dev/api/species/10/


## Section 2: Gather User Input to Generate Character List

In [17]:
# Function to get 3 random names per category
import random

def get_random_characters(df, column_name, category):
    """Pick up to three distinct names from the rows matching a category.

    Args:
        df: DataFrame holding a ``name`` column plus the column to filter on.
        column_name: Column compared against ``category``.
        category: Value a row must have in ``column_name`` to be eligible.

    Returns:
        A list of at most three unique names in random order, or the single
        placeholder ``["No characters available"]`` when no row matches.
    """
    matching_rows = df.loc[df[column_name] == category]

    # Nothing to choose from: hand back a placeholder entry for the menu.
    if matching_rows.empty:
        return ["No characters available"]

    candidates = matching_rows["name"].drop_duplicates().tolist()

    # Shuffle in place, then draw without replacement (never more than exist).
    random.shuffle(candidates)
    draw_count = min(3, len(candidates))
    return random.sample(candidates, draw_count)


# Create dynamic choice_menu
def generate_choice_menu():
    """Build the nested menu: trait heading -> category -> sampled names.

    Reads the module-level ``people_df`` and ``species_df`` frames and asks
    ``get_random_characters`` for a fresh sample per category, so every call
    can return different names.

    Returns:
        dict mapping "Gender", "Species" and "Mass Range" to a dict of
        ``{category: [names]}``.
    """
    # (heading, frame to sample from, column to filter on, categories offered)
    layout = (
        ("Gender", people_df, "gender",
         ["Male", "Female", "Non-Binary"]),
        ("Species", species_df, "classification",
         ["mammal", "amphibian", "unknown", "reptile", "artificial", "gastropod", "insectoid"]),
        ("Mass Range", people_df, "mass range",
         ["0-50", "51-100", "Over 100"]),
    )

    menu = {}
    for heading, frame, column, categories in layout:
        options = {}
        for category in categories:
            options[category] = get_random_characters(frame, column, category)
        menu[heading] = options
    return menu

# Function to display and select a character
def _prompt_for_choice(prompt, option_count):
    """Ask until the user types a whole number from 1 to ``option_count``.

    Args:
        prompt: Text passed to ``input``.
        option_count: Number of entries in the list being chosen from.

    Returns:
        The selection as a zero-based index.
    """
    while True:
        reply = input(prompt).strip()
        try:
            number = int(reply)
        except ValueError:
            number = None
        # Rejecting 0 and negatives matters: a raw "0" would otherwise become
        # index -1 and silently select the last entry.
        if number is not None and 1 <= number <= option_count:
            return number - 1
        print(f"Invalid selection. Please enter a number from 1 to {option_count}.")


def display_menu():
    """Displays game character choices and allows the user to select a character.

    Each round regenerates the menu via ``generate_choice_menu`` and walks the
    user through three prompts: trait category, option within that category,
    and finally one of the sampled characters. Invalid or out-of-range entries
    are re-prompted instead of raising. After a selection the user may start
    another round by answering "yes"; any other answer ends the game.

    Returns:
        None. All interaction happens through ``input`` and ``print``.
    """
    # The banner never changes between rounds, so build it once.
    menu_dashes = "-" * 46
    welcome_message1 = "Welcome to THE DEATH STAR game."
    welcome_message2 = "A long time ago in a galaxy far, far away...."
    welcome_design = "***"
    welcome_spacing = ((46 - len(welcome_message1)) // 2) * " "
    welcome_des_spc = ((46 - len(welcome_design)) // 2) * " "
    banner = f"""
{menu_dashes}
{welcome_des_spc}{welcome_design}
{welcome_spacing}{welcome_message1}
{welcome_message2}
{welcome_des_spc}{welcome_design}
{menu_dashes}
"""

    while True:  # Allows replaying the game
        choice_menu = generate_choice_menu()  # Re-generate menu for fresh randomness
        print(banner)

        # Trait categories
        categories = list(choice_menu.keys())
        for index, key in enumerate(categories, 1):
            print(f"{index}. {key}")
        choice = _prompt_for_choice(
            "\nWhat Star Wars character would you like to play today? \nSelect # from the trait categories: (1-3): ",
            len(categories),
        )
        selected_key = categories[choice]

        # Subcategories of the chosen trait
        sub_menu = choice_menu[selected_key]
        sub_keys = list(sub_menu.keys())
        for index, option in enumerate(sub_keys, 1):
            print(f"{index}. {option}")
        sub_choice = _prompt_for_choice(f"\nSelect a {selected_key} option: ", len(sub_keys))
        selected_sub_key = sub_keys[sub_choice]

        # Characters sampled for that subcategory
        characters = sub_menu[selected_sub_key]
        for index, character in enumerate(characters, 1):
            print(f"{index}. {character}")
        char_choice = _prompt_for_choice(f"\nSelect a character from {selected_sub_key}: ", len(characters))
        selected_character = characters[char_choice]

        print(f"\nYou selected {selected_character} from {selected_sub_key} under {selected_key}.")

        # Ask user if they want to play again
        play_again = input("\nDo you want to play again? (yes/no): ").strip().lower()
        if play_again != "yes":
            print("\nThanks for playing! May the Force be with you! 🚀")
            break

In [19]:
# Run the game! 
display_menu()


----------------------------------------------
                     ***
       Welcome to THE DEATH STAR game.
A long time ago in a galaxy far, far away....
                     ***
----------------------------------------------

1. Gender
2. Species
3. Mass Range
1. Male
2. Female
3. Non-Binary
1. Adi Gallia
2. Leia Organa
3. Cordé

You selected Cordé from Female under Gender.

----------------------------------------------
                     ***
       Welcome to THE DEATH STAR game.
A long time ago in a galaxy far, far away....
                     ***
----------------------------------------------

1. Gender
2. Species
3. Mass Range
1. Male
2. Female
3. Non-Binary
1. Wedge Antilles
2. Boba Fett
3. Bossk

You selected Wedge Antilles from Male under Gender.

Thanks for playing! May the Force be with you! 🚀


## Section 3: Filter Character List, Get Random 3 Characters, Ask User to Pick One Character

In [21]:
import numpy as np
import pandas as pd
import ipywidgets as widgets
from IPython.display import display

# Stop early with a clear message unless both frames already exist in the
# notebook namespace (they are expected to come from earlier cells).
if not {'people_df', 'species_df'}.issubset(globals()):
    raise ValueError("Ensure `people_df` and `species_df` are loaded before running this script.")

# Create a mapping from species URLs to names
def create_species_mapping():
    """Return a ``{species url: species name}`` dict built from ``species_df``.

    Raises:
        KeyError: if ``species_df`` lacks the ``url`` or the ``name`` column.
    """
    available = species_df.columns
    if 'url' not in available or 'name' not in available:
        raise KeyError("Missing 'url' or 'name' column in species_df.")

    # Rows are paired positionally; a repeated url keeps the last name seen.
    return {url: name for url, name in zip(species_df['url'], species_df['name'])}

species_mapping = create_species_mapping()

# Clean and map species column
def clean_species_column(species_value):
    """Reduce one ``species`` cell to a single value usable as a join key.

    Args:
        species_value: Either a list of species URLs or an already-scalar value.

    Returns:
        The first element when given a non-empty list, ``None`` when given an
        empty list (previously this raised IndexError), and the value itself
        unchanged for anything that is not a list.
    """
    if isinstance(species_value, list):
        return species_value[0] if species_value else None
    return species_value

# Collapse list-valued species cells to a single value so the column can be
# used as a join key.
people_df['species'] = people_df['species'].apply(clean_species_column)

if 'url' not in species_df.columns:
    raise KeyError("Missing 'url' column in species_df.")

# Rename the lookup columns BEFORE joining. Merging on the raw 'url'/'name'
# columns makes pandas suffix any same-named columns already in people_df
# (name_x/name_y, url_x/url_y), which loses people_df['name'] and leaves no
# 'species_name' column at all.
species_lookup = (
    species_df[['url', 'name']]
    .rename(columns={'url': '_species_lookup_url', 'name': 'species_name'})
    .drop_duplicates(subset='_species_lookup_url')  # one row per url, so the join cannot multiply people rows
)

people_df = (
    people_df
    # Dropping a previous result keeps this cell safe to re-run.
    .drop(columns=['species_name'], errors='ignore')
    .merge(species_lookup, left_on='species', right_on='_species_lookup_url', how='left')
    .drop(columns=['_species_lookup_url'])
)

# Ensure species_name column is properly populated
if 'species_name' not in people_df.columns or people_df['species_name'].isnull().all():
    raise ValueError("The 'species_name' column is empty or missing after merging. Check species_df structure.")

# Dropdown option functions
def get_gender_options():
    """Return the gender dropdown entries: placeholder first, then the
    distinct non-null ``people_df['gender']`` values as sorted strings."""
    genders = people_df['gender'].dropna().astype(str).unique().tolist()
    genders.sort()
    return ["Select An Option", *genders]

def get_species_options():
    """Return the species dropdown entries: placeholder first, then the
    distinct non-null ``people_df['species_name']`` values in sorted order."""
    species_names = people_df['species_name'].dropna().unique().tolist()
    if not species_names:
        return ["Select An Option"]
    return ["Select An Option"] + sorted(species_names)

def get_mass_ranges():
    """Return the fixed mass dropdown entries (placeholder first) as a new list."""
    labels = ["Select An Option"]
    labels.extend(("0-50", "51-100", "Over 100"))
    return labels

# Create dropdowns
def _build_filter_dropdown(label, choices):
    """Return an enabled Dropdown listing ``choices`` with the placeholder selected."""
    return widgets.Dropdown(
        options=choices, value="Select An Option", description=label, disabled=False
    )

gender_dropdown = _build_filter_dropdown('Gender:', get_gender_options())
species_dropdown = _build_filter_dropdown('Species:', get_species_options())
mass_dropdown = _build_filter_dropdown('Mass:', get_mass_ranges())

# Function to find matching characters
def _mass_range_mask(mass, range_label):
    """Return a boolean mask of ``mass`` values inside the labelled range.

    Understands the two label shapes offered by the mass dropdown:
    "LOW-HIGH" (inclusive on both ends) and "Over N" (strictly greater).

    Raises:
        ValueError: if the label matches neither shape.
    """
    label = range_label.strip()
    if label.lower().startswith("over"):
        return mass > int(label[4:])
    low, high = map(int, label.split("-"))
    return mass.between(low, high)


def select_random_characters(_):
    """Button callback: offer up to three random characters matching the filters.

    Reads the gender, species and mass dropdowns, filters ``people_df`` on all
    three, and loads a random sample of at most three matching names into
    ``character_dropdown``. Progress and problems are reported through
    ``output.value``.

    Args:
        _: The clicked button (unused).
    """
    selected_gender = gender_dropdown.value
    selected_species = species_dropdown.value
    selected_mass_range = mass_dropdown.value

    if "Select An Option" in [selected_gender, selected_species, selected_mass_range]:
        output.value = "⚠️ Please select an option from each menu."
        return

    # Coerce locally so non-numeric mass entries simply never match,
    # without touching the shared frame.
    mass = pd.to_numeric(people_df['mass'], errors='coerce')
    try:
        # The labels are "0-50", "51-100" and "Over 100"; splitting on " - "
        # (with spaces) never matched any of them.
        in_mass_range = _mass_range_mask(mass, selected_mass_range)
    except ValueError:
        output.value = "⚠️ Invalid mass range selection."
        return

    filtered_people = people_df[(people_df['gender'] == selected_gender) &
                                (people_df['species_name'] == selected_species) &
                                in_mass_range]

    if filtered_people.empty:
        # Clear choices left over from an earlier search; they no longer
        # match the current filters.
        character_dropdown.options = ["Select An Option"]
        output.value = "❌ No matching characters found."
        return

    selected_characters = filtered_people.sample(n=min(3, len(filtered_people)), replace=False)
    character_dropdown.options = ["Select An Option"] + sorted(selected_characters['name'].tolist())
    character_dropdown.value = "Select An Option"
    output.value = "🗡️ Choose a character from the dropdown below!"

# Buttons and output fields
# "Find" button: every click runs select_random_characters, which reads the
# three filter dropdowns and writes to `output` / `character_dropdown`.
select_button = widgets.Button(description="Find 3 Random Characters", button_style='primary')
select_button.on_click(select_random_characters)

# "Reset" button: created here; its click handler is registered further down,
# once reset_selections has been defined.
reset_button = widgets.Button(description="Reset", button_style='warning')

def reset_selections(_):
    """Button callback: return every filter to the placeholder entry.

    Also empties the character dropdown down to the placeholder and reports
    the reset in ``output``.

    Args:
        _: The clicked button (unused).
    """
    for filter_dropdown in (gender_dropdown, species_dropdown, mass_dropdown):
        filter_dropdown.value = "Select An Option"
    character_dropdown.options = ["Select An Option"]
    output.value = "Selections have been reset."

# Attach the reset handler now that reset_selections exists.
reset_button.on_click(reset_selections)

# Status area written by the button callbacks. The callbacks look this name up
# when they run, so defining it after them is fine.
output = widgets.Textarea(value="Select options and press the button!", layout={'width': '100%', 'height': '100px'})

# Starts with only the placeholder; select_random_characters replaces the
# options with the sampled character names.
character_dropdown = widgets.Dropdown(
    options=["Select An Option"], value="Select An Option", description="Choose:", disabled=False
)

def final_character_selection(change):
    """Observer callback: announce the chosen character in ``final_output``.

    Args:
        change: Change notification mapping; only its ``'new'`` entry is read.
            Selecting the placeholder entry is ignored.
    """
    picked = change['new']
    if picked == "Select An Option":
        return
    final_output.value = f"🎉 You have chosen: {picked}!"
# Call final_character_selection whenever the dropdown's `value` changes.
character_dropdown.observe(final_character_selection, names='value')

# Text area that final_character_selection writes the confirmation into.
final_output = widgets.Textarea(value="Your chosen character will appear here!", layout={'width': '100%', 'height': '50px'})

# Display widgets
# Rendered in call order: filters, both buttons, status text, character
# picker, confirmation text.
display(gender_dropdown, species_dropdown, mass_dropdown, select_button, reset_button, output, character_dropdown, final_output)


Dropdown(description='Gender:', options=('Select An Option', 'Female', 'Male', 'Non-Binary'), value='Select An…

Dropdown(description='Species:', options=('Select An Option', 'Aleena', 'Besalisk', 'Cerean', 'Chagrian', 'Cla…

Dropdown(description='Mass:', options=('Select An Option', '0-50', '51-100', 'Over 100'), value='Select An Opt…

Button(button_style='primary', description='Find 3 Random Characters', style=ButtonStyle())



Textarea(value='Select options and press the button!', layout=Layout(height='100px', width='100%'))

Dropdown(description='Choose:', options=('Select An Option',), value='Select An Option')

Textarea(value='Your chosen character will appear here!', layout=Layout(height='50px', width='100%'))

## Section 4: Ask User to Select Visualization

## Section 5: Display Selected Visualizations

In [None]:
# Function to plot the graph and highlight a specific character


def plot_character_height(people_df, highlight_character=None):
    """Draw a bar chart of character heights, shortest to tallest.

    Rows whose ``height`` cannot be parsed as a number are left out. The
    caller's DataFrame is not modified; the numeric conversion is applied to a
    derived copy.

    Args:
        people_df: DataFrame with ``name`` and ``height`` columns.
        highlight_character: Optional name; its bar is drawn in orange and
            labelled with its height.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    # assign() returns a new frame, so the coerced column never leaks back
    # into the frame other cells keep using.
    heights = (
        people_df
        .assign(height=pd.to_numeric(people_df['height'], errors='coerce'))
        .dropna(subset=['height'])
        .sort_values(by='height')
    )
    names = heights['name'].tolist()

    # Assign colors: highlight one character differently
    colors = ['skyblue' if name != highlight_character else 'orange' for name in names]

    # Create a bar chart
    plt.figure(figsize=(12, 6))
    plt.bar(heights['name'], heights['height'], color=colors)
    plt.xticks(rotation=90)  # Rotate x-axis labels for better readability
    plt.title('Star Wars Characters: Height from Shortest to Tallest', fontsize=16)
    plt.xlabel('Character', fontsize=14)
    plt.ylabel('Height (cm)', fontsize=14)

    # Add annotation if a character is highlighted
    if highlight_character and highlight_character in names:
        bar_position = names.index(highlight_character)
        char_height = heights['height'].values[bar_position]
        plt.text(
            bar_position,
            char_height + 5,  # Position slightly above the bar
            f"{highlight_character}: {char_height} cm",
            ha='center', color='orange', fontsize=10
        )

    plt.tight_layout()
    plt.show()

# Example Usage: Highlight Luke Skywalker
plot_character_height(people_df, highlight_character='Luke Skywalker')

In [None]:
def plot_mass_by_character(people_df, highlight_character=None):
    """Draw a bar chart of character mass, with bars ordered by height.

    Rows whose ``mass`` or ``height`` cannot be parsed as a number are left
    out. The caller's DataFrame is not modified; conversions are applied to a
    derived copy.

    Args:
        people_df: DataFrame with ``name``, ``mass`` and ``height`` columns.
        highlight_character: Optional name; its bar is drawn in orange and
            labelled with its mass.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    # assign() builds a new frame, leaving the caller's columns as they were.
    people_sorted = (
        people_df
        .assign(
            mass=pd.to_numeric(people_df['mass'], errors='coerce'),
            height=pd.to_numeric(people_df['height'], errors='coerce'),
        )
        .dropna(subset=['mass', 'height'])
        .sort_values(by='height', ascending=True)
    )
    names = people_sorted['name'].tolist()

    # Assign colors: highlight one character differently
    colors = ['orange' if name == highlight_character else 'skyblue' for name in names]

    # Create a bar chart
    plt.figure(figsize=(14, 6))
    plt.bar(people_sorted['name'], people_sorted['mass'], color=colors)

    # Add labels and title
    plt.xticks(rotation=45, ha='right', fontsize=12)
    plt.yticks(fontsize=12)
    plt.xlabel('Character', fontsize=14)
    plt.ylabel('Mass (kg)', fontsize=14)
    plt.title('Mass of Star Wars Characters by Height', fontsize=16)
    plt.grid(axis='y', linestyle='--', alpha=0.7)

    # Annotate highlighted character
    if highlight_character in names:
        bar_position = names.index(highlight_character)
        char_mass = people_sorted['mass'].values[bar_position]
        plt.text(
            bar_position,
            char_mass + 5,
            f"{highlight_character}: {char_mass} kg",
            ha='center', color='orange', fontsize=10
        )

    plt.tight_layout()
    plt.show()

# Example Usage: Highlight "Luke Skywalker"
plot_mass_by_character(people_df, highlight_character='Luke Skywalker')

In [None]:
def plot_surface_water_by_planet(planets_df, highlight_planet=None):
    """Draw a bar chart of surface-water percentage per planet, highest first.

    Rows whose ``diameter`` or ``surface_water`` cannot be parsed as a number
    are left out. The caller's DataFrame is not modified; conversions are
    applied to a derived copy.

    Args:
        planets_df: DataFrame with ``name``, ``diameter`` and
            ``surface_water`` columns.
        highlight_planet: Optional planet name; its bar is drawn in orange and
            its name is written above the bar.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    # assign() builds a new frame, leaving the caller's columns as they were.
    planets_sorted = (
        planets_df
        .assign(
            diameter=pd.to_numeric(planets_df['diameter'], errors='coerce'),
            surface_water=pd.to_numeric(planets_df['surface_water'], errors='coerce'),
        )
        .dropna(subset=['diameter', 'surface_water'])
        .sort_values(by='surface_water', ascending=False)
    )
    names = planets_sorted['name'].tolist()

    # Assign colors: highlight one planet differently
    colors = ['orange' if name == highlight_planet else 'skyblue' for name in names]

    # Create a bar chart
    plt.figure(figsize=(14, 6))
    bars = plt.bar(planets_sorted['name'], planets_sorted['surface_water'], color=colors)

    # Add labels and title
    plt.xticks(rotation=45, ha='right', fontsize=12)
    plt.yticks(fontsize=12)
    plt.xlabel('Planet', fontsize=14)
    plt.ylabel('Surface Water (%)', fontsize=14)
    plt.title('Surface Water Percentage on Star Wars Planets', fontsize=16)
    plt.grid(axis='y', linestyle='--', alpha=0.7)

    # Annotate each bar with surface water percentage
    for bar, surface_water in zip(bars, planets_sorted['surface_water']):
        plt.text(
            bar.get_x() + bar.get_width() / 2,
            bar.get_height() + 1,
            f"{surface_water}%",
            ha='center', fontsize=10, color='black'
        )

    # Highlighted planet annotation (only name, no percentage)
    if highlight_planet in names:
        highlight_index = names.index(highlight_planet)
        plt.text(
            highlight_index,
            planets_sorted['surface_water'].values[highlight_index] + 3,
            f"{highlight_planet}",
            ha='center', color='red', fontsize=11, fontweight='bold'
        )

    plt.tight_layout()
    plt.show()

# Example Usage: Highlight "Tatooine"
plot_surface_water_by_planet(planets_df, highlight_planet='Tatooine')

In [None]:
def plot_population_density_by_planet(planets_df, highlight_planet=None):
    """Draw a bar chart of population density per planet, densest first.

    Density is computed as population divided by the area of a disc whose
    diameter is the planet's diameter. Rows with a non-numeric diameter or
    population, or with a zero diameter, are left out. The caller's DataFrame
    is not modified and no column is added to it.

    NOTE(review): a later cell in this notebook defines another function with
    this same name (a log-scaled variant); once that cell has run, this
    definition is shadowed.

    Args:
        planets_df: DataFrame with ``name``, ``diameter`` and ``population``
            columns.
        highlight_planet: Optional planet name; its bar is drawn in orange and
            its name is written above the bar.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    diameter = pd.to_numeric(planets_df['diameter'], errors='coerce')
    population = pd.to_numeric(planets_df['population'], errors='coerce')

    # Disc area from the diameter; a zero diameter yields +/-inf, which is
    # turned into NaN so the row is dropped below.
    density = (population / ((diameter / 2) ** 2 * np.pi)).replace([np.inf, -np.inf], np.nan)

    # assign() builds a new frame, leaving the caller's frame untouched.
    planets_sorted = (
        planets_df
        .assign(diameter=diameter, population=population, population_density=density)
        .dropna(subset=['diameter', 'population_density'])
        .sort_values(by='population_density', ascending=False)
    )
    names = planets_sorted['name'].tolist()

    # Assign colors: highlight one planet differently
    colors = ['orange' if name == highlight_planet else 'skyblue' for name in names]

    # Create a bar chart
    plt.figure(figsize=(14, 6))
    bars = plt.bar(planets_sorted['name'], planets_sorted['population_density'], color=colors)

    # Add labels and title
    plt.xticks(rotation=45, ha='right', fontsize=12)
    plt.yticks(fontsize=12)
    plt.xlabel('Planet', fontsize=14)
    plt.ylabel('Population Density (per km²)', fontsize=14)
    plt.title('Population Density of Star Wars Planets', fontsize=16)
    plt.grid(axis='y', linestyle='--', alpha=0.7)

    # Annotate each bar with population density
    for bar, pop_density in zip(bars, planets_sorted['population_density']):
        plt.text(
            bar.get_x() + bar.get_width() / 2,
            bar.get_height() + 1,
            f"{pop_density:.2f}",
            ha='center', fontsize=10, color='black'
        )

    # Highlighted planet annotation (only name)
    if highlight_planet in names:
        highlight_index = names.index(highlight_planet)
        plt.text(
            highlight_index,
            planets_sorted['population_density'].values[highlight_index] + 3,
            f"{highlight_planet}",
            ha='center', color='red', fontsize=11, fontweight='bold'
        )

    plt.tight_layout()
    plt.show()

# Example Usage: Highlight "Coruscant"
plot_population_density_by_planet(planets_df, highlight_planet='Coruscant')

In [None]:

def plot_population_density_by_planet(planets_df, highlight_planet=None):
    """Draw a log-scaled bar chart of population density per planet.

    Density is computed as population divided by the area of a disc whose
    diameter is the planet's diameter; bar heights are ``log10(density + 1)``
    while the per-bar labels show the untransformed density. Rows with a
    non-numeric diameter or population, or with a zero diameter, are left out.
    The caller's DataFrame is not modified and no column is added to it.

    NOTE(review): an earlier cell in this notebook defines a linear-scale
    function with this same name; this definition replaces it once this cell
    has run.

    Args:
        planets_df: DataFrame with ``name``, ``diameter`` and ``population``
            columns.
        highlight_planet: Optional planet name; its bar is drawn in orange and
            its name is written above the bar.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    diameter = pd.to_numeric(planets_df['diameter'], errors='coerce')
    population = pd.to_numeric(planets_df['population'], errors='coerce')

    # A zero diameter yields +/-inf, which is turned into NaN so the row is
    # dropped below.
    density = (population / ((diameter / 2) ** 2 * np.pi)).replace([np.inf, -np.inf], np.nan)

    # Every derived column is added through assign() on a new frame. Writing a
    # column onto the result of dropna() triggers pandas' SettingWithCopy
    # warning, and writing onto planets_df would alter the caller's data.
    planets_sorted = (
        planets_df
        .assign(
            diameter=diameter,
            population=population,
            population_density=density,
            log_population_density=np.log10(density + 1),  # +1 avoids log(0)
        )
        .dropna(subset=['diameter', 'population_density'])
        .sort_values(by='log_population_density', ascending=False)
    )
    names = planets_sorted['name'].tolist()

    # Assign colors: highlight one planet differently
    colors = ['orange' if name == highlight_planet else 'skyblue' for name in names]

    # Create a bar chart
    plt.figure(figsize=(14, 6))
    bars = plt.bar(planets_sorted['name'], planets_sorted['log_population_density'], color=colors)

    # Add labels and title
    plt.xticks(rotation=45, ha='right', fontsize=12)
    plt.yticks(fontsize=12)
    plt.xlabel('Planet', fontsize=14)
    plt.ylabel('Log10(Population Density)', fontsize=14)
    plt.title('Log-Scaled Population Density of Star Wars Planets', fontsize=16)
    plt.grid(axis='y', linestyle='--', alpha=0.7)

    # Annotate each bar with the actual population density (not log-scaled)
    for bar, pop_density in zip(bars, planets_sorted['population_density']):
        plt.text(
            bar.get_x() + bar.get_width() / 2,
            bar.get_height() + 0.1,  # Offset to prevent overlap
            f"{pop_density:.2f}",
            ha='center', fontsize=10, color='black'
        )

    # Highlighted planet annotation
    if highlight_planet in names:
        highlight_index = names.index(highlight_planet)
        plt.text(
            highlight_index,
            planets_sorted['log_population_density'].values[highlight_index] + 0.2,
            f"{highlight_planet}",
            ha='center', color='red', fontsize=11, fontweight='bold'
        )

    plt.tight_layout()
    plt.show()

# Example Usage: Highlight "Coruscant"
plot_population_density_by_planet(planets_df, highlight_planet='Coruscant')


In [None]:
def plot_lifespan_by_species(species_df, highlight_species=None):
    """Draw a bar chart of average lifespan per species, longest first.

    Lifespans that cannot be parsed as a number are replaced by the median of
    the parseable ones, so those species still appear; species whose resulting
    lifespan is not positive are left out. The number of species plotted is
    printed. The caller's DataFrame is not modified.

    NOTE(review): a later cell in this notebook defines another function with
    this same name; once that cell has run, this definition is shadowed.

    Args:
        species_df: DataFrame with ``name`` and ``average_lifespan`` columns.
        highlight_species: Optional species name; its bar is drawn in orange
            and labelled with its lifespan.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    # Impute on a standalone Series. A chained `df[col].fillna(..., inplace=True)`
    # depends on the pandas version for whether it updates the frame at all,
    # and when it does, the imputed values leak into every later use of
    # species_df.
    lifespan = pd.to_numeric(species_df['average_lifespan'], errors='coerce')
    lifespan = lifespan.fillna(lifespan.median())

    species_sorted = (
        species_df
        .assign(average_lifespan=lifespan)
        .loc[lambda frame: frame['average_lifespan'] > 0]  # Remove negative or zero lifespan values
        .sort_values(by='average_lifespan', ascending=False)
    )
    names = species_sorted['name'].tolist()

    # Report how many species made it onto the chart
    print(f"Total species displayed: {len(species_sorted)}")

    # Set color for highlighted species
    colors = ['orange' if name == highlight_species else 'skyblue' for name in names]

    # Create a bar chart
    plt.figure(figsize=(20, 8))
    plt.bar(species_sorted['name'], species_sorted['average_lifespan'], color=colors)
    plt.xticks(rotation=90, ha='right', fontsize=12)
    plt.yticks(fontsize=12)
    plt.xlabel('Species', fontsize=14)
    plt.ylabel('Average Lifespan (years)', fontsize=14)
    plt.title('Average Lifespan of Star Wars Species', fontsize=16)
    plt.grid(axis='y', linestyle='--', alpha=0.7)

    # Annotate highlighted species
    if highlight_species in names:
        bar_position = names.index(highlight_species)
        species_lifespan = species_sorted['average_lifespan'].values[bar_position]
        plt.text(
            bar_position,
            species_lifespan + 5,
            f"{highlight_species}: {species_lifespan} years",
            ha='center', color='orange', fontsize=10
        )

    plt.tight_layout()
    plt.show()

# Example Usage
plot_lifespan_by_species(species_df, highlight_species='Nautolan')

In [None]:
def plot_lifespan_by_species(species_df, highlight_species=None):
    """Draw a bar chart of average lifespan per species, labelled with height.

    Species whose ``average_lifespan`` or ``average_height`` cannot be parsed
    as a number are left out. Bars are ordered by lifespan, longest first, and
    each bar is labelled with the species' average height. The caller's
    DataFrame is not modified; conversions are applied to a derived copy.

    NOTE(review): an earlier cell in this notebook defines another function
    with this same name; this definition replaces it once this cell has run.

    Args:
        species_df: DataFrame with ``name``, ``average_lifespan`` and
            ``average_height`` columns.
        highlight_species: Optional species name; its bar is drawn in orange
            and its name is written above the bar.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    # assign() builds a new frame, leaving the caller's columns as they were.
    species_sorted = (
        species_df
        .assign(
            average_lifespan=pd.to_numeric(species_df['average_lifespan'], errors='coerce'),
            average_height=pd.to_numeric(species_df['average_height'], errors='coerce'),
        )
        .dropna(subset=['average_lifespan', 'average_height'])
        .sort_values(by='average_lifespan', ascending=False)
    )
    names = species_sorted['name'].tolist()

    # Assign colors: highlight one species differently
    colors = ['orange' if name == highlight_species else 'skyblue' for name in names]

    # Create a bar chart
    plt.figure(figsize=(18, 6))
    bars = plt.bar(species_sorted['name'], species_sorted['average_lifespan'], color=colors)

    # Add labels and title
    plt.xticks(rotation=45, ha='right', fontsize=12)
    plt.yticks(fontsize=12)
    plt.xlabel('Species', fontsize=14)
    plt.ylabel('Average Lifespan (years)', fontsize=14)
    plt.title('Average Lifespan of Star Wars Species (with Heights)', fontsize=16)
    plt.grid(axis='y', linestyle='--', alpha=0.7)

    # Annotate each bar with species height
    for bar, height in zip(bars, species_sorted['average_height']):
        plt.text(
            bar.get_x() + bar.get_width() / 2,
            bar.get_height() + 5,  # Position above the bar
            f"{height} cm",
            ha='center', fontsize=6, color='black'
        )

    # Highlighted species annotation (only name)
    if highlight_species in names:
        highlight_index = names.index(highlight_species)
        plt.text(
            highlight_index,
            species_sorted['average_lifespan'].values[highlight_index] + 10,
            f"{highlight_species}",
            ha='center', color='red', fontsize=11, fontweight='bold'
        )

    plt.tight_layout()
    plt.show()

# Example Usage: Highlight "Human"
plot_lifespan_by_species(species_df, highlight_species='Human')

In [None]:
def plot_log_cost_by_starship(starships_df, highlight_starship=None):
    """Draw a log-scaled bar chart of starship cost, most expensive first.

    Bar heights are ``log10(cost + 1)``; each bar is labelled with the
    untransformed cost and the maximum atmosphering speed. Starships whose
    ``cost_in_credits`` or ``max_atmosphering_speed`` cannot be parsed as a
    number are left out. The caller's DataFrame is not modified.

    Args:
        starships_df: DataFrame with ``name``, ``cost_in_credits`` and
            ``max_atmosphering_speed`` columns.
        highlight_starship: Optional starship name; its bar is drawn in orange
            and its name is written above the bar.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    cost = pd.to_numeric(starships_df['cost_in_credits'], errors='coerce')
    speed = pd.to_numeric(starships_df['max_atmosphering_speed'], errors='coerce')

    # Every derived column is added through assign() on a new frame. Writing a
    # column onto the result of dropna() triggers pandas' SettingWithCopy
    # warning, and writing onto starships_df would alter the caller's data.
    starships_sorted = (
        starships_df
        .assign(
            cost_in_credits=cost,
            max_atmosphering_speed=speed,
            log_cost=np.log10(cost + 1),  # +1 avoids log(0)
        )
        .dropna(subset=['cost_in_credits', 'max_atmosphering_speed'])
        .sort_values(by='log_cost', ascending=False)
    )
    names = starships_sorted['name'].tolist()

    # Assign colors: highlight one starship differently
    colors = ['orange' if name == highlight_starship else 'skyblue' for name in names]

    # Create a bar chart
    plt.figure(figsize=(20, 6))
    bars = plt.bar(starships_sorted['name'], starships_sorted['log_cost'], color=colors)

    # Add labels and title
    plt.xticks(rotation=45, ha='right', fontsize=12)
    plt.yticks(fontsize=12)
    plt.xlabel('Starship', fontsize=14)
    plt.ylabel('Log10(Cost in Credits)', fontsize=14)
    plt.title('Log-Scaled Starship Cost in Credits (with Max Speed)', fontsize=16)
    plt.grid(axis='y', linestyle='--', alpha=0.7)

    # Annotate each bar with actual cost (not log-scaled)
    bar_labels = zip(bars, starships_sorted['cost_in_credits'], starships_sorted['max_atmosphering_speed'])
    for bar, ship_cost, ship_speed in bar_labels:
        plt.text(
            bar.get_x() + bar.get_width() / 2,
            bar.get_height() + 0.5,  # Offset to prevent overlap
            f"{ship_cost:,} credits\n{ship_speed} speed",
            ha='center', fontsize=6, color='black'
        )

    # Highlighted starship annotation (only name)
    if highlight_starship in names:
        highlight_index = names.index(highlight_starship)
        plt.text(
            highlight_index,
            starships_sorted['log_cost'].values[highlight_index] + 0.2,
            f"{highlight_starship}",
            ha='center', color='red', fontsize=11, fontweight='bold'
        )

    plt.tight_layout()
    plt.show()

# Example Usage: Highlight "Millennium Falcon"
plot_log_cost_by_starship(starships_df, highlight_starship='Millennium Falcon')

In [None]:

def plot_log_passenger_capacity_by_starship(starships_df, highlight_starship=None):
    """Draw a log-scaled bar chart of starship passenger capacity, largest first.

    Bar heights are ``log10(passengers + 1)``; each bar is labelled with the
    untransformed passenger count and the ship's length. Starships whose
    ``passengers`` or ``length`` cannot be parsed as a number are left out.
    The caller's DataFrame is not modified.

    Args:
        starships_df: DataFrame with ``name``, ``passengers`` and ``length``
            columns.
        highlight_starship: Optional starship name; its bar is drawn in orange
            and its name is written above the bar.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    passengers = pd.to_numeric(starships_df['passengers'], errors='coerce')
    length = pd.to_numeric(starships_df['length'], errors='coerce')

    # Every derived column is added through assign() on a new frame. Writing a
    # column onto the result of dropna() triggers pandas' SettingWithCopy
    # warning, and writing onto starships_df would alter the caller's data.
    starships_sorted = (
        starships_df
        .assign(
            passengers=passengers,
            length=length,
            log_passengers=np.log10(passengers + 1),  # +1 avoids log(0)
        )
        .dropna(subset=['passengers', 'length'])
        .sort_values(by='log_passengers', ascending=False)
    )
    names = starships_sorted['name'].tolist()

    # Assign colors: highlight one starship differently
    colors = ['orange' if name == highlight_starship else 'skyblue' for name in names]

    # Create a bar chart
    plt.figure(figsize=(20, 6))
    bars = plt.bar(starships_sorted['name'], starships_sorted['log_passengers'], color=colors)

    # Add labels and title
    plt.xticks(rotation=45, ha='right', fontsize=12)
    plt.yticks(fontsize=12)
    plt.xlabel('Starship', fontsize=14)
    plt.ylabel('Log10(Passenger Capacity)', fontsize=14)
    plt.title('Log-Scaled Passenger Capacity of Star Wars Starships (with Length)', fontsize=16)
    plt.grid(axis='y', linestyle='--', alpha=0.7)

    # Annotate each bar with actual passenger capacity & length
    bar_labels = zip(bars, starships_sorted['passengers'], starships_sorted['length'])
    for bar, ship_passengers, ship_length in bar_labels:
        plt.text(
            bar.get_x() + bar.get_width() / 2,
            bar.get_height() + 0.1,  # Offset to prevent overlap
            f"{ship_passengers:,} passengers\n{ship_length} m",
            ha='center', fontsize=5, color='black'
        )

    # Highlighted starship annotation (only name)
    if highlight_starship in names:
        highlight_index = names.index(highlight_starship)
        plt.text(
            highlight_index,
            starships_sorted['log_passengers'].values[highlight_index] + 0.2,
            f"{highlight_starship}",
            ha='center', color='red', fontsize=11, fontweight='bold'
        )

    plt.tight_layout()
    plt.show()

# Example Usage: Highlight "Star Destroyer"
plot_log_passenger_capacity_by_starship(starships_df, highlight_starship='Star Destroyer')


In [None]:
# Calculate the number of films for each character
people_df['film_count'] = people_df['films'].apply(len)

# Function to create the bar chart with spacing
def plot_film_count(people_df, highlight_character=None):
    """
    Draw a bar chart of how many films each character appears in.

    Bars are ordered by descending ``film_count``. Every bar whose ``name``
    equals ``highlight_character`` is coloured orange (the rest sky blue), and
    the first such bar is labelled with the name and its film count.

    Parameters:
        people_df (DataFrame): Must provide ``name`` and ``film_count`` columns.
            It is only read, never modified.
        highlight_character (str, optional): Name of the character to highlight.
            If it matches no row, the chart is drawn without a highlight.
    """
    # Sort by film count for a cleaner chart
    people_df_sorted = people_df.sort_values(by='film_count', ascending=False)

    # One integer slot per bar; the names are attached as tick labels below
    x_positions = np.arange(len(people_df_sorted))

    # Flag the bars to highlight once, then reuse the flags for colours and label
    is_highlighted = [bool(name == highlight_character) for name in people_df_sorted['name']]
    colors = ['orange' if flagged else 'skyblue' for flagged in is_highlighted]

    # Create the bar chart with adjusted width and spacing
    plt.figure(figsize=(18, 6))
    plt.bar(x_positions, people_df_sorted['film_count'], color=colors, width=0.6)  # Adjust width for spacing
    plt.xticks(x_positions, people_df_sorted['name'], rotation=45, ha='right')  # Add rotation and spacing
    plt.title('Number of Films per Character', fontsize=16)
    plt.xlabel('Character', fontsize=14)
    plt.ylabel('Number of Films', fontsize=14)

    # Annotate the highlighted character. The bar is located by its position in
    # the sorted order rather than by index label, so the label lands on the
    # right bar even when the DataFrame index contains duplicate labels.
    if any(is_highlighted):
        highlight_position = is_highlighted.index(True)
        highlight_count = people_df_sorted['film_count'].iloc[highlight_position]
        plt.text(
            x_positions[highlight_position],
            highlight_count + 0.2,
            f"{highlight_character}: {highlight_count} films",
            ha='center',
            fontsize=10,
            color='orange'
        )

    # Tick font sizes, light horizontal grid, and final layout
    plt.xticks(fontsize=12)
    plt.yticks(fontsize=12)
    plt.grid(axis='y', linestyle='--', alpha=0.7)  # Add light grid for better readability
    plt.tight_layout()

    plt.show()

# Example usage: chart every character, with "Luke Skywalker" highlighted
plot_film_count(
    people_df=people_df,
    highlight_character='Luke Skywalker',
)

In [None]:
# Add a 'film_count' column: the length of each planet's 'films' entry
planets_df['film_count'] = planets_df['films'].map(len)

def plot_film_count_by_planet(planets_df, highlight_planet=None):
    """
    Draw a bar chart of how many films each planet appears in.

    Bars are ordered by descending ``film_count``. Every bar whose ``name``
    equals ``highlight_planet`` is coloured orange (the rest sky blue), and the
    first such bar is labelled with the name and its film count.

    Parameters:
        planets_df (DataFrame): Must provide ``name`` and ``film_count`` columns.
            It is only read, never modified.
        highlight_planet (str, optional): Name of the planet to highlight.
            If it matches no row, the chart is drawn without a highlight.
    """
    # Sort by film count for a cleaner chart
    planets_df_sorted = planets_df.sort_values(by='film_count', ascending=False)

    # One integer slot per bar; the names are attached as tick labels below
    x_positions = np.arange(len(planets_df_sorted))

    # Flag the bars to highlight once, then reuse the flags for colours and label
    is_highlighted = [bool(name == highlight_planet) for name in planets_df_sorted['name']]
    colors = ['orange' if flagged else 'skyblue' for flagged in is_highlighted]

    # Create the bar chart with adjusted width and spacing
    plt.figure(figsize=(18, 6))
    plt.bar(x_positions, planets_df_sorted['film_count'], color=colors, width=0.6)  # Adjust width for spacing
    plt.xticks(x_positions, planets_df_sorted['name'], rotation=45, ha='right')  # Add rotation and spacing
    plt.title('Number of Films per Planet', fontsize=16)
    plt.xlabel('Planet', fontsize=14)
    plt.ylabel('Number of Films', fontsize=14)

    # Annotate the highlighted planet. The bar is located by its position in
    # the sorted order rather than by index label, so the label lands on the
    # right bar even when the DataFrame index contains duplicate labels.
    if any(is_highlighted):
        highlight_position = is_highlighted.index(True)
        highlight_count = planets_df_sorted['film_count'].iloc[highlight_position]
        plt.text(
            x_positions[highlight_position],
            highlight_count + 0.2,
            f"{highlight_planet}: {highlight_count} films",
            ha='center',
            fontsize=10,
            color='orange'
        )

    # Tick font sizes, light horizontal grid, and final layout
    plt.xticks(fontsize=12)
    plt.yticks(fontsize=12)
    plt.grid(axis='y', linestyle='--', alpha=0.7)  # Add light grid for better readability
    plt.tight_layout()

    plt.show()

# Example usage: chart every planet, with "Tatooine" highlighted
plot_film_count_by_planet(
    planets_df=planets_df,
    highlight_planet='Tatooine',
)

In [None]:
# Add a 'film_count' column: the length of each species' 'films' entry
species_df['film_count'] = species_df['films'].map(len)

def plot_film_count_by_species(species_df, highlight_species=None):
    """
    Draw a bar chart of how many films each species appears in.

    Bars are ordered by descending ``film_count``. Every bar whose ``name``
    equals ``highlight_species`` is coloured orange (the rest sky blue), and the
    first such bar is labelled with the name and its film count.

    Parameters:
        species_df (DataFrame): Must provide ``name`` and ``film_count`` columns.
            It is only read, never modified.
        highlight_species (str, optional): Name of the species to highlight.
            If it matches no row, the chart is drawn without a highlight.
    """
    # Sort by film count for a cleaner chart
    species_df_sorted = species_df.sort_values(by='film_count', ascending=False)

    # One integer slot per bar; the names are attached as tick labels below
    x_positions = np.arange(len(species_df_sorted))

    # Flag the bars to highlight once, then reuse the flags for colours and label
    is_highlighted = [bool(name == highlight_species) for name in species_df_sorted['name']]
    colors = ['orange' if flagged else 'skyblue' for flagged in is_highlighted]

    # Create the bar chart with adjusted width and spacing
    plt.figure(figsize=(18, 6))
    plt.bar(x_positions, species_df_sorted['film_count'], color=colors, width=0.6)  # Adjust width for spacing
    plt.xticks(x_positions, species_df_sorted['name'], rotation=45, ha='right')  # Add rotation and spacing
    plt.title('Number of Films per Species', fontsize=16)
    plt.xlabel('Species', fontsize=14)
    plt.ylabel('Number of Films', fontsize=14)

    # Annotate the highlighted species. The bar is located by its position in
    # the sorted order rather than by index label, so the label lands on the
    # right bar even when the DataFrame index contains duplicate labels.
    if any(is_highlighted):
        highlight_position = is_highlighted.index(True)
        highlight_count = species_df_sorted['film_count'].iloc[highlight_position]
        plt.text(
            x_positions[highlight_position],
            highlight_count + 0.2,
            f"{highlight_species}: {highlight_count} films",
            ha='center',
            fontsize=10,
            color='orange'
        )

    # Tick font sizes, light horizontal grid, and final layout
    plt.xticks(fontsize=12)
    plt.yticks(fontsize=12)
    plt.grid(axis='y', linestyle='--', alpha=0.7)  # Add light grid for better readability
    plt.tight_layout()

    plt.show()

# Example usage: chart every species, with "Wookiee" highlighted
plot_film_count_by_species(
    species_df=species_df,
    highlight_species='Wookiee',
)

In [None]:
# Add a 'film_count' column: the length of each starship's 'films' entry
starships_df['film_count'] = starships_df['films'].map(len)

def plot_film_count_by_starship(starships_df, highlight_starship=None):
    """
    Draw a bar chart of how many films each starship appears in.

    Bars are ordered by descending ``film_count``. Every bar whose ``name``
    equals ``highlight_starship`` is coloured orange (the rest sky blue), and
    the first such bar is labelled with the name and its film count.

    Parameters:
        starships_df (DataFrame): Must provide ``name`` and ``film_count``
            columns. It is only read, never modified.
        highlight_starship (str, optional): Name of the starship to highlight.
            If it matches no row, the chart is drawn without a highlight.
    """
    # Sort by film count for a cleaner chart
    starships_df_sorted = starships_df.sort_values(by='film_count', ascending=False)

    # One integer slot per bar; the names are attached as tick labels below
    x_positions = np.arange(len(starships_df_sorted))

    # Flag the bars to highlight once, then reuse the flags for colours and label
    is_highlighted = [bool(name == highlight_starship) for name in starships_df_sorted['name']]
    colors = ['orange' if flagged else 'skyblue' for flagged in is_highlighted]

    # Create the bar chart with adjusted width and spacing
    plt.figure(figsize=(18, 6))
    plt.bar(x_positions, starships_df_sorted['film_count'], color=colors, width=0.6)  # Adjust width for spacing
    plt.xticks(x_positions, starships_df_sorted['name'], rotation=45, ha='right')  # Add rotation and spacing
    plt.title('Number of Films per Starship', fontsize=16)
    plt.xlabel('Starship', fontsize=14)
    plt.ylabel('Number of Films', fontsize=14)

    # Annotate the highlighted starship. The bar is located by its position in
    # the sorted order rather than by index label, so the label lands on the
    # right bar even when the DataFrame index contains duplicate labels.
    if any(is_highlighted):
        highlight_position = is_highlighted.index(True)
        highlight_count = starships_df_sorted['film_count'].iloc[highlight_position]
        plt.text(
            x_positions[highlight_position],
            highlight_count + 0.2,
            f"{highlight_starship}: {highlight_count} films",
            ha='center',
            fontsize=10,
            color='orange'
        )

    # Tick font sizes, light horizontal grid, and final layout
    plt.xticks(fontsize=12)
    plt.yticks(fontsize=12)
    plt.grid(axis='y', linestyle='--', alpha=0.7)  # Add light grid for better readability
    plt.tight_layout()

    plt.show()

# Example usage: chart every starship, with "Millennium Falcon" highlighted
plot_film_count_by_starship(
    starships_df=starships_df,
    highlight_starship='Millennium Falcon',
)

### Chart Function Testing

In [130]:
def plot_bar_chart(
    df, x_col, y_col, 
    title, xlabel, ylabel, 
    highlight_item=None, log_scale=False, annotation_col=None
):
    """
    Generalized function to create bar charts for different Star Wars data.

    Values in ``y_col`` are coerced to numbers, rows whose value cannot be
    parsed are left out, and the remaining bars are drawn in descending order
    of the plotted value. All coercion happens on a private copy, so the
    caller's DataFrame is never modified.

    Parameters:
        df (DataFrame): The input DataFrame (read only).
        x_col (str): Column name to be used for X-axis labels.
        y_col (str): Column name for Y-axis values.
        title (str): Chart title.
        xlabel (str): Label for X-axis.
        ylabel (str): Label for Y-axis; wrapped as ``Log10(...)`` when
            ``log_scale`` is enabled.
        highlight_item (str, optional): Name of the item to highlight in orange.
        log_scale (bool, optional): Plot ``log10(value + 1)`` instead of the raw value.
        annotation_col (str, optional): Column for additional bar annotations (e.g., height, speed).
            Its values are coerced to numbers; unparseable ones are shown as empty labels.
    """
    # Work on a copy so that coercing the column does not overwrite the caller's data
    plot_df = df.copy()
    plot_df[y_col] = pd.to_numeric(plot_df[y_col], errors='coerce')
    # Explicit copy after dropna so the column added below never writes through a view
    plot_df = plot_df.dropna(subset=[y_col]).copy()

    # Apply log10 transformation if enabled
    if log_scale:
        log_col = f'log_{y_col}'
        plot_df[log_col] = np.log10(plot_df[y_col] + 1)  # Avoid log(0)
        y_col = log_col

    # Sort by Y values for better visualization
    df_sorted = plot_df.sort_values(by=y_col, ascending=False)
    x_labels = df_sorted[x_col].tolist()

    # Assign colors: highlight selected item differently
    colors = ['orange' if name == highlight_item else 'skyblue' for name in x_labels]

    # Create the bar chart
    plt.figure(figsize=(14, 6))
    bars = plt.bar(df_sorted[x_col], df_sorted[y_col], color=colors)

    # Add labels and title
    plt.xticks(rotation=45, ha='right', fontsize=12)
    plt.yticks(fontsize=12)
    plt.xlabel(xlabel, fontsize=14)
    plt.ylabel(ylabel if not log_scale else f'Log10({ylabel})', fontsize=14)
    plt.title(title, fontsize=16)
    plt.grid(axis='y', linestyle='--', alpha=0.7)

    # Annotate each bar with additional info if provided
    if annotation_col:
        annotations = pd.to_numeric(df_sorted[annotation_col], errors='coerce')
        for bar, annotation in zip(bars, annotations):
            plt.text(
                bar.get_x() + bar.get_width() / 2, 
                bar.get_height() + 0.1, 
                f"{annotation}" if not pd.isna(annotation) else "", 
                ha='center', fontsize=9, color='black'
            )

    # Highlighted item annotation (only name), placed above the first matching bar
    if highlight_item in x_labels:
        highlight_index = x_labels.index(highlight_item)
        plt.text(
            highlight_index,
            df_sorted[y_col].values[highlight_index] + 0.2,
            f"{highlight_item}",
            ha='center', color='red', fontsize=11, fontweight='bold'
        )

    plt.tight_layout()
    plt.show()

In [None]:
# Character heights. plot_bar_chart draws bars in descending order of the Y value,
# so the title describes the chart as running from tallest to shortest.
plot_bar_chart(
    df=people_df, 
    x_col='name', y_col='height', 
    title='Star Wars Characters: Height from Tallest to Shortest',
    xlabel='Character', ylabel='Height (cm)', 
    highlight_item='Luke Skywalker'
)

In [None]:
# Starship cost on a log10 axis; each bar is also labelled with its numeric
# max_atmosphering_speed, and the Millennium Falcon is highlighted.
plot_bar_chart(
    starships_df,
    'name',
    'cost_in_credits',
    'Log-Scaled Starship Cost in Credits',
    'Starship',
    'Cost in Credits',
    highlight_item='Millennium Falcon',
    log_scale=True,
    annotation_col='max_atmosphering_speed',
)

In [None]:
# Count films per planet, then chart the counts with Tatooine highlighted
planets_df['film_count'] = planets_df['films'].map(len)

plot_bar_chart(
    planets_df,
    'name',
    'film_count',
    title='Number of Films per Planet',
    xlabel='Planet',
    ylabel='Number of Films',
    highlight_item='Tatooine',
)