# Star Wars Data Analysis

## Section 1: Setup imports and dataframes

In [74]:
# Uncomment the following line to install aiohttp for the async calls.
# asyncio is part of the Python standard library and does not need to be installed.

#%pip install aiohttp

In [2]:
# Import necessary libraries for dataframes, HTTP requests, JSON, and charts

import pandas as pd
from prophet import Prophet
import datetime as dt
import numpy as np
import requests
import json
import matplotlib.pyplot as plt
import asyncio
import aiohttp

%matplotlib inline

In [3]:
# Endpoint URL for each resource category SWAPI provides, built from one shared base

SWAPI_BASE_URL = "https://swapi.dev/api"

films_url = f"{SWAPI_BASE_URL}/films/"
people_url = f"{SWAPI_BASE_URL}/people/"
planets_url = f"{SWAPI_BASE_URL}/planets/"
species_url = f"{SWAPI_BASE_URL}/species/"
starships_url = f"{SWAPI_BASE_URL}/starships/"
vehicles_url = f"{SWAPI_BASE_URL}/vehicles/"

In [4]:
# Work out how many result pages each category exposes

def retrieve_pages(url, page_size=10, timeout=10):
    """Return the number of result pages available at a list endpoint.

    The endpoint's JSON payload is expected to carry the total record count
    under the "count" key. When that key is missing or unusable, the function
    falls back to assuming a single page of records.

    Args:
        url: Endpoint to query.
        page_size: Number of records per page (defaults to 10).
        timeout: Seconds to wait for the server before giving up, so a stalled
            connection cannot hang the notebook indefinitely.

    Returns:
        The page count as an int.

    Raises:
        ValueError: If page_size is smaller than 1.
    """
    if page_size < 1:
        raise ValueError("page_size must be at least 1")

    response = requests.get(url, timeout=timeout)
    data = response.json()

    # Only the "no usable count" cases are tolerated; any other error propagates
    try:
        total_records = int(data["count"])
    except (KeyError, TypeError, ValueError):
        total_records = page_size

    # Ceiling division using integer arithmetic only
    return -(-total_records // page_size)

In [5]:
# Look up the page count for every category, then report each one

films_pages = retrieve_pages(films_url)
people_pages = retrieve_pages(people_url)
planets_pages = retrieve_pages(planets_url)
species_pages = retrieve_pages(species_url)
starships_pages = retrieve_pages(starships_url)
vehicles_pages = retrieve_pages(vehicles_url)

for category, page_count in (
    ("films", films_pages),
    ("people", people_pages),
    ("planets", planets_pages),
    ("species", species_pages),
    ("starships", starships_pages),
    ("vehicles", vehicles_pages),
):
    print(f"The number of pages of {category} is {page_count}")

The number of pages of films is 1
The number of pages of people is 9
The number of pages of planets is 6
The number of pages of species is 4
The number of pages of starships is 4
The number of pages of vehicles is 4


In [6]:
# Async helpers for downloading paginated SWAPI data.
# The pages of one category are fetched one after another; the speed-up comes
# from running several assemble_json coroutines at the same time.

async def fetch(url, session=None):
    """Download one URL and return its decoded JSON body.

    Args:
        url: Address to request.
        session: Optional aiohttp.ClientSession to reuse. When omitted, a
            temporary session is opened for this single request.

    Returns:
        The parsed JSON payload.

    Raises:
        aiohttp.ClientResponseError: If the server answers with an HTTP error status.
    """
    if session is None:
        async with aiohttp.ClientSession() as own_session:
            return await fetch(url, session=own_session)

    async with session.get(url) as response:
        # Surface HTTP errors here instead of failing later on a missing "results" key
        response.raise_for_status()
        return await response.json()


async def assemble_json(url, pages):
    """Collect the "results" entries of every page of a paginated endpoint.

    Args:
        url: Base list endpoint; "?page=<n>" is appended for each page.
        pages: Number of pages to request, starting at page 1.

    Returns:
        A flat list with the records of all pages, in page order.
    """
    total_json = []
    print(f"Retrieving {pages} pages from {url}")
    # One session for the whole category so its connection is reused across pages
    async with aiohttp.ClientSession() as session:
        for page in range(1, pages + 1):
            composed_url = f"{url}?page={page}"
            req_json = await fetch(composed_url, session=session)
            total_json.extend(req_json["results"])
    print(f"Found {len(total_json)} records at {url}")
    return total_json

In [7]:
# Call assemble_json with each SWAPI URL and number of pages

# Run the async tasks
results = await asyncio.gather(
    assemble_json(films_url, films_pages),
    assemble_json(people_url, people_pages),
    assemble_json(planets_url, planets_pages),
    assemble_json(species_url, species_pages),
    assemble_json(starships_url, starships_pages),
    assemble_json(vehicles_url, vehicles_pages),
)

film_data, people_data, planets_data, species_data, starships_data, vehicles_data = results


Retrieving 1 pages from https://swapi.dev/api/films/
Retrieving 9 pages from https://swapi.dev/api/people/
Retrieving 6 pages from https://swapi.dev/api/planets/
Retrieving 4 pages from https://swapi.dev/api/species/
Retrieving 4 pages from https://swapi.dev/api/starships/
Retrieving 4 pages from https://swapi.dev/api/vehicles/
Found 6 records at https://swapi.dev/api/films/
Found 39 records at https://swapi.dev/api/vehicles/
Found 36 records at https://swapi.dev/api/starships/
Found 37 records at https://swapi.dev/api/species/
Found 60 records at https://swapi.dev/api/planets/
Found 82 records at https://swapi.dev/api/people/


In [8]:
# Build one dataframe per category from the downloaded records
# TODO: the arrays in films_df still need to be exploded to get the data in a usable format
films_df, people_df, planets_df, species_df, starships_df, vehicles_df = (
    pd.DataFrame(records)
    for records in (
        film_data, people_data, planets_data,
        species_data, starships_data, vehicles_data,
    )
)

# Preview up to 40 rows of the people dataframe
# (swap in e.g. films_df.head() to inspect one of the other categories)
display(people_df.head(40))

Unnamed: 0,name,height,mass,hair_color,skin_color,eye_color,birth_year,gender,homeworld,films,species,vehicles,starships,created,edited,url
0,Luke Skywalker,172,77,blond,fair,blue,19BBY,male,https://swapi.dev/api/planets/1/,"[https://swapi.dev/api/films/1/, https://swapi...",[],"[https://swapi.dev/api/vehicles/14/, https://s...","[https://swapi.dev/api/starships/12/, https://...",2014-12-09T13:50:51.644000Z,2014-12-20T21:17:56.891000Z,https://swapi.dev/api/people/1/
1,C-3PO,167,75,,gold,yellow,112BBY,,https://swapi.dev/api/planets/1/,"[https://swapi.dev/api/films/1/, https://swapi...",[https://swapi.dev/api/species/2/],[],[],2014-12-10T15:10:51.357000Z,2014-12-20T21:17:50.309000Z,https://swapi.dev/api/people/2/
2,R2-D2,96,32,,"white, blue",red,33BBY,,https://swapi.dev/api/planets/8/,"[https://swapi.dev/api/films/1/, https://swapi...",[https://swapi.dev/api/species/2/],[],[],2014-12-10T15:11:50.376000Z,2014-12-20T21:17:50.311000Z,https://swapi.dev/api/people/3/
3,Darth Vader,202,136,none,white,yellow,41.9BBY,male,https://swapi.dev/api/planets/1/,"[https://swapi.dev/api/films/1/, https://swapi...",[],[],[https://swapi.dev/api/starships/13/],2014-12-10T15:18:20.704000Z,2014-12-20T21:17:50.313000Z,https://swapi.dev/api/people/4/
4,Leia Organa,150,49,brown,light,brown,19BBY,female,https://swapi.dev/api/planets/2/,"[https://swapi.dev/api/films/1/, https://swapi...",[],[https://swapi.dev/api/vehicles/30/],[],2014-12-10T15:20:09.791000Z,2014-12-20T21:17:50.315000Z,https://swapi.dev/api/people/5/
5,Owen Lars,178,120,"brown, grey",light,blue,52BBY,male,https://swapi.dev/api/planets/1/,"[https://swapi.dev/api/films/1/, https://swapi...",[],[],[],2014-12-10T15:52:14.024000Z,2014-12-20T21:17:50.317000Z,https://swapi.dev/api/people/6/
6,Beru Whitesun lars,165,75,brown,light,blue,47BBY,female,https://swapi.dev/api/planets/1/,"[https://swapi.dev/api/films/1/, https://swapi...",[],[],[],2014-12-10T15:53:41.121000Z,2014-12-20T21:17:50.319000Z,https://swapi.dev/api/people/7/
7,R5-D4,97,32,,"white, red",red,unknown,,https://swapi.dev/api/planets/1/,[https://swapi.dev/api/films/1/],[https://swapi.dev/api/species/2/],[],[],2014-12-10T15:57:50.959000Z,2014-12-20T21:17:50.321000Z,https://swapi.dev/api/people/8/
8,Biggs Darklighter,183,84,black,light,brown,24BBY,male,https://swapi.dev/api/planets/1/,[https://swapi.dev/api/films/1/],[],[],[https://swapi.dev/api/starships/12/],2014-12-10T15:59:50.509000Z,2014-12-20T21:17:50.323000Z,https://swapi.dev/api/people/9/
9,Obi-Wan Kenobi,182,77,"auburn, white",fair,blue-gray,57BBY,male,https://swapi.dev/api/planets/20/,"[https://swapi.dev/api/films/1/, https://swapi...",[],[https://swapi.dev/api/vehicles/38/],"[https://swapi.dev/api/starships/48/, https://...",2014-12-10T16:16:29.192000Z,2014-12-20T21:17:50.325000Z,https://swapi.dev/api/people/10/


In [None]:
# Clean up the gender column: every entry that isn't male or female becomes "Non-Binary".
# Matching ignores case so that re-running this cell keeps already-cleaned
# "Male"/"Female" values instead of rewriting them to "Non-Binary".
gender_labels = {"male": "Male", "female": "Female"}
people_df['gender'] = people_df['gender'].map(
    lambda value: gender_labels.get(str(value).strip().lower(), "Non-Binary")
)
display(people_df.head())

Unnamed: 0,name,height,mass,hair_color,skin_color,eye_color,birth_year,gender,homeworld,films,species,vehicles,starships,created,edited,url
0,Luke Skywalker,172,77,blond,fair,blue,19BBY,Male,https://swapi.dev/api/planets/1/,"[https://swapi.dev/api/films/1/, https://swapi...",[],"[https://swapi.dev/api/vehicles/14/, https://s...","[https://swapi.dev/api/starships/12/, https://...",2014-12-09T13:50:51.644000Z,2014-12-20T21:17:56.891000Z,https://swapi.dev/api/people/1/
1,C-3PO,167,75,,gold,yellow,112BBY,Non-Binary,https://swapi.dev/api/planets/1/,"[https://swapi.dev/api/films/1/, https://swapi...",[https://swapi.dev/api/species/2/],[],[],2014-12-10T15:10:51.357000Z,2014-12-20T21:17:50.309000Z,https://swapi.dev/api/people/2/
2,R2-D2,96,32,,"white, blue",red,33BBY,Non-Binary,https://swapi.dev/api/planets/8/,"[https://swapi.dev/api/films/1/, https://swapi...",[https://swapi.dev/api/species/2/],[],[],2014-12-10T15:11:50.376000Z,2014-12-20T21:17:50.311000Z,https://swapi.dev/api/people/3/
3,Darth Vader,202,136,none,white,yellow,41.9BBY,Male,https://swapi.dev/api/planets/1/,"[https://swapi.dev/api/films/1/, https://swapi...",[],[],[https://swapi.dev/api/starships/13/],2014-12-10T15:18:20.704000Z,2014-12-20T21:17:50.313000Z,https://swapi.dev/api/people/4/
4,Leia Organa,150,49,brown,light,brown,19BBY,Female,https://swapi.dev/api/planets/2/,"[https://swapi.dev/api/films/1/, https://swapi...",[],[https://swapi.dev/api/vehicles/30/],[],2014-12-10T15:20:09.791000Z,2014-12-20T21:17:50.315000Z,https://swapi.dev/api/people/5/


## Section 2: Gather User Input to Generate Character List

In [62]:
# Hard-coded menu passed to display_menu: trait category -> option -> character names.
# NOTE(review): these lists look like placeholders to be replaced by characters
# filtered from people_df (see the Section 3 heading) — confirm.
choice_menu = {
    "Gender": {
        "Male": ["Luke Skywalker", "Han Solo", "Obi-Wan Kenobi"],
        "Female": ["Leia Organa", "Ahsoka Tano", "Padmé Amidala"],
        "Non-Binary": ["Rey Skywalker", "Doctor Aphra", "Zayne Carrick"]
    },
    "Species": {
        "Human": ["Anakin Skywalker", "Mace Windu", "Qui-Gon Jinn"],
        "Droid": ["R2-D2", "C-3PO", "IG-88"],
        "Wookie": ["Chewbacca", "Tarfful", "Black Krrsantan"]
    },
    # NOTE(review): the "1-50" and "50-100" labels both include 50, and Darth Vader's
    # mass is 136 in the people_df preview shown earlier in this notebook, which
    # falls outside "50-100" — verify these groupings.
    "Mass Range": {
        "1-50": ["Yoda", "R2-D2", "Wicket W. Warrick"],
        "50-100": ["Chewbacca", "Darth Vader", "Boba Fett"],
        "100+": ["Jabba the Hutt", "Zillo Beast", "Rancor"]
    }
}


In [63]:
# Welcome message formatting: every banner line is laid out on a fixed-width canvas
MENU_WIDTH = 46

welcome_message1 = "Welcome to THE DEATH STAR game."
welcome_message2 = "A long time ago in a galaxy far, far away...."
welcome_design = "***"

menu_dashes = "-" * MENU_WIDTH

# Unused width left over after each piece of text is placed on the canvas
num_mess_spaces1, num_mess_spaces2, num_des_spaces = (
    MENU_WIDTH - len(text)
    for text in (welcome_message1, welcome_message2, welcome_design)
)

# Left padding that centres the first message and the "***" decoration
welcome_spacing = " " * (num_mess_spaces1 // 2)
welcome_des_spc = " " * (num_des_spaces // 2)

In [None]:
# Launch the game and present a greeting to the player
def display_menu(menu):
    """Show the welcome banner and walk the player through picking a character.

    The player chooses a trait category, then an option within that category,
    then one of the characters listed for the option. Every prompt is repeated
    until the answer is a number matching one of the listed entries.

    Args:
        menu: Nested mapping of category -> option -> list of character names.

    Returns:
        The name of the selected character.

    Raises:
        ValueError: If a level of the menu has nothing to choose from.
    """
    def ask(options, prompt):
        """List the options with 1-based numbers and return the one the player picks."""
        if not options:
            raise ValueError("menu level has no options to choose from")
        for number, label in enumerate(options, 1):
            print(f"{number}. {label}")
        while True:
            answer = input(prompt)
            try:
                picked = int(answer)
            except ValueError:
                picked = 0  # forces the range check below to reject the answer
            # Reject 0 and negatives explicitly: list indexing would otherwise wrap around
            if 1 <= picked <= len(options):
                return options[picked - 1]
            print(f"Please enter a number between 1 and {len(options)}.")

    print(f"""
{menu_dashes}
{welcome_des_spc}{welcome_design}
{welcome_spacing}{welcome_message1}
{welcome_message2}
{welcome_des_spc}{welcome_design}
{menu_dashes}
""")

    # Trait category (e.g. Gender, Species, Mass Range)
    selected_key = ask(
        list(menu.keys()),
        "\nWhat Star Wars character would you like to play today? "
        f"\nSelect # from the trait categories: (1-{len(menu)}): ",
    )

    # Subcategory (e.g., Male/Female/Non-Binary, Human/Droid/Wookie)
    sub_menu = menu[selected_key]
    selected_sub_key = ask(list(sub_menu.keys()), f"\nSelect a {selected_key} option: ")

    # Character within the chosen subcategory
    selected_character = ask(
        list(sub_menu[selected_sub_key]),
        f"\nSelect a character from {selected_sub_key}: ",
    )

    print(f"\nYou selected {selected_character} from {selected_sub_key} under {selected_key}.")
    return selected_character

# Run the menu selection with the hard-coded choice_menu
# (the call reads the player's answers through input())
display_menu(choice_menu)


## Section 3: Filter Character List, Get Random 3 Characters, Ask User to Pick One Character

## Section 4: Ask User to Select Visualization

## Section 5: Display Selected Visualizations

In [None]:
# Generic bar chart of character heights
# Coerce 'height' to numbers; entries that cannot be parsed become NaN
people_df['height'] = pd.to_numeric(people_df['height'], errors='coerce')

# Keep only rows with a usable height, ordered from shortest to tallest
people_df_cleaned = people_df.dropna(subset=['height'])
people_df_sorted = people_df_cleaned.sort_values(by='height')

# Draw the bars through an explicit figure/axes pair
fig, ax = plt.subplots(figsize=(12, 6))
ax.bar(people_df_sorted['name'], people_df_sorted['height'], color='skyblue')
ax.tick_params(axis='x', labelrotation=90)  # vertical names stay readable
ax.set_title('Star Wars Characters: Height from Shortest to Tallest', fontsize=16)
ax.set_xlabel('Character', fontsize=14)
ax.set_ylabel('Height (cm)', fontsize=14)
fig.tight_layout()
plt.show()

In [None]:
# Function to plot the graph and highlight a specific character
def plot_character_height(people_df, highlight_character=None):
    """Draw a bar chart of character heights, shortest to tallest.

    Args:
        people_df: DataFrame with 'name' and 'height' columns. Heights that
            cannot be parsed as numbers are left out of the chart. The frame
            passed in is not modified.
        highlight_character: Optional name whose bar is drawn in orange and
            labelled with its height.
    """
    # assign() works on a copy, so the caller's 'height' column keeps its original values
    heights = (
        people_df
        .assign(height=pd.to_numeric(people_df['height'], errors='coerce'))
        .dropna(subset=['height'])
        .sort_values(by='height')
    )
    names = heights['name'].tolist()

    # Assign colors: highlight one character differently
    colors = ['orange' if name == highlight_character else 'skyblue' for name in names]

    # Create a bar chart
    plt.figure(figsize=(12, 6))
    plt.bar(heights['name'], heights['height'], color=colors)
    plt.xticks(rotation=90)  # Rotate x-axis labels for better readability
    plt.title('Star Wars Characters: Height from Shortest to Tallest', fontsize=16)
    plt.xlabel('Character', fontsize=14)
    plt.ylabel('Height (cm)', fontsize=14)

    # Add annotation if a character is highlighted
    if highlight_character and highlight_character in names:
        bar_position = names.index(highlight_character)
        char_height = heights['height'].iloc[bar_position]
        plt.text(
            bar_position,
            char_height + 5,  # Position slightly above the bar
            f"{highlight_character}: {char_height} cm",
            ha='center', color='orange', fontsize=10
        )

    plt.tight_layout()
    plt.show()

# Example usage: draw the height chart with Luke Skywalker's bar highlighted and labelled
plot_character_height(people_df, highlight_character='Luke Skywalker')

In [None]:
# Generic scatter chart comparing mass and height of Star Wars characters
# (works on people_df, which was built from the API records earlier in the notebook)

# Coerce both measurement columns to numbers; unparseable entries become NaN
for measurement in ('mass', 'height'):
    people_df[measurement] = pd.to_numeric(people_df[measurement], errors='coerce')

# Keep only the characters that have both measurements
people_df_cleaned = people_df.dropna(subset=['mass', 'height'])

# Plot through an explicit figure/axes pair
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(
    people_df_cleaned['height'],
    people_df_cleaned['mass'],
    color='skyblue',
    edgecolor='black',
    alpha=0.7,
)

# Title, axis labels and a faint grid
ax.set_title('Mass vs. Height of Star Wars Characters', fontsize=16)
ax.set_xlabel('Height (cm)', fontsize=14)
ax.set_ylabel('Mass (kg)', fontsize=14)
ax.grid(alpha=0.3)
fig.tight_layout()

plt.show()

In [None]:


def plot_mass_vs_height(people_df, highlight_character=None):
    """Draw a scatter plot of character mass against height.

    Args:
        people_df: DataFrame with 'name', 'mass' and 'height' columns. Rows
            whose mass or height cannot be parsed as a number are left out.
            The frame passed in is not modified.
        highlight_character: Optional name to mark with a larger orange point
            and an annotation showing its values.
    """
    # assign() works on a copy, so the caller's columns keep their original values
    measured = people_df.assign(
        mass=pd.to_numeric(people_df['mass'], errors='coerce'),
        height=pd.to_numeric(people_df['height'], errors='coerce'),
    ).dropna(subset=['mass', 'height'])

    # Create the scatter plot
    plt.figure(figsize=(10, 6))
    plt.scatter(
        measured['height'],
        measured['mass'],
        color='skyblue',
        edgecolor='black',
        alpha=0.7,
        label='Other Characters'
    )

    # Highlight a specific character if provided and still present after cleaning
    if highlight_character and highlight_character in measured['name'].values:
        highlight_row = measured[measured['name'] == highlight_character]
        highlight_height = highlight_row['height'].values[0]
        highlight_mass = highlight_row['mass'].values[0]
        plt.scatter(
            highlight_row['height'],
            highlight_row['mass'],
            color='orange',
            edgecolor='black',
            s=100,  # Larger marker size for emphasis
            label=f'{highlight_character}'
        )
        # Add annotation for the highlighted character
        plt.annotate(
            f"{highlight_character}\n({highlight_height} cm, {highlight_mass} kg)",
            (highlight_height, highlight_mass),
            textcoords="offset points",
            xytext=(10, -10),  # Offset for the annotation
            ha='left',
            fontsize=10,
            color='orange'
        )

    # Add labels, title, legend, and grid
    plt.title('Mass vs. Height of Star Wars Characters', fontsize=16)
    plt.xlabel('Height (cm)', fontsize=14)
    plt.ylabel('Mass (kg)', fontsize=14)
    plt.grid(alpha=0.3)
    plt.legend()
    plt.tight_layout()

    # Show the plot
    plt.show()

# Example usage: draw the mass/height scatter with Luke Skywalker's point highlighted and annotated
plot_mass_vs_height(people_df, highlight_character='Luke Skywalker')

In [None]:
# Generic scatter chart comparing homeworld diameter with surface water percentage

# Coerce both columns to numbers; unparseable entries become NaN
for planet_column in ('diameter', 'surface_water'):
    planets_df[planet_column] = pd.to_numeric(planets_df[planet_column], errors='coerce')

# Keep only the planets that have both values
planets_df_cleaned = planets_df.dropna(subset=['diameter', 'surface_water'])

# Plot through an explicit figure/axes pair
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(
    planets_df_cleaned['diameter'],
    planets_df_cleaned['surface_water'],
    color='skyblue',
    edgecolor='black',
    alpha=0.7,
)

# Title, axis labels and a faint grid
ax.set_title('Homeworld Diameter vs Surface Water Percentage', fontsize=16)
ax.set_xlabel('Diameter (km)', fontsize=14)
ax.set_ylabel('Surface Water (%)', fontsize=14)
ax.grid(alpha=0.3)
fig.tight_layout()

plt.show()

In [None]:


def plot_diameter_vs_surface_water(planets_df, highlight_planet=None):
    """Draw a scatter plot of planet diameter against surface water percentage.

    Args:
        planets_df: DataFrame with 'name', 'diameter' and 'surface_water'
            columns. Rows whose diameter or surface water cannot be parsed as
            a number are left out. The frame passed in is not modified.
        highlight_planet: Optional planet name to mark with a larger orange
            point and an annotation showing its values.
    """
    # assign() works on a copy, so the caller's columns keep their original values
    measured = planets_df.assign(
        diameter=pd.to_numeric(planets_df['diameter'], errors='coerce'),
        surface_water=pd.to_numeric(planets_df['surface_water'], errors='coerce'),
    ).dropna(subset=['diameter', 'surface_water'])

    # Create the scatter plot
    plt.figure(figsize=(10, 6))
    plt.scatter(
        measured['diameter'],
        measured['surface_water'],
        color='skyblue',
        edgecolor='black',
        alpha=0.7,
        label='Other Planets'
    )

    # Highlight specific planet if provided and still present after cleaning
    if highlight_planet and highlight_planet in measured['name'].values:
        highlight_row = measured[measured['name'] == highlight_planet]
        highlight_diameter = highlight_row['diameter'].values[0]
        highlight_water = highlight_row['surface_water'].values[0]
        plt.scatter(
            highlight_row['diameter'],
            highlight_row['surface_water'],
            color='orange',
            edgecolor='black',
            s=100,  # Larger marker size for emphasis
            label=f'{highlight_planet}'
        )
        # Add annotation for the highlighted planet
        plt.annotate(
            f"{highlight_planet}\n({highlight_diameter} km, {highlight_water}%)",
            (highlight_diameter, highlight_water),
            textcoords="offset points",
            xytext=(10, -10),  # Offset for the annotation
            ha='left',
            fontsize=10,
            color='orange'
        )

    # Add labels, title, legend, and grid
    plt.title('Homeworld Diameter vs Surface Water Percentage', fontsize=16)
    plt.xlabel('Diameter (km)', fontsize=14)
    plt.ylabel('Surface Water (%)', fontsize=14)
    plt.grid(alpha=0.3)
    plt.legend()
    plt.tight_layout()

    # Show the plot
    plt.show()

# Example usage: draw the diameter/surface-water scatter with Tatooine highlighted and annotated
plot_diameter_vs_surface_water(planets_df, highlight_planet='Tatooine')

In [None]:
# Code for comparing planet diameter vs population density

def plot_diameter_vs_population_density(planets_df, highlight_planet=None):
    """Draw a scatter plot of planet diameter against population density.

    Density is computed as population divided by the area of a circle with the
    planet's diameter.

    Args:
        planets_df: DataFrame with 'name', 'diameter' and 'population'
            columns. Rows without a usable diameter or density are left out.
            The frame passed in is not modified and gains no new column.
        highlight_planet: Optional planet name to mark with a larger orange
            point and an annotation showing its values.
    """
    diameter = pd.to_numeric(planets_df['diameter'], errors='coerce')
    population = pd.to_numeric(planets_df['population'], errors='coerce')

    # NOTE(review): this is the area of a disc (pi * r^2); a sphere's surface
    # area would be 4 * pi * r^2 — confirm which one the chart is meant to use.
    disc_area = np.pi * (diameter / 2) ** 2
    # A zero diameter yields an infinite density; treat it as missing
    density = (population / disc_area).replace([np.inf, -np.inf], np.nan)

    # assign() works on a copy, so the caller's frame is left untouched
    measured = planets_df.assign(
        diameter=diameter,
        population=population,
        population_density=density,
    ).dropna(subset=['diameter', 'population_density'])

    # Create the scatter plot
    plt.figure(figsize=(10, 6))
    plt.scatter(
        measured['diameter'],
        measured['population_density'],
        color='skyblue',
        edgecolor='black',
        alpha=0.7,
        label='Other Planets'
    )

    # Highlight specific planet if provided and still present after cleaning
    if highlight_planet and highlight_planet in measured['name'].values:
        highlight_row = measured[measured['name'] == highlight_planet]
        highlight_diameter = highlight_row['diameter'].values[0]
        highlight_density = highlight_row['population_density'].values[0]
        plt.scatter(
            highlight_row['diameter'],
            highlight_row['population_density'],
            color='orange',
            edgecolor='black',
            s=100,  # Larger marker size for emphasis
            label=f'{highlight_planet}'
        )
        # Add annotation for the highlighted planet
        plt.annotate(
            f"{highlight_planet}\n({highlight_diameter} km, {highlight_density:.2f} per km²)",
            (highlight_diameter, highlight_density),
            textcoords="offset points",
            xytext=(10, -10),  # Offset for the annotation
            ha='left',
            fontsize=10,
            color='orange'
        )

    # Add labels, title, legend, and grid
    plt.title('Homeworld Diameter vs Population Density', fontsize=16)
    plt.xlabel('Diameter (km)', fontsize=14)
    plt.ylabel('Population Density (per km²)', fontsize=14)
    plt.grid(alpha=0.3)
    plt.legend()
    plt.tight_layout()

    # Show the plot
    plt.show()

# Example usage: draw the diameter/population-density scatter with Coruscant highlighted and annotated
plot_diameter_vs_population_density(planets_df, highlight_planet='Coruscant')

In [None]:
def plot_lifespan_vs_type(species_df, highlight_species=None):
    """Draw a scatter plot of average lifespan for each species classification.

    Args:
        species_df: DataFrame with 'name', 'classification' and
            'average_lifespan' columns. Rows without a numeric lifespan or
            without a classification are left out. The frame passed in is not
            modified.
        highlight_species: Optional species name to mark with a larger orange
            point and an annotation showing its values.
    """
    # assign() works on a copy, so the caller's column keeps its original values
    species = species_df.assign(
        average_lifespan=pd.to_numeric(species_df['average_lifespan'], errors='coerce')
    ).dropna(subset=['average_lifespan', 'classification'])

    # Give each classification an x-axis slot, in order of first appearance
    type_mapping = {
        label: position
        for position, label in enumerate(species['classification'].unique())
    }
    # A second assign() avoids writing a new column into a slice of another frame
    species = species.assign(type_numeric=species['classification'].map(type_mapping))

    # Create the scatter plot
    plt.figure(figsize=(10, 6))
    plt.scatter(
        species['type_numeric'],
        species['average_lifespan'],
        color='skyblue',
        edgecolor='black',
        alpha=0.7,
        label='Other Species'
    )

    # Add x-axis labels corresponding to species type
    plt.xticks(
        ticks=list(type_mapping.values()),
        labels=list(type_mapping.keys()),
        rotation=45,
        ha='right'
    )

    # Highlight specific species if provided and still present after cleaning
    if highlight_species and highlight_species in species['name'].values:
        highlight_row = species[species['name'] == highlight_species]
        highlight_slot = highlight_row['type_numeric'].values[0]
        highlight_lifespan = highlight_row['average_lifespan'].values[0]
        plt.scatter(
            highlight_row['type_numeric'],
            highlight_row['average_lifespan'],
            color='orange',
            edgecolor='black',
            s=100,  # Larger marker size for emphasis
            label=f'{highlight_species}'
        )
        # Add annotation for the highlighted species
        plt.annotate(
            f"{highlight_species}\n({highlight_row['classification'].values[0]}, {highlight_lifespan} years)",
            (highlight_slot, highlight_lifespan),
            textcoords="offset points",
            xytext=(10, -10),  # Offset for the annotation
            ha='left',
            fontsize=10,
            color='orange'
        )

    # Add labels, title, legend, and grid
    plt.title('Species Lifespan vs Type', fontsize=16)
    plt.xlabel('Species Type (Classification)', fontsize=14)
    plt.ylabel('Average Lifespan (years)', fontsize=14)
    plt.grid(alpha=0.3)
    plt.legend()
    plt.tight_layout()

    # Show the plot
    plt.show()

# Example usage: draw the lifespan chart with the Wookiee species highlighted.
# NOTE(review): choice_menu in this notebook spells the species "Wookie"; confirm
# which spelling the species_df 'name' column uses — a name that is not found
# is skipped without any highlight or error.
plot_lifespan_vs_type(species_df, highlight_species='Wookiee')

In [None]:
def plot_height_vs_lifespan(species_df, highlight_species=None):
    """Draw a scatter plot of species average height against average lifespan.

    Args:
        species_df: DataFrame with 'name', 'average_height' and
            'average_lifespan' columns. Rows whose height or lifespan cannot
            be parsed as a number are left out. The frame passed in is not
            modified.
        highlight_species: Optional species name to mark with a larger orange
            point and an annotation showing its values.
    """
    # assign() works on a copy, so the caller's columns keep their original values
    measured = species_df.assign(
        average_lifespan=pd.to_numeric(species_df['average_lifespan'], errors='coerce'),
        average_height=pd.to_numeric(species_df['average_height'], errors='coerce'),
    ).dropna(subset=['average_lifespan', 'average_height'])

    # Create the scatter plot
    plt.figure(figsize=(10, 6))
    plt.scatter(
        measured['average_height'],
        measured['average_lifespan'],
        color='skyblue',
        edgecolor='black',
        alpha=0.7,
        label='Other Species'
    )

    # Highlight specific species if provided and still present after cleaning
    if highlight_species and highlight_species in measured['name'].values:
        highlight_row = measured[measured['name'] == highlight_species]
        highlight_height = highlight_row['average_height'].values[0]
        highlight_lifespan = highlight_row['average_lifespan'].values[0]
        plt.scatter(
            highlight_row['average_height'],
            highlight_row['average_lifespan'],
            color='orange',
            edgecolor='black',
            s=100,  # Larger marker size for emphasis
            label=f'{highlight_species}'
        )
        # Add annotation for the highlighted species
        plt.annotate(
            f"{highlight_species}\n({highlight_height} cm, {highlight_lifespan} years)",
            (highlight_height, highlight_lifespan),
            textcoords="offset points",
            xytext=(10, -10),  # Offset for the annotation
            ha='left',
            fontsize=10,
            color='orange'
        )

    # Add labels, title, legend, and grid
    plt.title('Species Average Height vs Average Lifespan', fontsize=16)
    plt.xlabel('Average Height (cm)', fontsize=14)
    plt.ylabel('Average Lifespan (years)', fontsize=14)
    plt.grid(alpha=0.3)
    plt.legend()
    plt.tight_layout()

    # Show the plot
    plt.show()

# Example usage: draw the height/lifespan scatter with the Human species highlighted and annotated
plot_height_vs_lifespan(species_df, highlight_species='Human')

In [None]:
def plot_speed_vs_cost(starships_df, highlight_starship=None):
    """Draw a scatter plot of starship atmospheric speed against cost.

    Args:
        starships_df: DataFrame with 'name', 'cost_in_credits' and
            'max_atmosphering_speed' columns. Rows whose cost or speed cannot
            be parsed as a number are left out. The frame passed in is not
            modified.
        highlight_starship: Optional starship name to mark with a larger
            orange point and an annotation showing its values.
    """
    # assign() works on a copy, so the caller's columns keep their original values
    measured = starships_df.assign(
        cost_in_credits=pd.to_numeric(starships_df['cost_in_credits'], errors='coerce'),
        max_atmosphering_speed=pd.to_numeric(
            starships_df['max_atmosphering_speed'], errors='coerce'
        ),
    ).dropna(subset=['cost_in_credits', 'max_atmosphering_speed'])

    # Create the scatter plot
    plt.figure(figsize=(10, 6))
    plt.scatter(
        measured['max_atmosphering_speed'],
        measured['cost_in_credits'],
        color='skyblue',
        edgecolor='black',
        alpha=0.7,
        label='Other Starships'
    )

    # Highlight specific starship if provided and still present after cleaning
    if highlight_starship and highlight_starship in measured['name'].values:
        highlight_row = measured[measured['name'] == highlight_starship]
        highlight_speed = highlight_row['max_atmosphering_speed'].values[0]
        highlight_cost = highlight_row['cost_in_credits'].values[0]
        plt.scatter(
            highlight_row['max_atmosphering_speed'],
            highlight_row['cost_in_credits'],
            color='orange',
            edgecolor='black',
            s=100,  # Larger marker size for emphasis
            label=f'{highlight_starship}'
        )
        # Add annotation for the highlighted starship
        plt.annotate(
            f"{highlight_starship}\n({highlight_speed} speed, {highlight_cost} credits)",
            (highlight_speed, highlight_cost),
            textcoords="offset points",
            xytext=(10, -10),  # Offset for the annotation
            ha='left',
            fontsize=10,
            color='orange'
        )

    # Add labels, title, legend, and grid
    plt.title('Starship Speed vs Cost', fontsize=16)
    plt.xlabel('Max Atmosphering Speed', fontsize=14)
    plt.ylabel('Cost (Credits)', fontsize=14)
    plt.grid(alpha=0.3)
    plt.legend()
    plt.tight_layout()

    # Show the plot
    plt.show()

# Example usage: draw the speed/cost scatter with the Millennium Falcon highlighted and annotated
plot_speed_vs_cost(starships_df, highlight_starship='Millennium Falcon')

In [None]:
def plot_passenger_capacity_vs_size(starships_df, highlight_starship=None):
    """Draw a scatter plot of starship length against passenger capacity.

    Args:
        starships_df: DataFrame with 'name', 'passengers' and 'length'
            columns. Rows whose passenger count or length cannot be parsed as
            a number are left out. The frame passed in is not modified.
        highlight_starship: Optional starship name to mark with a larger
            orange point and an annotation showing its values.
    """
    # assign() works on a copy, so the caller's columns keep their original values
    measured = starships_df.assign(
        passengers=pd.to_numeric(starships_df['passengers'], errors='coerce'),
        length=pd.to_numeric(starships_df['length'], errors='coerce'),
    ).dropna(subset=['passengers', 'length'])

    # Create the scatter plot
    plt.figure(figsize=(10, 6))
    plt.scatter(
        measured['length'],
        measured['passengers'],
        color='skyblue',
        edgecolor='black',
        alpha=0.7,
        label='Other Starships'
    )

    # Highlight specific starship if provided and still present after cleaning
    if highlight_starship and highlight_starship in measured['name'].values:
        highlight_row = measured[measured['name'] == highlight_starship]
        highlight_length = highlight_row['length'].values[0]
        highlight_passengers = highlight_row['passengers'].values[0]
        plt.scatter(
            highlight_row['length'],
            highlight_row['passengers'],
            color='orange',
            edgecolor='black',
            s=100,  # Larger marker size for emphasis
            label=f'{highlight_starship}'
        )
        # Add annotation for the highlighted starship
        plt.annotate(
            f"{highlight_starship}\n({highlight_length} m, {highlight_passengers} passengers)",
            (highlight_length, highlight_passengers),
            textcoords="offset points",
            xytext=(10, -10),  # Offset for the annotation
            ha='left',
            fontsize=10,
            color='orange'
        )

    # Add labels, title, legend, and grid
    plt.title('Starship Passenger Capacity vs Size', fontsize=16)
    plt.xlabel('Size (Length in meters)', fontsize=14)
    plt.ylabel('Passenger Capacity', fontsize=14)
    plt.grid(alpha=0.3)
    plt.legend()
    plt.tight_layout()

    # Show the plot
    plt.show()

# Example usage: draw the capacity/size scatter with the Star Destroyer highlighted.
# NOTE(review): if that row's 'passengers' or 'length' value is not numeric, the row
# is dropped during cleaning and no highlight is drawn — verify against the data.
plot_passenger_capacity_vs_size(starships_df, highlight_starship='Star Destroyer')


In [None]:
# Count the entries in each character's 'films' list and store the result per row
people_df['film_count'] = [len(film_list) for film_list in people_df['films']]

# Function to create the bar chart with spacing
def plot_film_count(people_df, highlight_character=None):
    """Plot a bar chart of the number of films each character appears in.

    Bars are ordered from most to fewest films. When ``highlight_character``
    matches a value in the 'name' column, the matching bar(s) are drawn in
    orange and the first match is annotated with its film count; all other
    bars are sky blue.

    The annotation is placed by row *position* in the sorted frame rather
    than by index label, so it lands on the correct bar even when the
    DataFrame's index contains repeated labels.

    Args:
        people_df: DataFrame with a 'name' column and either a 'film_count'
            column or a 'films' column whose entries support ``len``.
            The caller's frame is never modified.
        highlight_character: Name to emphasise, or None for no highlight.

    Returns:
        None. The figure is rendered with ``plt.show()``.
    """
    # Derive the count locally if the caller has not added it, so the function
    # does not depend on another cell having run first. assign() returns a new
    # frame, leaving the caller's DataFrame untouched.
    if 'film_count' not in people_df.columns:
        people_df = people_df.assign(film_count=people_df['films'].apply(len))

    # Sort by film count for a cleaner chart
    people_df_sorted = people_df.sort_values(by='film_count', ascending=False)
    names = people_df_sorted['name'].to_numpy()
    film_counts = people_df_sorted['film_count'].to_numpy()

    # One integer slot per bar, in sorted order
    x_positions = np.arange(len(people_df_sorted))

    # The same flags drive both the bar colours and the annotation position
    is_highlighted = [name == highlight_character for name in names]
    colors = ['orange' if flag else 'skyblue' for flag in is_highlighted]

    # Bar width below 1.0 leaves a visible gap between neighbouring bars
    plt.figure(figsize=(18, 6))
    plt.bar(x_positions, film_counts, color=colors, width=0.6)
    plt.xticks(x_positions, names, rotation=45, ha='right', fontsize=12)
    plt.yticks(fontsize=12)
    plt.title('Number of Films per Character', fontsize=16)
    plt.xlabel('Character', fontsize=14)
    plt.ylabel('Number of Films', fontsize=14)

    # Annotate the first highlighted bar, located by position (not index label)
    highlight_pos = next((pos for pos, flag in enumerate(is_highlighted) if flag), None)
    if highlight_pos is not None:
        highlight_count = film_counts[highlight_pos]
        plt.text(
            x_positions[highlight_pos],
            highlight_count + 0.2,  # Lift the label just above the bar top
            f"{highlight_character}: {highlight_count} films",
            ha='center',
            fontsize=10,
            color='orange'
        )

    plt.grid(axis='y', linestyle='--', alpha=0.7)  # Light horizontal grid for readability
    plt.tight_layout()

    plt.show()

# Example usage: chart every character's film count and emphasise "Luke Skywalker"
# (the orange bar and annotation only appear if that name exists in people_df['name']).
plot_film_count(people_df, highlight_character='Luke Skywalker')

In [None]:
# Calculate the number of films for each planet as the length of each 'films' entry
# (presumably a list of film URLs from SWAPI; confirm against the loading cell).
# NOTE: this writes a 'film_count' column onto planets_df in place; re-running the
# cell simply overwrites that column.
planets_df['film_count'] = planets_df['films'].apply(len)

def plot_film_count_by_planet(planets_df, highlight_planet=None):
    """Plot a bar chart of the number of films each planet appears in.

    Bars are ordered from most to fewest films. When ``highlight_planet``
    matches a value in the 'name' column, the matching bar(s) are drawn in
    orange and the first match is annotated with its film count; all other
    bars are sky blue.

    The annotation is placed by row *position* in the sorted frame rather
    than by index label, so it lands on the correct bar even when the
    DataFrame's index contains repeated labels.

    Args:
        planets_df: DataFrame with a 'name' column and either a 'film_count'
            column or a 'films' column whose entries support ``len``.
            The caller's frame is never modified.
        highlight_planet: Planet name to emphasise, or None for no highlight.

    Returns:
        None. The figure is rendered with ``plt.show()``.
    """
    # Derive the count locally if the caller has not added it, so the function
    # does not depend on another cell having run first. assign() returns a new
    # frame, leaving the caller's DataFrame untouched.
    if 'film_count' not in planets_df.columns:
        planets_df = planets_df.assign(film_count=planets_df['films'].apply(len))

    # Sort by film count for a cleaner chart
    planets_df_sorted = planets_df.sort_values(by='film_count', ascending=False)
    names = planets_df_sorted['name'].to_numpy()
    film_counts = planets_df_sorted['film_count'].to_numpy()

    # One integer slot per bar, in sorted order
    x_positions = np.arange(len(planets_df_sorted))

    # The same flags drive both the bar colours and the annotation position
    is_highlighted = [name == highlight_planet for name in names]
    colors = ['orange' if flag else 'skyblue' for flag in is_highlighted]

    # Bar width below 1.0 leaves a visible gap between neighbouring bars
    plt.figure(figsize=(18, 6))
    plt.bar(x_positions, film_counts, color=colors, width=0.6)
    plt.xticks(x_positions, names, rotation=45, ha='right', fontsize=12)
    plt.yticks(fontsize=12)
    plt.title('Number of Films per Planet', fontsize=16)
    plt.xlabel('Planet', fontsize=14)
    plt.ylabel('Number of Films', fontsize=14)

    # Annotate the first highlighted bar, located by position (not index label)
    highlight_pos = next((pos for pos, flag in enumerate(is_highlighted) if flag), None)
    if highlight_pos is not None:
        highlight_count = film_counts[highlight_pos]
        plt.text(
            x_positions[highlight_pos],
            highlight_count + 0.2,  # Lift the label just above the bar top
            f"{highlight_planet}: {highlight_count} films",
            ha='center',
            fontsize=10,
            color='orange'
        )

    plt.grid(axis='y', linestyle='--', alpha=0.7)  # Light horizontal grid for readability
    plt.tight_layout()

    plt.show()

# Example usage: chart every planet's film count and emphasise "Tatooine"
# (the orange bar and annotation only appear if that name exists in planets_df['name']).
plot_film_count_by_planet(planets_df, highlight_planet='Tatooine')

In [None]:
# Calculate the number of films for each species as the length of each 'films' entry
# (presumably a list of film URLs from SWAPI; confirm against the loading cell).
# NOTE: this writes a 'film_count' column onto species_df in place; re-running the
# cell simply overwrites that column.
species_df['film_count'] = species_df['films'].apply(len)

def plot_film_count_by_species(species_df, highlight_species=None):
    """Plot a bar chart of the number of films each species appears in.

    Bars are ordered from most to fewest films. When ``highlight_species``
    matches a value in the 'name' column, the matching bar(s) are drawn in
    orange and the first match is annotated with its film count; all other
    bars are sky blue.

    The annotation is placed by row *position* in the sorted frame rather
    than by index label, so it lands on the correct bar even when the
    DataFrame's index contains repeated labels.

    Args:
        species_df: DataFrame with a 'name' column and either a 'film_count'
            column or a 'films' column whose entries support ``len``.
            The caller's frame is never modified.
        highlight_species: Species name to emphasise, or None for no highlight.

    Returns:
        None. The figure is rendered with ``plt.show()``.
    """
    # Derive the count locally if the caller has not added it, so the function
    # does not depend on another cell having run first. assign() returns a new
    # frame, leaving the caller's DataFrame untouched.
    if 'film_count' not in species_df.columns:
        species_df = species_df.assign(film_count=species_df['films'].apply(len))

    # Sort by film count for a cleaner chart
    species_df_sorted = species_df.sort_values(by='film_count', ascending=False)
    names = species_df_sorted['name'].to_numpy()
    film_counts = species_df_sorted['film_count'].to_numpy()

    # One integer slot per bar, in sorted order
    x_positions = np.arange(len(species_df_sorted))

    # The same flags drive both the bar colours and the annotation position
    is_highlighted = [name == highlight_species for name in names]
    colors = ['orange' if flag else 'skyblue' for flag in is_highlighted]

    # Bar width below 1.0 leaves a visible gap between neighbouring bars
    plt.figure(figsize=(18, 6))
    plt.bar(x_positions, film_counts, color=colors, width=0.6)
    plt.xticks(x_positions, names, rotation=45, ha='right', fontsize=12)
    plt.yticks(fontsize=12)
    plt.title('Number of Films per Species', fontsize=16)
    plt.xlabel('Species', fontsize=14)
    plt.ylabel('Number of Films', fontsize=14)

    # Annotate the first highlighted bar, located by position (not index label)
    highlight_pos = next((pos for pos, flag in enumerate(is_highlighted) if flag), None)
    if highlight_pos is not None:
        highlight_count = film_counts[highlight_pos]
        plt.text(
            x_positions[highlight_pos],
            highlight_count + 0.2,  # Lift the label just above the bar top
            f"{highlight_species}: {highlight_count} films",
            ha='center',
            fontsize=10,
            color='orange'
        )

    plt.grid(axis='y', linestyle='--', alpha=0.7)  # Light horizontal grid for readability
    plt.tight_layout()

    plt.show()

# Example usage: chart every species' film count and emphasise "Wookiee"
# (the orange bar and annotation only appear if that name exists in species_df['name']).
plot_film_count_by_species(species_df, highlight_species='Wookiee')

In [None]:
# Calculate the number of films for each starship as the length of each 'films' entry
# (presumably a list of film URLs from SWAPI; confirm against the loading cell).
# NOTE: this writes a 'film_count' column onto starships_df in place; re-running the
# cell simply overwrites that column.
starships_df['film_count'] = starships_df['films'].apply(len)

def plot_film_count_by_starship(starships_df, highlight_starship=None):
    """Plot a bar chart of the number of films each starship appears in.

    Bars are ordered from most to fewest films. When ``highlight_starship``
    matches a value in the 'name' column, the matching bar(s) are drawn in
    orange and the first match is annotated with its film count; all other
    bars are sky blue.

    The annotation is placed by row *position* in the sorted frame rather
    than by index label, so it lands on the correct bar even when the
    DataFrame's index contains repeated labels.

    Args:
        starships_df: DataFrame with a 'name' column and either a 'film_count'
            column or a 'films' column whose entries support ``len``.
            The caller's frame is never modified.
        highlight_starship: Starship name to emphasise, or None for no highlight.

    Returns:
        None. The figure is rendered with ``plt.show()``.
    """
    # Derive the count locally if the caller has not added it, so the function
    # does not depend on another cell having run first. assign() returns a new
    # frame, leaving the caller's DataFrame untouched.
    if 'film_count' not in starships_df.columns:
        starships_df = starships_df.assign(film_count=starships_df['films'].apply(len))

    # Sort by film count for a cleaner chart
    starships_df_sorted = starships_df.sort_values(by='film_count', ascending=False)
    names = starships_df_sorted['name'].to_numpy()
    film_counts = starships_df_sorted['film_count'].to_numpy()

    # One integer slot per bar, in sorted order
    x_positions = np.arange(len(starships_df_sorted))

    # The same flags drive both the bar colours and the annotation position
    is_highlighted = [name == highlight_starship for name in names]
    colors = ['orange' if flag else 'skyblue' for flag in is_highlighted]

    # Bar width below 1.0 leaves a visible gap between neighbouring bars
    plt.figure(figsize=(18, 6))
    plt.bar(x_positions, film_counts, color=colors, width=0.6)
    plt.xticks(x_positions, names, rotation=45, ha='right', fontsize=12)
    plt.yticks(fontsize=12)
    plt.title('Number of Films per Starship', fontsize=16)
    plt.xlabel('Starship', fontsize=14)
    plt.ylabel('Number of Films', fontsize=14)

    # Annotate the first highlighted bar, located by position (not index label)
    highlight_pos = next((pos for pos, flag in enumerate(is_highlighted) if flag), None)
    if highlight_pos is not None:
        highlight_count = film_counts[highlight_pos]
        plt.text(
            x_positions[highlight_pos],
            highlight_count + 0.2,  # Lift the label just above the bar top
            f"{highlight_starship}: {highlight_count} films",
            ha='center',
            fontsize=10,
            color='orange'
        )

    plt.grid(axis='y', linestyle='--', alpha=0.7)  # Light horizontal grid for readability
    plt.tight_layout()

    plt.show()

# Example usage: chart every starship's film count and emphasise "Millennium Falcon"
# (the orange bar and annotation only appear if that name exists in starships_df['name']).
plot_film_count_by_starship(starships_df, highlight_starship='Millennium Falcon')