In [37]:
import pandas as pd
import plotly.express as px

# Load dataset. Only "class_starting" is used in this cell, so restrict the
# read to that column instead of parsing every column of the archive.
zip_path = "over_one_mil_chars.zip"
df = pd.read_csv(zip_path, compression='zip', usecols=["class_starting"])

# Filter for only spellcasting classes
selected_classes = ["Bard", "Cleric", "Druid", "Sorcerer", "Warlock", "Wizard"]
df_filtered = df[df["class_starting"].isin(selected_classes)]

# Count number of players per class
class_count = df_filtered["class_starting"].value_counts().reset_index()
# Name the two columns positionally, so the result does not depend on what
# reset_index() happens to call them
class_count.columns = ["Class", "Number of Players"]

# Custom colors
class_colors = {
    "Bard": "#AB6DAC",     # Bard Rogue
    "Cleric": "#91A1B2",   # Cleric Silver
    "Druid": "#7A853B",    # Druid Moss
    "Sorcerer": "#992E2E", # Sorcerer Blood
    "Warlock": "#7B469B",  # Warlock Iris
    "Wizard": "#2A50A1"    # Wizard Cobalt
}

# Create interactive bar chart
fig = px.bar(
    class_count,
    x="Class",
    y="Number of Players",
    # Show count labels on bars
    text="Number of Players",
    title="Number of Players Per Character Class",
    labels={"Class": "Character Class", "Number of Players": "Count"},
    color="Class",
    # Apply custom color scheme
    color_discrete_map=class_colors
)

# Modify layout (these axis titles take precedence over the `labels` mapping above)
fig.update_layout(
    xaxis_title="Character Class",
    yaxis_title="Number of Players",
    # Rotate x-axis labels to improve readability
    xaxis_tickangle=-45,
    height=600, width=700
)

# Show chart
fig.show()


In [36]:
import plotly.express as px

# Load dataset (only the two columns this cell uses)
zip_path = "over_one_mil_chars.zip"
df = pd.read_csv(zip_path, compression='zip', usecols=["class_starting", "total_level"])

# Define level bins and create some labels.
# pd.cut builds right-closed intervals and leaves the lowest edge out, so the
# first edge has to be 0: (0, 5] -> "1-5", (5, 10] -> "6-10", and so on.
# With a first edge of 1 the interval would be (1, 5], and every level-1
# character would become NaN and silently drop out of the chart.
bins = [0, 5, 10, 15, 20]
labels = ["1-5", "6-10", "11-15", "16-20"]
df["level_range"] = pd.cut(df["total_level"], bins=bins, labels=labels)

# Filter for spellcasting classes
selected_classes = ["Bard", "Cleric", "Druid", "Sorcerer", "Warlock", "Wizard"]
df_filtered = df[df["class_starting"].isin(selected_classes)]

# Count classes by designated level ranges
# (observed=True: only level ranges that actually occur, since level_range is categorical)
df_grouped = df_filtered.groupby(["class_starting", "level_range"], observed=True).size().reset_index(name="count")

# Convert to percentages for readability: each class's share of the
# spellcasters within the same level range
df_total = df_grouped.groupby("level_range", observed=True)["count"].sum().reset_index(name="total_count")
df_percent = df_grouped.merge(df_total, on="level_range")
df_percent["percentage"] = (df_percent["count"] / df_percent["total_count"]) * 100

# Make colors same as official D&D character sheet themes
class_colors = {
    "Bard": "#AB6DAC",
    "Cleric": "#91A1B2",
    "Druid": "#7A853B",
    "Sorcerer": "#992E2E",
    "Warlock": "#7B469B",
    "Wizard": "#2A50A1"
}

# Create interactive Plotly grouped bar chart
fig = px.bar(
    df_percent,
    x="level_range",
    y="percentage",
    color="class_starting",
    text="percentage",
    title="Class Popularity Across Level Ranges",
    labels={"level_range": "Level Range", "percentage": "Percent of Players (%)", "class_starting": "Class"},
    barmode="group",
    color_discrete_map=class_colors
)

# Modify layout: one-decimal percentage labels placed above each bar
fig.update_traces(texttemplate='%{text:.1f}%', textposition='outside')
fig.update_layout(
    xaxis_title="Level Range",
    yaxis_title="Percentage of Players",
    # Rotate x-axis labels for better readability
    xaxis_tickangle=-45,
    height=600, width=1000,
    legend_title="Class"
)

# Show chart
fig.show()






In [1]:
import pandas as pd
import plotly.graph_objects as go
import numpy as np

# Load dataset
zip_path = "over_one_mil_chars.zip"
df = pd.read_csv(zip_path, usecols=["class_starting", "race"])

# Keep only six spellcasting classes
selected_classes = ["Bard", "Cleric", "Druid", "Sorcerer", "Warlock", "Wizard"]
df_filtered = df[df["class_starting"].isin(selected_classes)]

# Count number of each race per class
df_counts = df_filtered.groupby(["class_starting", "race"]).size().reset_index(name="count")

# Rank races for each class; keep only top 3.
# Dense ranking gives tied counts the same rank, so a class can keep more
# than three rows when counts tie.
df_counts["ranks"] = df_counts.groupby("class_starting")["count"].rank(method="dense", ascending=False)
df_top_races = df_counts[df_counts["ranks"] <= 3]

# Get unique classes and sorted races
unique_classes = sorted(df_top_races["class_starting"].unique())
sorted_races = sorted(df_top_races["race"].unique())

# Map from race to angles (evenly spaced around circle)
race_to_angle = {race: angle for race, angle in zip(sorted_races, np.linspace(0, 360, len(sorted_races), endpoint=False))}

# Make colors same as official D&D character sheet themes
class_colors = {
    "Bard": "#AB6DAC",
    "Cleric": "#91A1B2",
    "Druid": "#7A853B",
    "Sorcerer": "#992E2E",
    "Warlock": "#7B469B",
    "Wizard": "#2A50A1"
}

# Within each race, rows are ordered by ascending count, so the smallest bar
# is added to the figure first.
# NOTE(review): the earlier comment here said "largest first", which is not
# what ascending=True on "count" does. The sort is left unchanged so the
# rendered chart stays the same - confirm which order was intended.
df_sorted = df_top_races.sort_values(["race", "count"], ascending=[False, True])

# Create figure
fig = go.Figure()

# Loop through each race and add bars for each class
for race in sorted_races:
    race_data = df_sorted[df_sorted["race"] == race]

    for _, row in race_data.iterrows():
        cls = row["class_starting"]
        angle = race_to_angle[race]
        count = row["count"]

        # Add a radial bar for each class-race pair
        fig.add_trace(go.Barpolar(
            r=[count],
            # Position around the circle
            theta=[angle],
            # Each bar takes 90% of its race's angular slot
            width=[360 / len(sorted_races) * 0.9],
            marker_color=class_colors[cls],
            name=f"{cls} - {race}",
            # Tie the bar to its class's legend entry so clicking the legend toggles it
            legendgroup=cls,
            hoverinfo="text",
            text=f"{cls} ({count})",
            # Hide from legend
            showlegend=False
        ))

# Make it so that the legend is only the 6 classes and not the 18 different
# combinations of class and race
for cls in unique_classes:
    fig.add_trace(go.Barpolar(
        # Invisible bar
        r=[0], theta=[0],
        marker_color=class_colors[cls],
        # Only the class name appears in the legend
        name=cls,
        # Same group as the real bars of this class, so the legend entry controls them
        legendgroup=cls,
        hoverinfo="skip",
    ))

# Modify layout
fig.update_layout(
    title="Top 3 Races for Each Class (Interactive Radial Chart)",
    polar=dict(
        radialaxis=dict(showticklabels=True, tickfont_size=12),
        # Label each angular slot with its race name instead of the angle in degrees
        angularaxis=dict(showticklabels=True, tickmode="array", tickvals=list(race_to_angle.values()), ticktext=sorted_races)
    ),
    showlegend=True,
    margin=dict(l=120, r=120, t=80, b=80),
    height=800, width=800
)

# Show chart
fig.show()


In [35]:
import plotly.express as px

# Read just the class and note-length columns
zip_path = "over_one_mil_chars.zip"
df = pd.read_csv(zip_path, usecols=["class_starting", "notes_len"])

# Restrict to the six spellcasting classes
selected_classes = ["Bard", "Cleric", "Druid", "Sorcerer", "Warlock", "Wizard"]
df_filtered = df[df["class_starting"].isin(selected_classes)]

# Drop characters without notes (keeps only rows with notes_len > 0)
df_filt_nonzero = df_filtered[df_filtered["notes_len"] > 0]

# Mean note length for every class, returned as a two-column frame
avg_note_length_per_class = (
    df_filt_nonzero
    .groupby("class_starting")["notes_len"]
    .mean()
    .reset_index()
)

# Same class palette as the other figures
class_colors = {
    "Bard": "#AB6DAC",
    "Cleric": "#91A1B2",
    "Druid": "#7A853B",
    "Sorcerer": "#992E2E",
    "Warlock": "#7B469B",
    "Wizard": "#2A50A1"
}

# Lollipop chart, part 1: the "candy" is a scatter marker per class,
# labelled with the average rounded to one decimal
fig = px.scatter(
    avg_note_length_per_class,
    x="class_starting",
    y="notes_len",
    text=avg_note_length_per_class["notes_len"].round(1),
    color="class_starting",
    color_discrete_map=class_colors,
    title="Average Note Length per Spellcasting Class",
    labels={"class_starting": "Class", "notes_len": "Average Note Length"},
)

# Lollipop chart, part 2: the "stick" is a vertical line from 0 up to each average
for class_name, avg_len in zip(
    avg_note_length_per_class["class_starting"],
    avg_note_length_per_class["notes_len"],
):
    fig.add_shape(
        type="line",
        x0=class_name, x1=class_name,
        y0=0, y1=avg_len,
        line=dict(color="gray", width=2),
    )

# Styling: large outlined markers with the value printed above them
fig.update_traces(
    marker=dict(size=15, line=dict(width=2, color="black")),
    textposition="top center",
)
fig.update_layout(
    xaxis_title="Class",
    yaxis_title="Average Note Length",
    xaxis_tickangle=-25,
    height=600, width=700,
    showlegend=False
)

# Render the figure
fig.show()


In [40]:
import pandas as pd
import plotly.graph_objects as go
import numpy as np

# Load dataset
zip_path = "over_one_mil_chars.zip"
df = pd.read_csv(zip_path, usecols=["class_starting", "background"])

# Keep only six spellcasting classes
selected_classes = ["Bard", "Cleric", "Druid", "Sorcerer", "Warlock", "Wizard"]
df_filtered = df[df["class_starting"].isin(selected_classes)]

# Count number of each background per class
df_counts = df_filtered.groupby(["class_starting", "background"]).size().reset_index(name="count")

# Rank backgrounds for each class; keep only top 3.
# Dense ranking gives tied counts the same rank, so a class can keep more
# than three rows when counts tie.
df_counts["ranks"] = df_counts.groupby("class_starting")["count"].rank(method="dense", ascending=False)
df_top_backgrounds = df_counts[df_counts["ranks"] <= 3]

# Get unique classes and sorted backgrounds
unique_classes = sorted(df_top_backgrounds["class_starting"].unique())
sorted_backgrounds = sorted(df_top_backgrounds["background"].unique())

# Map from background to angles (evenly spaced around circle)
background_to_angle = {background: angle for background, angle in zip(sorted_backgrounds, np.linspace(0, 360, len(sorted_backgrounds), endpoint=False))}

# Make colors same as official D&D character sheet themes
class_colors = {
    "Bard": "#AB6DAC",
    "Cleric": "#91A1B2",
    "Druid": "#7A853B",
    "Sorcerer": "#992E2E",
    "Warlock": "#7B469B",
    "Wizard": "#2A50A1"
}

# Within each background, rows are ordered by ascending count, so the
# smallest bar is added to the figure first.
# NOTE(review): the earlier comment here said "largest first", which is not
# what ascending=True on "count" does. The sort is left unchanged so the
# rendered chart stays the same - confirm which order was intended.
df_sorted = df_top_backgrounds.sort_values(["background", "count"], ascending=[False, True])

# Create figure
fig = go.Figure()

# Loop through each background and add bars for each class
for background in sorted_backgrounds:
    background_data = df_sorted[df_sorted["background"] == background]

    for _, row in background_data.iterrows():
        cls = row["class_starting"]
        angle = background_to_angle[background]
        count = row["count"]

        # Add a radial bar for each class-background pair
        fig.add_trace(go.Barpolar(
            r=[count],
            theta=[angle],  # Position around the circle
            width=[360 / len(sorted_backgrounds) * 0.9],  # 90% of the background's angular slot
            marker_color=class_colors[cls],
            name=f"{cls} - {background}",
            # Tie the bar to its class's legend entry so clicking the legend toggles it
            legendgroup=cls,
            hoverinfo="text",
            text=f"{cls} ({count})",
            showlegend=False  # Hide from legend
        ))

# Make it so that the legend is only the 6 classes and not the 18 different
# combinations of class and background
for cls in unique_classes:
    fig.add_trace(go.Barpolar(
        # Invisible bar
        r=[0], theta=[0],
        # Look the color up in class_colors, the palette defined in this cell;
        # the name class_color_map used here before is not defined
        marker_color=class_colors[cls],
        name=cls,  # Only the class name appears in the legend
        # Same group as the real bars of this class, so the legend entry controls them
        legendgroup=cls,
        hoverinfo="skip",
    ))

# Modify layout
fig.update_layout(
    title="Top 3 Backgrounds for Each Class (Interactive Radial Chart)",
    polar=dict(
        radialaxis=dict(showticklabels=True, tickfont_size=12),
        # Label each angular slot with its background name instead of the angle in degrees
        angularaxis=dict(showticklabels=True, tickmode="array", tickvals=list(background_to_angle.values()), ticktext=sorted_backgrounds)
    ),
    showlegend=True,
    height=800, width=800
)

# Show chart
fig.show()


In [33]:
import pandas as pd
import plotly.express as px

# Load dataset
file_path = "cleaned_data_DnD_smaller.csv"
df = pd.read_csv(file_path)

# Filter for six spellcasting classes
selected_classes = ["Bard", "Cleric", "Druid", "Sorcerer", "Warlock", "Wizard"]
df_filtered = df[df["justClass"].isin(selected_classes)]

# Spell levels included in the chart
spell_levels = [0, 1, 2]

# Nested tally: class -> spell level -> spell name -> number of occurrences
class_spell_counts = {cls: {level: {} for level in spell_levels} for cls in selected_classes}

# Parse each character's spell list and tally spells by class and level.
# The parsing below treats "processedSpells" as "|"-separated entries of the
# form "<name>*<level>"; entries that do not match are skipped.
# Only two columns are needed, so iterate over them directly rather than
# building a full row Series per character with iterrows().
for cls, spells in zip(df_filtered["justClass"], df_filtered["processedSpells"]):
    # Characters without a spell list contribute nothing
    if pd.isna(spells):
        continue
    for spell in spells.split("|"):
        # Split at asterisk to separate spell name and level
        parts = spell.split("*")
        if len(parts) != 2:
            continue
        spell_name, level = parts[0].strip(), parts[1].strip()
        if not level.isdigit():
            continue
        level = int(level)
        # Only process the configured spell levels
        if level in spell_levels:
            level_counts = class_spell_counts[cls][level]
            level_counts[spell_name] = level_counts.get(spell_name, 0) + 1

# Find most popular spell per class per level
top_class_spells = []
for cls in selected_classes:
    for level in spell_levels:
        level_counts = class_spell_counts[cls][level]
        # Skip class/level combinations for which no spell was recorded
        if not level_counts:
            continue
        # On ties, max() keeps the spell that was encountered first
        top_spell = max(level_counts, key=level_counts.get)
        count = level_counts[top_spell]
        top_class_spells.append({
            "Class": cls,
            "Spell Level": f"Level {level}",
            "Spell": top_spell,
            "Count": count
        })

# Convert to DataFrame for visualization
df_top_class_spells = pd.DataFrame(top_class_spells)

# Custom color scheme for D&D spellcasting classes
class_colors = {
    "Bard": "#AB6DAC",
    "Cleric": "#91A1B2",
    "Druid": "#7A853B",
    "Sorcerer": "#992E2E",
    "Warlock": "#7B469B",
    "Wizard": "#2A50A1"
}

# Create interactive grouped bar chart using Plotly
fig = px.bar(
    df_top_class_spells,
    x="Spell Level",
    y="Count",
    color="Class",
    text="Spell",  # Print the spell name on each bar
    barmode="group",
    title="Most Popular Spells at Levels 0, 1, and 2 per Class",
    labels={"Count": "Spell Popularity", "Spell Level": "Spell Level"},
    # Include spell name, class and count in the hover tooltip
    hover_data={"Spell": True, "Class": True, "Count": True},
    # Apply custom color scheme
    color_discrete_map=class_colors
)

# Improve readability
fig.update_layout(
    xaxis_title="Spell Level",
    yaxis_title="Spell Popularity (Count)",
    legend_title="Class",
    bargap=0.15,  # Gap between the bar groups of adjacent spell levels
    height=800, width=1000
)

# Show the interactive chart
fig.show()


In [34]:
import plotly.express as px
import pandas as pd

# Read just the starting class and starting subclass columns
zip_path = "over_one_mil_chars.zip"
df = pd.read_csv(zip_path, usecols=["class_starting", "subclass_starting"])

# Restrict to the six spellcasting classes
selected_classes = ["Bard", "Cleric", "Druid", "Sorcerer", "Warlock", "Wizard"]
is_spellcaster = df["class_starting"].isin(selected_classes)
df_filtered = df[is_spellcaster]

# One row per (class, subclass) pair with the number of characters in it
df_counts = (
    df_filtered
    .groupby(["class_starting", "subclass_starting"])
    .size()
    .reset_index(name="count")
)

# Dense-rank subclasses inside each class by count (rank 1 = most common)
# and keep ranks 1 to 3
df_counts["ranks"] = (
    df_counts
    .groupby("class_starting")["count"]
    .rank(method="dense", ascending=False)
)
df_top_subclasses = df_counts[df_counts["ranks"] <= 3]

# Class palette shared with the other figures
class_colors = {
    "Bard": "#AB6DAC",
    "Cleric": "#91A1B2",
    "Druid": "#7A853B",
    "Sorcerer": "#992E2E",
    "Warlock": "#7B469B",
    "Wizard": "#2A50A1"
}

# Sunburst: inner ring is the class, outer ring its top subclasses,
# wedge size is the character count, wedge color follows the class.
# (Author's note: much easier to build after the radial charts.)
fig_sunburst = px.sunburst(
    df_top_subclasses,
    path=["class_starting", "subclass_starting"],
    values="count",
    title="Top 3 Subclasses for Each Class (Sunburst Chart)",
    color="class_starting",
    color_discrete_map=class_colors,
)

# Square canvas, larger font, and tight margins apart from room for the title
fig_sunburst.update_layout(
    width=700,
    height=700,
    font=dict(size=16),
    margin=dict(t=80, l=10, r=10, b=10),
)

# Render the figure
fig_sunburst.show()


In [31]:
import pandas as pd
import plotly.express as px

# Read just the starting class and gold columns
zip_path = "over_one_mil_chars.zip"
df = pd.read_csv(zip_path, usecols=["class_starting", "gold"])

# Restrict to the six spellcasting classes
selected_classes = ["Bard", "Cleric", "Druid", "Sorcerer", "Warlock", "Wizard"]
df_filtered = df[df["class_starting"].isin(selected_classes)]

# Keep only characters with a strictly positive amount of gold below 350,000.
# Author's rationale for the cutoff: a box plot showed very few points above
# 350,000, and standard-deviation / IQR based trimming both looked weird, so
# this fixed threshold was the compromise.
gold = df_filtered["gold"]
has_gold = gold > 0
below_cutoff = gold < 350_000
df_filtered = df_filtered[has_gold & below_cutoff]

# Mean gold per class, with the value column renamed for display
df_avg_gold = (
    df_filtered
    .groupby("class_starting", as_index=False)["gold"]
    .mean()
    .rename(columns={"gold": "Average Gold"})
)

# Class palette shared with the other figures
class_colors = {
    "Bard": "#AB6DAC",
    "Cleric": "#91A1B2",
    "Druid": "#7A853B",
    "Sorcerer": "#992E2E",
    "Warlock": "#7B469B",
    "Wizard": "#2A50A1"
}

# One bar per class, colored with the class palette
fig = px.bar(
    df_avg_gold,
    x="class_starting",
    y="Average Gold",
    title="Average Gold per Spellcasting Class",
    labels={"class_starting": "Class", "Average Gold": "Gold (Avg)"},
    color="class_starting",
    color_discrete_map=class_colors,
)

# Figure size, axis titles and title font size
fig.update_layout(
    width=700,
    height=600,
    xaxis_title="Character Class",
    yaxis_title="Average Gold",
    title_font_size=18,
)

fig.show()