In [58]:
import pandas as pd
import altair as alt

# Load the per-major salary dataset
df = pd.read_csv('degrees-that-pay-back.csv')

# Salary columns, ordered from starting pay up through the mid-career percentiles
salary_columns = [
    'Starting Median Salary',
    'Mid-Career 10th Percentile Salary',
    'Mid-Career 25th Percentile Salary',
    'Mid-Career Median Salary',
    'Mid-Career 75th Percentile Salary',
    'Mid-Career 90th Percentile Salary'
]

# Convert every salary column to float.
# A column that pandas did not already parse as numeric is treated as
# currency-formatted text: "$" and "," are stripped before the cast.
# Testing for "not numeric" (instead of `dtype == object` plus a "$" probe) also
# covers pandas' dedicated string dtypes and text that only carries thousands
# separators, both of which would otherwise reach astype(float) unstripped.
for column in salary_columns:
    if not pd.api.types.is_numeric_dtype(df[column]):
        df[column] = df[column].str.replace(r'[$,]', '', regex=True)
    df[column] = df[column].astype(float)



# Keep only the majors that have a percent-change value
df = df[df['Percent change from Starting to Mid-Career Salary'].notna()]

# Reshape to long format: one row per (major, salary type) pair for the main chart
melted_df = pd.melt(
    df,
    id_vars=['Undergraduate Major'],
    value_vars=salary_columns,
    var_name='Salary Type',
    value_name='Median Salary',
)

# Dropdown selection for the Undergraduate Major.
# An initial value is supplied so the chart opens on a single major: with no
# value the selection starts empty, a filter on an empty selection keeps every
# row, and the bars of all majors end up stacked on top of each other.
majors_list = sorted(df['Undergraduate Major'].unique())
major_dropdown = alt.binding_select(options=majors_list, name='Select Major')
major_select = alt.selection_point(
    fields=['Undergraduate Major'],
    bind=major_dropdown,
    name='Major',
    value=[{'Undergraduate Major': majors_list[0]}]
)

# Dropdown selection for the Salary Type.
# It starts on the first salary column for the same reason: an empty selection
# would stack dollar amounts and the percent-change figure into one bar.
salary_types = salary_columns + ['Percent change from Starting to Mid-Career Salary']
salary_type_dropdown = alt.binding_select(options=salary_types, name='Select Salary Type')
salary_type_select = alt.selection_point(
    fields=['Salary Type'],
    bind=salary_type_dropdown,
    name='SalaryType',
    value=[{'Salary Type': salary_types[0]}]
)

# Bar chart of every salary figure, filtered to the major chosen in the dropdown
main_bar_chart = (
    alt.Chart(melted_df)
    .mark_bar()
    .encode(
        x='Salary Type:N',
        y='Median Salary:Q',
        color='Salary Type:N',
        tooltip=['Undergraduate Major:N', 'Salary Type:N', 'Median Salary:Q'],
    )
    .add_params(major_select)        # registers the dropdown on this chart
    .transform_filter(major_select)  # keeps only rows matching the dropdown
    .properties(width=600, height=300)
)

# Names of the ten majors with the highest 'Starting Median Salary'
top_ten_majors = df.nlargest(10, columns='Starting Median Salary')['Undergraduate Major']

# Rows of the full table that belong to those ten majors
df_top_ten = df.loc[df['Undergraduate Major'].isin(top_ten_majors)]

# Long format for the top-ten chart; the percent-change column is included here
melted_top_ten_df = pd.melt(
    df_top_ten,
    id_vars=['Undergraduate Major'],
    value_vars=[*salary_columns, 'Percent change from Starting to Mid-Career Salary'],
    var_name='Salary Type',
    value_name='Median Salary',
)

# Horizontal bars for the top ten majors, restricted to the chosen salary type
top_ten_bar_chart = (
    alt.Chart(melted_top_ten_df)
    .mark_bar()
    .encode(
        x='Median Salary:Q',
        # '-x' orders the majors by descending bar length
        y=alt.Y('Undergraduate Major:N', sort='-x'),
        color='Salary Type:N',
        tooltip=['Undergraduate Major:N', 'Salary Type:N', 'Median Salary:Q'],
    )
    .add_params(salary_type_select)
    .transform_filter(salary_type_select)
    .properties(width=300, height=300)
)

# Place both charts side by side; each keeps its own colour scale
side_by_side = alt.hconcat(main_bar_chart, top_ten_bar_chart, spacing=50)
combined_chart = side_by_side.resolve_scale(color='independent')

combined_chart

In [43]:
import pandas as pd
import altair as alt

# Read the per-school salary table (it carries 'School Name' and 'Region' columns)
df_region = pd.read_csv('salaries-by-region.csv')

# Salary columns to clean and plot
salary_columns = [
    'Starting Median Salary',
    'Mid-Career Median Salary',
    'Mid-Career 10th Percentile Salary',
    'Mid-Career 25th Percentile Salary',
    'Mid-Career 75th Percentile Salary',
    'Mid-Career 90th Percentile Salary',
]

# Strip "$" and "," from each salary column, then parse it as a number;
# anything that still fails to parse becomes NaN
for column in salary_columns:
    without_symbols = df_region[column].str.replace(r'[$,]', '', regex=True)
    df_region[column] = pd.to_numeric(without_symbols, errors='coerce')

# Long format: one row per (school, region, salary type)
melted_df_region = pd.melt(
    df_region,
    id_vars=['School Name', 'Region'],
    value_vars=salary_columns,
    var_name='Salary Type',
    value_name='Median Salary',
)

# One panel per salary type; within a panel every school is a circle placed by
# region (x) and salary (y), coloured by region and sized by salary
chart = (
    alt.Chart(melted_df_region)
    .mark_circle(size=100)
    .encode(
        x=alt.X('Region:N', axis=alt.Axis(title='Region')),
        y=alt.Y(
            'Median Salary:Q',
            axis=alt.Axis(title='Median Salary'),
            scale=alt.Scale(zero=True),  # y-axis starts at 0
        ),
        color=alt.Color('Region:N', legend=alt.Legend(title="Region")),
        tooltip=['School Name:N', 'Median Salary:Q', 'Region:N'],
        size=alt.Size('Median Salary:Q', legend=alt.Legend(title="Median Salary")),
        # Panels follow the order of salary_columns
        column=alt.Column('Salary Type:N', sort=salary_columns, header=alt.Header(title=None)),
    )
    .properties(width=180, height=300)  # size of each panel
    .interactive()
)

chart

In [48]:
import pandas as pd
import altair as alt

# Read the per-school salary table with its region labels
df_region = pd.read_csv('salaries-by-region.csv')

# Columns holding salary figures
salary_columns = [
    'Starting Median Salary',
    'Mid-Career Median Salary',
    'Mid-Career 10th Percentile Salary',
    'Mid-Career 25th Percentile Salary',
    'Mid-Career 75th Percentile Salary',
    'Mid-Career 90th Percentile Salary',
]

# Remove "$" and "," from every salary column and parse the rest as numbers;
# values that cannot be parsed are turned into NaN
for column in salary_columns:
    cleaned_text = df_region[column].str.replace(r'[$,]', '', regex=True)
    df_region[column] = pd.to_numeric(cleaned_text, errors='coerce')

# Reshape to long format for Altair
melted_df_region = pd.melt(
    df_region,
    id_vars=['School Name', 'Region'],
    value_vars=salary_columns,
    var_name='Salary Type',
    value_name='Median Salary',
)

# Faceted circle chart: one panel per salary type, schools positioned by
# region (x) and salary (y), coloured by region and sized by salary
chart = (
    alt.Chart(melted_df_region)
    .mark_circle(size=100)
    .encode(
        x=alt.X('Region:N', axis=alt.Axis(title='Region')),
        y=alt.Y(
            'Median Salary:Q',
            axis=alt.Axis(title='Median Salary'),
            scale=alt.Scale(zero=True),  # y-axis starts at zero
        ),
        color=alt.Color('Region:N', legend=alt.Legend(title="Region")),
        tooltip=['School Name:N', 'Median Salary:Q', 'Region:N'],
        size=alt.Size('Median Salary:Q', legend=alt.Legend(title="Median Salary")),
        column=alt.Column('Salary Type:N', header=alt.Header(title=None)),
    )
    .properties(width=180, height=300)  # size of each panel
)

# Pan/zoom interaction for the chart.
# Vega-Lite only supports scale-bound interval selections on continuous scales,
# so the binding is projected onto the quantitative y-axis (Median Salary).
# The nominal Region x-axis cannot be panned or zoomed this way.
selection = alt.selection_interval(bind='scales', encodings=['y'])

# Attach the interaction with add_params (add_selection is deprecated in
# Altair 5) and remove the view border. No additional .interactive() call is
# made: the selection above already provides the scale binding, and a second
# scale-bound selection would only duplicate it.
final_chart = chart.add_params(
    selection
).configure_view(
    stroke=None  # Remove border around chart
)

final_chart



In [59]:
import pandas as pd
import altair as alt

# Read the per-school salaries, which carry a 'School Type' column
df = pd.read_csv('salaries-by-college-type.csv')

# Columns holding salary figures
salary_columns = [
    'Starting Median Salary',
    'Mid-Career Median Salary',
    'Mid-Career 10th Percentile Salary',
    'Mid-Career 25th Percentile Salary',
    'Mid-Career 75th Percentile Salary',
    'Mid-Career 90th Percentile Salary',
]

# Strip "$" and "," from each salary column and cast the result to float
for column in salary_columns:
    digits_only = df[column].str.replace(r'[$,]', '', regex=True)
    df[column] = digits_only.astype(float)

# Long format for Altair: one row per (school, school type, salary measure)
melted_df = pd.melt(
    df,
    id_vars=['School Name', 'School Type'],
    value_vars=salary_columns,
    var_name='Salary Measure',
    value_name='Salary',
)

# Dropdown selection for School Type
school_types = sorted(df['School Type'].unique())
school_type_dropdown = alt.binding_select(options=school_types, name='School Type')
school_type_select = alt.selection_point(fields=['School Type'], bind=school_type_dropdown, name='Select')

# Left-to-right order of the x-axis: starting pay first, then the mid-career
# figures from the 10th to the 90th percentile, so each line runs through the
# measures in ascending percentile order instead of jumping back from the median.
measure_order = [
    'Starting Median Salary',
    'Mid-Career 10th Percentile Salary',
    'Mid-Career 25th Percentile Salary',
    'Mid-Career Median Salary',
    'Mid-Career 75th Percentile Salary',
    'Mid-Career 90th Percentile Salary',
]

# Line chart with one line per school, connecting that school's salary measures.
# Lines are grouped by the `detail` and `color` channels. Colouring by
# 'Salary Measure' would put each measure in its own group, so no line could
# ever join two different measures. Grouping by school name and colouring by
# school type keeps all measures of a school on a single line.
line_chart = alt.Chart(melted_df).mark_line(point=True).encode(
    x=alt.X('Salary Measure:N', title='Salary Measure', sort=measure_order),
    y=alt.Y('Salary:Q', title='Salary', scale=alt.Scale(zero=False)),
    detail='School Name:N',  # one line per school
    color='School Type:N',
    tooltip=['School Name:N', 'Salary Measure:N', 'Salary:Q']
).transform_filter(
    school_type_select  # Filter chart to the selected school type
).properties(
    title="Salary Range by School Type"
).add_params(
    school_type_select  # add_params replaces add_selection, deprecated in Altair 5
).interactive(
    bind_x=False  # scale binding needs a continuous scale; the x-axis is nominal
)

line_chart

