In [1]:
import pandas as pd
import plotly.express as px

In [2]:
# Load the stipend records; the path is relative to the notebook's working directory.
stipends = pd.read_csv(filepath_or_buffer="data/boston_stipends.csv")

In [3]:
# Number of stipend records per university (most frequent first).
stipends.loc[:, "University"].value_counts()

University
Harvard University         159
MIT                        144
Boston University          139
Northeastern University     82
Tufts University            58
UMass Boston                18
Name: count, dtype: int64

In [4]:
# Mean "Overall Pay" for every (academic year, university) pair, flattened
# back into ordinary columns so it can be handed straight to plotly.
avg_by_year = (
    stipends
    .groupby(["Academic Year", "University"])[["Overall Pay"]]
    .mean()
    .reset_index()
)

In [5]:
# Number of stipend records per academic year (most frequent first).
stipends.loc[:, "Academic Year"].value_counts()

Academic Year
2020    119
2016     92
2022     73
2018     67
2019     58
2021     50
2017     34
2023     31
2014     19
2025     19
2015     15
2024     14
2013      5
2011      2
2012      2
Name: count, dtype: int64

In [27]:
# Reference values drawn on the chart; the dollar amounts match the
# annotation labels rendered next to each line.
BOSTON_LIVING_WAGE_2025 = 63_942
MA_POVERTY_LINE = 15_650
REFERENCE_LINE_COLOR = "#A2A2A2"

# One line per university: average "Overall Pay" by academic year.
stipends_over_time = px.line(
    avg_by_year,
    x="Academic Year",
    y="Overall Pay",
    color="University",
    markers=True,
    title="While average stipends have gone up,<br>they are still below a living wage for the Boston area",
)
stipends_over_time.update_layout(
    yaxis_tickprefix="$",
    # ",.0f" = thousands separator, zero decimals. The previous ",." had no
    # precision digits or type, so it was not a complete d3-format specifier.
    yaxis_tickformat=",.0f",
)
stipends_over_time.update_yaxes(title="Overall Pay (Average)")

# Horizontal reference lines for the living wage and the poverty line.
stipends_over_time.add_hline(
    y=BOSTON_LIVING_WAGE_2025,
    line_dash="dash",
    annotation_text=f"2025 Boston Living Wage: ${BOSTON_LIVING_WAGE_2025:,}",
    line=dict(color=REFERENCE_LINE_COLOR),
)
stipends_over_time.add_hline(
    y=MA_POVERTY_LINE,
    line_dash="dash",
    # Spelling fixed: the label previously read "Masschusetts".
    annotation_text=f"Massachusetts Poverty Line: ${MA_POVERTY_LINE:,}",
    line=dict(color=REFERENCE_LINE_COLOR),
)
# Vertical marker at academic year 2025.
stipends_over_time.add_vline(x=2025, line_dash="dot", line=dict(color=REFERENCE_LINE_COLOR))
stipends_over_time.show()

In [13]:
def dept_name(elem):
    """Collapse known spelling variants of a department name into one label.

    Parameters
    ----------
    elem
        A department name. Matching is an exact membership test against each
        alias group, so the value must already match an alias character for
        character.

    Returns
    -------
    The canonical label of the first alias group that contains ``elem``;
    otherwise ``elem`` itself, unchanged.
    """
    # (aliases, canonical label) pairs, checked in order.
    alias_groups = (
        (
            ("computer science", "khoury college of computer sciences", "khoury", "computer", "phd in computer science"),
            "computer science",
        ),
        (("economics", "econ"), "economics"),
        (("english", "english phd"), "english"),
        (
            ('marine and environmental science', 'marine and environmental sciences'),
            'marine and environmental sciences',
        ),
        (('sociology and anthropology', 'sociology'), 'sociology and anthropology'),
        (
            ('mechanical and industrial engineering', 'mechanical engineering', 'industrial engineering', 'college of engineering'),
            'mechanical and industrial engineering',
        ),
        (('psychology', 'counseling psychology', 'applied psychology'), 'psychology'),
    )
    for aliases, canonical in alias_groups:
        if elem in aliases:
            return canonical
    # Unknown names (and non-string values such as NaN) pass through untouched.
    return elem

# Normalise department names for Northeastern rows only.
# Assigning through .loc with the mask leaves every other university's
# "Department" value untouched. The previous whole-column assignment from a
# Northeastern-only Series was index-aligned, which set "Department" to NaN
# for all non-Northeastern rows.
neu_mask = stipends["University"] == "Northeastern University"
stipends.loc[neu_mask, "Department"] = stipends.loc[neu_mask, "Department"].apply(dept_name)

In [14]:
# Northeastern only: mean "Overall Pay" for every (academic year, department)
# pair, with the group keys restored as ordinary columns.
neu_stipends = (
    stipends.loc[stipends["University"] == "Northeastern University"]
    .groupby(["Academic Year", "Department"])[["Overall Pay"]]
    .mean()
    .reset_index()
)

In [16]:
# neu_stipends has one row per (academic year, department), so these counts
# are the number of academic years with data for each department.
neu_stipends.loc[:, "Department"].value_counts()


Department
computer science                         6
bioengineering                           5
sociology and anthropology               5
psychology                               5
english                                  4
political science                        4
biology                                  3
electrical and computer engineering      3
history                                  3
physics                                  3
economics                                2
interdisciplinary design and media       2
mathematics                              1
criminology                              1
public policy                            1
population health                        1
mechanical and industrial engineering    1
pharmacology                             1
computer engineering                     1
marine and environmental sciences        1
civil and environmental engineering      1
chemical engineering                     1
health sciences                          1


In [17]:
# Hand-picked departments to show on the per-department chart.
# NOTE(review): the value_counts output above lists
# 'mechanical and industrial engineering' with a single year -- confirm it
# still belongs in this list.
depts_with_data = [
    'computer science',
    'psychology',
    'bioengineering',
    'english',
    'sociology and anthropology',
    'political science',
    'mechanical and industrial engineering',
    'electrical and computer engineering',
    'biology',
    'physics',
    'history',
]

# Keep only the yearly averages that belong to the selected departments.
neu_avgs = neu_stipends.loc[neu_stipends["Department"].isin(depts_with_data)]

In [20]:
# One line per selected Northeastern department: average "Overall Pay" by
# academic year.
neu_stipends_time = px.line(
    neu_avgs,
    x="Academic Year",
    y="Overall Pay",
    color="Department",
    markers=True,
    height=800,
    # A title lets the figure stand on its own when the notebook is skimmed.
    title="Northeastern University: average stipend by department",
)
neu_stipends_time.update_layout(
    yaxis_tickprefix="$",
    # ",.0f" = thousands separator, zero decimals. The previous ",." had no
    # precision digits or type, so it was not a complete d3-format specifier.
    yaxis_tickformat=",.0f",
)
neu_stipends_time.update_yaxes(title="Overall Pay (Average)")
neu_stipends_time.show()