In [3]:
import pandas as pd
import plotly.express as px

In [4]:
# Load the stipend table from a path relative to the notebook's working directory.
# Later cells read its "University", "Academic Year", "Department" and "Overall Pay" columns.
stipends = pd.read_csv("data/boston_stipends.csv")

In [5]:
# Number of rows per university, i.e. how much data backs each line in the chart below.
stipends["University"].value_counts()

University
Harvard University         159
MIT                        144
Boston University          139
Northeastern University     82
Tufts University            58
UMass Boston                18
Name: count, dtype: int64

In [6]:
# Mean "Overall Pay" for every (academic year, university) pair. reset_index()
# turns the group keys back into ordinary columns so they can be plotted by name.
avg_by_year = (
    stipends
    .loc[:, ["Academic Year", "University", "Overall Pay"]]
    .groupby(["Academic Year", "University"])
    .mean()
    .reset_index()
)

In [7]:
# Number of rows per academic year; years with few rows give noisy averages.
stipends["Academic Year"].value_counts()

Academic Year
2020    119
2016     92
2022     73
2018     67
2019     58
2021     50
2017     34
2023     31
2014     19
2025     19
2015     15
2024     14
2013      5
2011      2
2012      2
Name: count, dtype: int64

In [8]:
# Line chart of the mean overall pay per university and academic year.
stipends_over_time = px.line(
    avg_by_year,
    x="Academic Year",
    y="Overall Pay",
    color="University",
    markers=True,
    title="While average stipends have gone up,<br>they are still below a living wage for the Boston area",
    width=1000,
    height=600,
).update_layout(
    # "$" prefix plus the d3-format spec ",.0f" (thousands separator, no
    # decimals) yields ticks such as $30,000, matching the annotation text.
    yaxis_tickprefix="$",
    yaxis_tickformat=",.0f",
).update_yaxes(title="Overall Pay (Average)")

# Horizontal reference lines; each value is the figure quoted in its annotation.
stipends_over_time.add_hline(
    y=63942,
    line_dash="dash",
    annotation_text="2025 Boston Living Wage: $63,942",
    line=dict(color="#A2A2A2"),
)
stipends_over_time.add_hline(
    y=15650,
    line_dash="dash",
    annotation_text="Massachusetts Poverty Line: $15,650",
    line=dict(color="#A2A2A2"),
)
# Vertical marker at academic year 2025.
stipends_over_time.add_vline(x=2025, line_dash="dot", line=dict(color="#A2A2A2"))

stipends_over_time.show()

In [9]:
def dept_name(elem):
    """Map a department label to its canonical spelling.

    Each known alias collapses to one name per department. Matching is an
    exact membership test, so any value that is not listed (including
    differently-cased strings and non-string values) is returned unchanged.
    """
    canonical_to_aliases = (
        ("computer science",
         ("computer science", "khoury college of computer sciences", "khoury", "computer", "phd in computer science")),
        ("economics",
         ("economics", "econ")),
        ("english",
         ("english", "english phd")),
        ('marine and environmental sciences',
         ('marine and environmental science', 'marine and environmental sciences')),
        ('sociology and anthropology',
         ('sociology and anthropology', 'sociology')),
        ('mechanical and industrial engineering',
         ('mechanical and industrial engineering', 'mechanical engineering', 'industrial engineering', 'college of engineering')),
        ('psychology',
         ('psychology', 'counseling psychology', 'applied psychology')),
    )
    for canonical, aliases in canonical_to_aliases:
        if elem in aliases:
            return canonical
    return elem

# Normalise department names for Northeastern rows only. The masked .loc
# assignment writes just those rows; assigning the filtered Series to the whole
# column would index-align and replace every other university's Department
# with NaN.
is_neu = stipends["University"] == "Northeastern University"
stipends.loc[is_neu, "Department"] = stipends.loc[is_neu, "Department"].apply(dept_name)

In [10]:
# Mean "Overall Pay" per (academic year, department), restricted to
# Northeastern University rows.
neu_stipends = (
    stipends
    .loc[
        stipends["University"] == "Northeastern University",
        ["Academic Year", "Department", "Overall Pay"],
    ]
    .groupby(["Academic Year", "Department"])
    .mean()
    .reset_index()
)

In [11]:
# neu_stipends has one row per (academic year, department), so this counts the
# number of academic years for which each department has an average.
neu_stipends["Department"].value_counts()


Department
computer science                         6
bioengineering                           5
sociology and anthropology               5
psychology                               5
english                                  4
political science                        4
biology                                  3
electrical and computer engineering      3
history                                  3
physics                                  3
economics                                2
interdisciplinary design and media       2
mathematics                              1
criminology                              1
public policy                            1
population health                        1
mechanical and industrial engineering    1
pharmacology                             1
computer engineering                     1
marine and environmental sciences        1
civil and environmental engineering      1
chemical engineering                     1
health sciences                          1


In [12]:
# Hand-picked departments to keep in the Northeastern per-department chart.
depts_with_data = [
    'computer science',
    'psychology',
    'bioengineering',
    'english',
    'sociology and anthropology',
    'political science',
    'mechanical and industrial engineering',
    'electrical and computer engineering',
    'biology',
    'physics',
    'history',
]

# Keep only the yearly averages belonging to those departments.
neu_avgs = neu_stipends.loc[neu_stipends["Department"].isin(depts_with_data)]

In [13]:
# Line chart of the mean overall pay per Northeastern department and academic year.
neu_stipends_time = px.line(
    neu_avgs,
    x="Academic Year",
    y="Overall Pay",
    color="Department",
    markers=True,
    height=800,
).update_layout(
    # "$" prefix plus the d3-format spec ",.0f" (thousands separator, no
    # decimals) yields ticks such as $30,000, matching the annotation text.
    yaxis_tickprefix="$",
    yaxis_tickformat=",.0f",
).update_yaxes(title="Overall Pay (Average)")

# Horizontal reference lines; each value is the figure quoted in its annotation.
neu_stipends_time.add_hline(
    y=63942,
    line_dash="dash",
    annotation_text="2025 Boston Living Wage: $63,942",
    line=dict(color="#A2A2A2"),
)
neu_stipends_time.add_hline(
    y=15650,
    line_dash="dash",
    annotation_text="Massachusetts Poverty Line: $15,650",
    line=dict(color="#A2A2A2"),
)
# Vertical marker at academic year 2025.
neu_stipends_time.add_vline(x=2025, line_dash="dot", line=dict(color="#A2A2A2"))
neu_stipends_time.show()

### Timeline

In [14]:
import numpy as np
import textwrap

In [66]:
# Load the negotiation records from a path relative to the notebook's working directory.
# Later cells read its "Article", "Date", "Party" and "Changes from Previous Version" columns.
negotiations = pd.read_csv("data/contract_negotiations.csv")

In [67]:
# Parse the "Date" strings into timestamps, kept in a separate "Start Date" column.
negotiations["Start Date"] = pd.to_datetime(negotiations["Date"])

# Ordered list of the distinct start dates, followed by one fixed closing date.
# That final entry is the one the timeline axis and the slider label "Present".
times = list(negotiations["Start Date"].unique())
times.sort()
times.append(pd.to_datetime("2025-05-30"))

In [68]:
def end_date(elem):
    """Return the entry of the module-level ``times`` list that follows ``elem``.

    ``elem`` has to be present in ``times``; ``list.index`` raises
    ``ValueError`` otherwise. The closing date appended to ``times`` is what
    gives the latest start date a successor.
    """
    position = times.index(elem)
    return times[position + 1]


# Each row ends at the next distinct date in `times` after its own start date.
negotiations["End Date"] = negotiations["Start Date"].apply(end_date)

In [55]:
# Row count for every (Article, Date) combination.
all_dates = negotiations[["Article", "Date"]].groupby("Article").value_counts()

In [56]:
# Display the per-article date counts.
all_dates

Article                          Date      
Accessibility                    8/26/2024      9
Appointment Security             12/20/2024     5
                                 4/3/2024       5
                                 3/11/2025      1
Appointments and Reappointments  5/15/2024      6
                                               ..
Workspace and Materials          9/19/2024     20
                                 8/26/2024     18
                                 11/18/2024    13
                                 12/9/2024     13
                                 3/11/2025     13
Name: count, Length: 139, dtype: int64

In [81]:
# Articles whose rows all get their "End Date" extended to the final entry of `times`.
# Presumably these are articles with a single proposal date -- TODO confirm against the data.
single_adds = [
    'Accessibility',
    "Artificial Intelligence",
    'Comprehensive and Complete Agreement',
    'FERPA Waiver Form',
    'Hourly Assignments',
    'Housing',
    'No Strike No Lockout',
    'Relocation Assistance',
    'Retirement',
    'Sub-Contracting',
    'Tax Assistance',
    'Tuition and Fees',
]

In [78]:
# (article, date string) pairs whose matching rows get their "End Date" extended
# to the final entry of `times`. The date strings are compared against the raw
# "Date" column, so they must use the same spelling as the CSV.
# Presumably each pair is the article's most recent proposal -- TODO confirm.
last_adds = [
    ('Appointment Security', '3/11/2025'),
    ('Appointments and Reappointments', '3/25/2025'),
    ('Automation', '1/28/2025'),
    ('Bargaining Ground Rules', '2/16/2024'),
    ('Discipline and Dismissal', '3/25/2025'),
    ('Employment Records', '4/11/2025'),
    ('Grievance and Arbitration', '2/18/2025'),
    ('Health and Safety', '4/11/2025'),
    ('Holidays', '12/20/2024'),
    ('Intellectual Property', '3/11/2025'),
    ('International Worker Rights', '3/11/2025'),
    ('Job Postings', '2/18/2025'),
    ('Labor Management Committee', '10/11/2024'),
    ('Management Rights', '2/18/2025'),
    ('Parking and Transit', '8/26/2024'),
    ('Professional Development', '3/25/2025'),
    ('Professional and Academic Freedom', '3/11/2025'),
    ('Prohibition Against Discrimination and Harassment', '4/11/2025'),
    ('Recognition', '3/11/2025'),
    ('Severability', '9/19/2024'),
    ('Successorship', '1/28/2025'),
    ('Titles and Classifications', '3/25/2025'),
    ('Training', '3/25/2025'),
    ('Travel', '3/25/2025'),
    ('Union Access and Rights', '3/11/2025'),
    ('Union Officers and Stewards', '3/11/2025'),
    ('Union Security', '2/18/2025'),
    ('Vacation and Personal Time', '8/26/2024'),
    ('Workspace and Materials', '3/11/2025'),
]

In [82]:
# Every row of an article listed in single_adds ends at the final entry of `times`.
is_single_add = negotiations["Article"].isin(single_adds)
negotiations.loc[is_single_add, "End Date"] = times[-1]

In [79]:
# Rows matching an (article, date string) pair from last_adds end at the final entry of `times`.
for article, date in last_adds:
    is_listed_row = (negotiations["Article"] == article) & (negotiations["Date"] == date)
    negotiations.loc[is_listed_row, "End Date"] = times[-1]

In [59]:
def split_string(elem, width=70):
    """Wrap tooltip text so it can be shown as multiple HTML lines.

    Parameters
    ----------
    elem : str
        Text to wrap. Any non-string value (for example the float NaN that
        pandas uses for an empty CSV cell) yields an empty string instead of
        raising inside ``textwrap``.
    width : int, default 70
        Maximum number of characters per line.

    Returns
    -------
    str
        The wrapped lines joined with ``<br>``.
    """
    if not isinstance(elem, str):
        return ""
    return "<br>".join(textwrap.wrap(elem, width=width))

In [76]:
# Pre-wrap the change descriptions for use in the timeline hover text.
# This overwrites the column in place, so re-running the cell wraps text that
# already contains "<br>" separators.
changes_col = "Changes from Previous Version"
negotiations[changes_col] = negotiations[changes_col].apply(split_string)

In [83]:
def negotiation_timeline(range_=None):
    """Build the timeline figure of contract proposals per article.

    Reads the module-level ``negotiations`` frame (one bar per row, from
    "Start Date" to "End Date", coloured by "Party") and the module-level
    ``times`` list (used for the x-axis ticks).

    Parameters
    ----------
    range_ : sequence of two dates, optional
        Passed through as the x-axis range. ``None`` leaves the range to
        plotly.

    Returns
    -------
    The plotly figure.
    """
    timeline = px.timeline(
        negotiations,
        x_start="Start Date",
        x_end="End Date",
        y="Article",
        color="Party",
        # custom_data is documented as a list of column names.
        custom_data=["Changes from Previous Version"],
        labels={"Article": ""},
    )

    # px.timeline draws each bar from `base` (x_start) to x (x_end), so the
    # date of the proposal itself is %{base}; %{x} would show the bar's end.
    timeline.update_traces(
        hovertemplate=(
            "<b>Topic:</b> %{y} <br>"
            "<b>Date: </b> %{base} <br>"
            "%{customdata[0]}<extra></extra>"
        )
    )

    # One tick per entry of `times`; the final entry is labelled "Present"
    # rather than with its date.
    tick_labels = [time.date().strftime("%b %d, %y") for time in times[:-1]] + ["Present"]
    timeline.update_layout(
        xaxis=dict(
            tickmode="array",
            tickvals=times,
            ticktext=tick_labels,
            range=range_,
        ),
    )
    return timeline

# Initial view: from the earliest start date to the latest end date.
full_span = [negotiations["Start Date"].min(), negotiations["End Date"].max()]
timeline = negotiation_timeline(full_span)

timeline.show()

In [28]:
from dash import Dash, dcc, html, callback, Output, Input

In [51]:
# Position of the final entry of `times`, which the slider labels "Present".
last_tick = len(times) - 1

app = Dash()
app.layout = html.Div([
    html.H3("Timeline of Contract Negotiations"),
    dcc.Graph(
        figure=timeline,
        id="negotiation-timeline",
    ),
    # The slider selects positions in `times` (0 .. last_tick), not dates;
    # the callback translates the positions back into dates.
    dcc.RangeSlider(
        min=0,
        max=last_tick,
        step=1,
        value=[0, last_tick],
        # Integers are compared by value (==). `is not` tests object identity
        # and only happens to work for small ints that CPython caches.
        marks={
            k: "Present" if k == last_tick else v.date().strftime("%m/%d/%y")
            for k, v in enumerate(times)
        },
        id="timeline-slider",
    ),
])

@app.callback(
    Output("negotiation-timeline", "figure"),
    Input("timeline-slider", "value"),  # pair of positions into `times`
)
def update_timeline(dates):
    """Rebuild the timeline figure for the slider's selected range.

    ``dates`` holds two positions into the module-level ``times`` list; they
    are converted to the corresponding dates and used as the x-axis range.
    """
    first_idx = dates[0]
    last_idx = dates[1]
    visible_span = [times[first_idx], times[last_idx]]
    return negotiation_timeline(visible_span)

# Start the Dash server with debug mode enabled.
app.run(debug=True, use_reloader = False)  # Turn off reloader if inside Jupyter