In [1]:
import pandas as pd
import plotly.express as px

In [2]:
# Load the stipend records. Later cells read the "University", "Academic Year",
# "Overall Pay" and "Department" columns from this frame.
stipends = pd.read_csv("data/boston_stipends.csv")

In [3]:
# Number of stipend rows per university.
stipends["University"].value_counts()

University
Harvard University         159
MIT                        144
Boston University          139
Northeastern University     82
Tufts University            58
UMass Boston                18
Name: count, dtype: int64

In [4]:
# Mean "Overall Pay" for each (academic year, university) pair, with the group
# keys moved back into ordinary columns so the frame can be plotted directly.
avg_by_year = (
    stipends[["Academic Year", "University", "Overall Pay"]]
    .groupby(["Academic Year", "University"])
    .mean()
    .reset_index()
)

In [5]:
# Number of stipend rows per academic year.
stipends["Academic Year"].value_counts()

Academic Year
2020    119
2016     92
2022     73
2018     67
2019     58
2021     50
2017     34
2023     31
2014     19
2025     19
2015     15
2024     14
2013      5
2011      2
2012      2
Name: count, dtype: int64

In [6]:
# Line chart of the average overall pay per university for each academic year.
stipends_over_time = px.line(
    avg_by_year,
    x="Academic Year",
    y="Overall Pay",
    color="University",
    markers=True,
    title="While average stipends have gone up,<br>they are still below a living wage for the Boston area",
    width=1000,
    height=600,
).update_layout(
    yaxis_tickprefix="$",
    # Complete d3-format specifier: thousands separators, no decimal places.
    # (",." on its own has a "." with no precision digits.)
    yaxis_tickformat=",.0f",
).update_yaxes(title="Overall Pay (Average)")

# Dashed horizontal reference lines for the two annotated dollar thresholds.
stipends_over_time.add_hline(
    y=63942,
    line_dash="dash",
    annotation_text="2025 Boston Living Wage: $63,942",
    line=dict(color="#A2A2A2"),
)
stipends_over_time.add_hline(
    y=15650,
    line_dash="dash",
    annotation_text="Massachusetts Poverty Line: $15,650",
    line=dict(color="#A2A2A2"),
)
# Dotted vertical marker at academic year 2025.
stipends_over_time.add_vline(x=2025, line_dash="dot", line=dict(color="#A2A2A2"))

stipends_over_time.show()

In [7]:
def dept_name(elem):
    """Collapse known department-name variants into one canonical label.

    Each entry in the table below pairs a canonical name with the spellings
    that should map onto it. The first group containing ``elem`` wins; a value
    that appears in no group is returned unchanged.
    """
    alias_groups = (
        ("computer science",
         ["computer science", "khoury college of computer sciences", "khoury", "computer", "phd in computer science"]),
        ("economics",
         ["economics", "econ"]),
        ("english",
         ["english", "english phd"]),
        ('marine and environmental sciences',
         ['marine and environmental science', 'marine and environmental sciences']),
        ('sociology and anthropology',
         ['sociology and anthropology', 'sociology']),
        ('mechanical and industrial engineering',
         ['mechanical and industrial engineering', 'mechanical engineering', 'industrial engineering', 'college of engineering']),
        ('psychology',
         ['psychology', 'counseling psychology', 'applied psychology']),
    )
    for canonical, aliases in alias_groups:
        if elem in aliases:
            return canonical
    return elem

# Normalise department names for Northeastern rows only.
# The write goes through a boolean .loc mask so that every other university's
# "Department" value is left as-is. Assigning the filtered Series to the whole
# column instead aligns on the index and fills all non-Northeastern rows with NaN.
is_northeastern = stipends["University"] == "Northeastern University"
stipends.loc[is_northeastern, "Department"] = (
    stipends.loc[is_northeastern, "Department"].apply(dept_name)
)

In [8]:
# Mean "Overall Pay" per (academic year, department) for Northeastern rows,
# returned as a flat frame with one row per pair.
neu_stipends = (
    stipends.loc[
        stipends["University"] == "Northeastern University",
        ["Academic Year", "Department", "Overall Pay"],
    ]
    .groupby(["Academic Year", "Department"])
    .mean()
    .reset_index()
)

In [9]:
# neu_stipends holds one row per (academic year, department) pair, so this
# counts how many academic years have data for each department.
neu_stipends["Department"].value_counts()


Department
computer science                         6
bioengineering                           5
sociology and anthropology               5
psychology                               5
english                                  4
political science                        4
biology                                  3
electrical and computer engineering      3
history                                  3
physics                                  3
economics                                2
interdisciplinary design and media       2
mathematics                              1
criminology                              1
public policy                            1
population health                        1
mechanical and industrial engineering    1
pharmacology                             1
computer engineering                     1
marine and environmental sciences        1
civil and environmental engineering      1
chemical engineering                     1
health sciences                          1


In [10]:
# Hand-maintained list of the departments to show in the Northeastern chart.
# NOTE(review): the list is hard-coded; confirm it still matches the
# per-department year counts printed above whenever the data changes.
depts_with_data = [
    'computer science',
    'psychology',
    'bioengineering',
    'english',
    'sociology and anthropology',
    'political science',
    'mechanical and industrial engineering',
    'electrical and computer engineering',
    'biology',
    'physics',
    'history',
]

# Keep only the yearly averages that belong to one of the listed departments.
in_selected_depts = neu_stipends["Department"].isin(depts_with_data)
neu_avgs = neu_stipends[in_selected_depts]

In [11]:
# Line chart of the average overall pay per selected Northeastern department
# for each academic year.
neu_stipends_time = px.line(
    neu_avgs,
    x="Academic Year",
    y="Overall Pay",
    color="Department",
    markers=True,
    height=800,
).update_layout(
    yaxis_tickprefix="$",
    # Complete d3-format specifier: thousands separators, no decimal places.
    # (",." on its own has a "." with no precision digits.)
    yaxis_tickformat=",.0f",
).update_yaxes(title="Overall Pay (Average)")

# Dashed horizontal reference lines for the two annotated dollar thresholds.
neu_stipends_time.add_hline(
    y=63942,
    line_dash="dash",
    annotation_text="2025 Boston Living Wage: $63,942",
    line=dict(color="#A2A2A2"),
)
neu_stipends_time.add_hline(
    y=15650,
    line_dash="dash",
    annotation_text="Massachusetts Poverty Line: $15,650",
    line=dict(color="#A2A2A2"),
)
# Dotted vertical marker at academic year 2025.
neu_stipends_time.add_vline(x=2025, line_dash="dot", line=dict(color="#A2A2A2"))
neu_stipends_time.show()

### Timeline

In [146]:
import numpy as np
import textwrap
import plotly.graph_objs as go

In [None]:
# Load the contract-negotiation records. Later cells read the "Date",
# "Article", "Party" and "Changes from Previous Version" columns.
negotiations = pd.read_csv("data/contract_negotiations.csv")

In [14]:
# Parse the "Date" column into timestamps, stored as a new "Start Date" column.
negotiations["Start Date"] = pd.to_datetime(negotiations["Date"])

# Every distinct start date in ascending order, followed by a fixed closing
# date of 2025-05-30 (the entry later labelled "Present" on the axis/slider).
times = [
    *sorted(negotiations["Start Date"].unique()),
    pd.to_datetime("2025-05-30"),
]

In [49]:
def next_end(df, article, start):
    """Return the earliest "Start Date" of ``article`` that falls after ``start``.

    Parameters
    ----------
    df : DataFrame with "Article" and "Start Date" columns.
    article : value matched against the "Article" column.
    start : timestamp; only strictly later dates are considered.

    Returns
    -------
    The closest later start date for the same article, or the fixed date
    2025-05-30 when the article has no later entry.
    """
    article_dates = df.loc[df["Article"] == article, "Start Date"]
    later_dates = article_dates[article_dates > start]
    if later_dates.empty:
        return pd.to_datetime("2025-05-30")
    # All candidates are after `start`, so the smallest one is the nearest.
    return later_dates.min()

# Each row ends when the next version of the same article starts
# (or at the fallback date supplied by next_end).
negotiations["End Date"] = [
    next_end(negotiations, article, start)
    for article, start in zip(negotiations["Article"], negotiations["Start Date"])
]

In [156]:
# Distinct article names; an article's position in this list decides its group.
topics = negotiations["Article"].unique().tolist()


def category(article):
    """Return the "Article Group N" label for ``article``.

    Articles are bucketed six at a time by their index in the module-level
    ``topics`` list: indices 0-5 form group 1, 6-11 form group 2, and so on.
    Raises ``ValueError`` (from ``list.index``) if the article is not in ``topics``.
    """
    position = topics.index(article)
    group_number = position // 6 + 1
    return f"Article Group {group_number}"


negotiations["Group"] = negotiations["Article"].map(category)

In [121]:
# For every (article, start date) pair, count the non-null "Party" entries and
# expose that number as a "Count" column.
change_count = (
    negotiations
    .groupby(["Article", "Start Date"])["Party"]
    .count()
    .rename("Count")
    .reset_index()
)

In [122]:
# Turn the count table into a plain dict keyed by (article, "YYYY-MM-DD").
# NOTE: this rebinds `change_count` from a DataFrame to a dict, so the cell
# cannot be re-run without first re-running the cell that builds the frame.
change_count = (
    change_count
    .assign(**{"Start Date": change_count["Start Date"].dt.strftime("%Y-%m-%d")})
    .set_index(["Article", "Start Date"])["Count"]
    .to_dict()
)

In [124]:
def changes(counts, article, start):
    """Look up the count stored for ``article`` on the day of ``start``.

    ``counts`` is a mapping keyed by ``(article, "YYYY-MM-DD")`` tuples;
    ``start`` is any object with a ``strftime`` method. A missing key raises
    ``KeyError``.
    """
    date_key = start.strftime('%Y-%m-%d')
    return counts[article, date_key]

In [129]:
# Attach to every row the count recorded for its (article, start date) pair.
negotiations["Change Count"] = [
    changes(change_count, article, start)
    for article, start in zip(negotiations["Article"], negotiations["Start Date"])
]

In [127]:
def split_string(elem, width=70):
    """Wrap tooltip text and join the lines with ``<br>``.

    Parameters
    ----------
    elem : text to wrap. Missing values (``None``/NaN) yield an empty string
        instead of raising; other non-string values are converted with ``str``.
    width : maximum line length in characters (default 70, which is also
        ``textwrap.wrap``'s own default).

    Returns
    -------
    str with the wrapped lines separated by ``<br>``.
    """
    if not isinstance(elem, str):
        # textwrap.wrap only accepts str; empty CSV cells arrive as NaN.
        if pd.isna(elem):
            return ""
        elem = str(elem)
    return "<br>".join(textwrap.wrap(elem, width=width))

In [128]:
# Wrap the change descriptions for display in hover text.
# NOTE: the column is overwritten in place, so running this cell again
# re-wraps text that already contains "<br>" separators.
changes_column = "Changes from Previous Version"
negotiations[changes_column] = negotiations[changes_column].map(split_string)

In [155]:
def negotiation_timeline(range_, grouping_):
    """Build the timeline figure for one article group.

    Parameters
    ----------
    range_ : two-element sequence used as the x-axis ``range``.
    grouping_ : value of the "Group" column selecting the rows of the
        module-level ``negotiations`` frame to draw.

    Returns
    -------
    The ``px.timeline`` figure with a custom hover template, range breaks for
    unused days, and tick labels taken from the module-level ``times`` list.
    """
    group_rows = negotiations[negotiations["Group"] == grouping_]

    fig = px.timeline(
        group_rows,
        x_start=group_rows["Start Date"],
        x_end=group_rows["End Date"],
        y="Article",
        color="Party",
        custom_data=["Date", "Change Count", "Changes from Previous Version"],
        labels={"Article": ""},
    )

    hover_text = (
        "<b>Topic:</b> %{y} <br>"
        "<b>Date: </b> %{customdata[0]} <br>"
        "<b>Number of changes:</b> %{customdata[1]}<extra></extra>"
    )
    fig.update_traces(hovertemplate=hover_text)

    # Hide every calendar day that is neither a start nor an end date anywhere
    # in the full table (not just in this group).
    used_dates = set(negotiations["Start Date"]) | set(negotiations["End Date"])
    all_days = pd.date_range(min(used_dates), max(used_dates), freq='D')
    unused_days = [day for day in all_days if day not in used_dates]
    fig.update_xaxes(rangebreaks=[dict(values=unused_days)])

    # One tick per entry in `times`; the last entry is shown as "Present".
    tick_labels = [moment.date().strftime("%b %d, %y") for moment in times[:-1]]
    tick_labels.append("Present")
    fig.update_layout(
        xaxis=dict(
            tickmode="array",
            tickvals=times,
            ticktext=tick_labels,
            range=range_,
        ),
    )
    return fig

# Initial figure: the first article group over the full span of the data,
# from the earliest start date to the latest end date.
timeline = negotiation_timeline(
    range_=[negotiations["Start Date"].min(), negotiations["End Date"].max()],
    grouping_="Article Group 1",
)

timeline.show()

In [32]:
from dash import Dash, dcc, html, callback, Output, Input

In [None]:
# Dash app: a dropdown picks the article group, a range slider picks a window
# of negotiation dates, and the timeline figure is rebuilt from both.
app = Dash()

# Slider positions are indices into `times`; the final index is labelled
# "Present" instead of a date. Indices are compared with ==, not `is`:
# identity comparison on ints only works by accident of small-int caching.
last_tick = len(times) - 1
slider_marks = {
    idx: "Present" if idx == last_tick else moment.date().strftime("%m/%d/%y")
    for idx, moment in enumerate(times)
}

app.layout = html.Div([
    html.Div([html.H3("Timeline of Contract Negotiations"),
    dcc.Dropdown(
        negotiations["Group"].unique().tolist(),
        "Article Group 1",
        id='timeline-group',
        # A cleared dropdown would send None to the callback, which matches
        # no group; keep a selection at all times.
        clearable=False)], style={'width': '49%', 'display': 'inline-block'}),
    dcc.Graph(
        figure=timeline,
        id="negotiation-timeline"
    ),
    dcc.RangeSlider(
        min=0,
        max=last_tick,
        step=1,
        value=[0, last_tick],
        marks=slider_marks,
        id="timeline-slider"
    ),
    # placeholder text: hover table here?
])

@app.callback(
    Output("negotiation-timeline", "figure"),
    Input("timeline-slider", "value"), # dates
    Input('timeline-group', 'value') # "group" of attributes
)
def update_timeline(dates, group):
    """Rebuild the timeline for the selected slider window and article group.

    ``dates`` is the slider's [low, high] pair of indices into ``times``;
    ``group`` is the dropdown's selected "Group" value.
    """
    return negotiation_timeline([times[dates[0]], times[dates[1]]], group)

app.run(debug=True, use_reloader = False)  # Turn off reloader if inside Jupyter