Analyzing the relative attendance rate for each college

In [1]:
import pandas as pd
import numpy as np
import plotly.express as px

# Per-college attendance table; later cells read and extend this frame.
college = pd.read_csv('colleges.csv')

import plotly.graph_objects as go

# HODP palettes: a five-step red ramp, and Plotly's "Bold" qualitative set
# extended with two additional colors for when more categories are needed.
monochrome_colors = [
    '#251616',
    '#760000',
    '#C63F3F',
    '#E28073',
    '#F1D3CF',
]
bold_colors = px.colors.qualitative.Bold
new_colors = ['#3f7eeb', '#000000']
primary_colors = [*bold_colors, *new_colors]

# HODP template: Helvetica throughout, outside ticks with visible axis lines,
# a transparent legend background, and the monochrome ramp as the diverging
# colorscale.
# NOTE(review): none of the figures in the visible cells pass
# template=theme_hodp — confirm whether the template is applied elsewhere.
theme_hodp = go.layout.Template(
    layout=go.Layout(
        title=dict(
            font=dict(size=24, family='Helvetica', color=monochrome_colors[0]),
            pad=dict(t=100, r=0, b=0, l=0),
        ),
        font=dict(size=18, family='Helvetica', color='#717171'),
        xaxis=dict(
            ticks='outside',
            tickfont=dict(size=14, family='Helvetica'),
            showticksuffix='all',
            showtickprefix='last',
            showline=True,
            title=dict(font=dict(size=18, family='Helvetica'), standoff=20),
            automargin=True,
        ),
        yaxis=dict(
            ticks='outside',
            tickfont=dict(size=14, family='Helvetica'),
            showticksuffix='all',
            showtickprefix='last',
            title=dict(font=dict(size=18, family='Helvetica'), standoff=20),
            showline=True,
            automargin=True,
        ),
        legend=dict(
            bgcolor='rgba(0,0,0,0)',
            title=dict(font=dict(size=18, family='Helvetica', color=monochrome_colors[0])),
            font=dict(size=14, family='Helvetica'),
            yanchor='bottom',
        ),
        colorscale=dict(diverging=monochrome_colors),
        coloraxis=dict(
            autocolorscale=True,
            cauto=True,
            colorbar=dict(
                tickfont=dict(size=14, family='Helvetica'),
                title=dict(font=dict(size=18, family='Helvetica')),
            ),
        ),
    )
)

# Raw Attendance

In [2]:
import pandas as pd
import plotly.graph_objects as go

# Read the per-bracket attendance counts
df = pd.read_csv('bracket.csv')

# Each bracket's attendance expressed as a percentage of the summed attendance
total_attendance = df['Attendance'].sum()
df['Percentage'] = df['Attendance'].div(total_attendance).mul(100)

# Bar text is whatever sits in the third column of the frame at this point,
# rendered as strings. The lookup is positional and runs after 'Percentage'
# has been appended, so the statement order here matters.
df['Label'] = df.iloc[:, 2].astype(str)

# One bar per bracket, colored from the primary palette
fig = go.Figure(data=[
    go.Bar(
        x=df['par_income_lab'],
        y=df['Percentage'],
        text=df['Label'],
        textfont={'size': 9},
        marker_color=primary_colors,
    )
])

# Dashed horizontal reference line at 10 on the y axis
fig.add_shape(
    type='line',
    x0=-0.5,
    x1=9.5,
    y0=10,
    y1=10,
    line={'color': 'black', 'dash': 'dash'},
)

fig.update_layout(
    title="Proportion of Students Attending Ivy-Plus Schools by Parent Income Percentile",
    xaxis=dict(title='Parent Income Percentile'),
    yaxis=dict(title='Percentage of Attendance', type='linear'),
)

fig.show()


In [3]:
# Income brackets in display order; also used as the categorical ordering so
# grouped results come back sorted from lowest to highest bracket.
order = ['0-20', '20-40', '40-60', '60-70', '70-80', '80-90', '90-95', '95-96', '96-97', '97-98', '98-99', '99-99.9','Top 1' , 'Top 0.1']
college['par_income_lab'] = pd.Categorical(college['par_income_lab'], categories=order, ordered=True)

# Coerce both rate columns to numeric before averaging so the means below
# never operate on unparsed strings; unparseable entries become NaN and are
# skipped by mean(). The conversion is idempotent, so re-running is safe.
college['rel_attend'] = pd.to_numeric(college['rel_attend'], errors='coerce')
college['rel_attend_unwgt'] = pd.to_numeric(college['rel_attend_unwgt'], errors='coerce')

fig = go.Figure()

# Mean rate per bracket across all rows. observed=False keeps every bracket in
# `order` in the result (in category order), independent of the pandas default.
avg_values = college.groupby('par_income_lab', observed=False)['rel_attend'].mean().reset_index()
avg_values_unwgt = college.groupby('par_income_lab', observed=False)['rel_attend_unwgt'].mean().reset_index()

# Take the x labels from the grouped result itself so x and y are always
# aligned, whatever order the rows have in the underlying table.
x = avg_values['par_income_lab'].tolist()

fig.add_trace(go.Scatter(
    x=x,
    y=avg_values_unwgt['rel_attend_unwgt'],
    marker_color=primary_colors[1],
    name="Not Adjusted for SAT"
))

fig.add_trace(go.Scatter(
    x=x,
    y=avg_values['rel_attend'],
    marker_color=primary_colors[11],
    name="Adjusted for SAT"
))

# Dashed reference line at a rate of 1, spanning the full width of the x axis
fig.add_shape(
    dict(
        type='line',
        x0=-1,
        x1=len(order),
        y0=1,
        y1=1,
        line=dict(color='gray', dash='dash'),
    )
)
fig.update_layout(title="Rate of Students Attending Ivy-Plus Colleges, Adjusted for SAT", 
                xaxis={'title':{'text':'Parent Income Percentile'}}, 
                yaxis={'title':{'text':'Attendance Rates'}}, 
                yaxis_type="linear")

fig.show()


# Average Attendance

# Relative Attendance with schools

In [4]:
# Unparseable rate entries become NaN so the averages and plots skip them
college['rel_attend'] = pd.to_numeric(college['rel_attend'], errors='coerce')

# Income brackets in display order, applied as an ordered categorical
order = ['0-20', '20-40', '40-60', '60-70', '70-80', '80-90', '90-95', '95-96', '96-97', '97-98', '98-99', '99-99.9','Top 1' , 'Top 0.1']
college['par_income_lab'] = pd.Categorical(college['par_income_lab'], categories=order, ordered=True)

# The table is sliced in consecutive chunks of one row per bracket; each chunk
# is treated as one school.
rows_per_school = len(order)

fig = go.Figure()

# Average line: mean rate per bracket over all rows. observed=False keeps
# every bracket in the result, in category order.
avg_values = college.groupby('par_income_lab', observed=False)['rel_attend'].mean().reset_index()
x = avg_values['par_income_lab'].tolist()

# Legend entries are (label, sort value, row offset). The sort value is kept
# as a number next to the label rather than parsed back out of the text.
# The average uses len(college) as its offset, which no school chunk can have.
avg_top_value = round(avg_values['rel_attend'].iloc[-1], 1)
legend_values = [(f"Average: ×{avg_top_value}", avg_top_value, len(college))]

# One legend entry per school, labelled with its rounded 'Top 0.1' rate
for i in range(0, len(college), rows_per_school):
    school_data = college.iloc[i:i + rows_per_school]
    legend_value = round(school_data.loc[school_data['par_income_lab'] == 'Top 0.1', 'rel_attend'].values[0], 1)
    legend_values.append((f"{school_data['name'].iloc[0]}: ×{legend_value}", legend_value, i))

# Highest 'Top 0.1' rate first (the average is ranked along with the schools)
legend_values.sort(key=lambda entry: float(entry[1]), reverse=True)

# Add one trace per legend entry, in sorted order
for legend_text, _, i in legend_values:
    is_average = i == len(college)
    if is_average:
        x_values = x
        y_values = avg_values['rel_attend']
    else:
        # Use the school's own bracket labels so x and y stay paired
        school_data = college.iloc[i:i + rows_per_school]
        x_values = school_data['par_income_lab'].tolist()
        y_values = school_data['rel_attend'].tolist()
    fig.add_trace(go.Scatter(
        x=x_values,
        y=y_values,
        name=legend_text,
        # Cycle the palette by chunk number rather than by raw row offset
        marker_color=primary_colors[(i // rows_per_school) % len(primary_colors)],
        line=dict(dash='dash' if is_average else 'solid')
    ))

# Dashed reference line at a rate of 1
fig.add_shape(
    dict(
        type='line',
        x0=-1,
        x1=len(order),
        y0=1,
        y1=1,
        line=dict(color='gray', dash='dash'),
    )
)


fig.update_layout(title="Rate of Students Attending Ivy-Plus Colleges Based On Income Bracket", 
                xaxis={'title':{'text':'Parent Income Percentile'}}, 
                yaxis={'title':{'text':'Representation Relative to Population Share'}}, 
                legend={'title':{'text':'Schools'}},
                yaxis_type="linear")

fig.show()


In [5]:
import pandas as pd
import plotly.graph_objects as go

# Load data
df = pd.read_csv('income.csv')

# Row 0 holds one value per school column; the first column is skipped.
# Sorting the Series directly keeps every value paired with its school label
# and avoids indexing a label-indexed Series with integer positions.
median_incomes = df.iloc[0, 1:].sort_values()
schools = median_incomes.index

# Colors: cycle through the primary palette, one color per school
num_colors = len(primary_colors)
repeated_colors = [primary_colors[i % num_colors] for i in range(len(schools))]

fig = go.Figure()

# Reference bar drawn first so it sits to the left of the schools.
# NOTE(review): 68010 is hard-coded; its source is not visible here — confirm.
fig.add_trace(go.Bar(
    x=["Median family income"],
    y=[68010],
    marker_color=primary_colors[12],
    showlegend=False
))

# One bar per school, in ascending order of the plotted value
fig.add_trace(go.Bar(
    x=schools,
    y=median_incomes,
    marker_color=repeated_colors,
    showlegend=False

))


fig.update_layout(
    title='Median Family Income by University',
    xaxis=dict(title='University'),
    yaxis=dict(title='Median Family Income ($)', type='linear', range=[0, 250000])
)

fig.show()


In [6]:
# Lookup table pairing each parent-income percentile bracket with its dollar range.
# Adjacent brackets share a boundary (one bracket's upper bound is the next
# one's lower bound). 'Top 1' covers both '99-99.9' and 'Top 0.1', so it is
# open-ended above $611k.
income_data = {
    'Income Percentile': ['0-20', '20-40', '40-60', '60-70', '70-80', '80-90', '90-95',
                          '95-96', '96-97', '97-98', '98-99', '99-99.9', 'Top 1', 'Top 0.1'],
    'Dollar Range': ['<$23k', '$23k-$43k', '$43k-$73k', '$73k-$91k', '$91k-$114k', '$114k-$158k', '$158k-$222k',
                     '$222k-$251k', '$251k-$297k', '$297k-$380k', '$380k-$611k', '$611k-$2.7m', '>$611k', '>$2.7m'],
}
income_df = pd.DataFrame(income_data)

# Create a Table
table = go.Figure(data=[go.Table(
    header=dict(values=list(income_df.columns)),
    cells=dict(values=[income_df['Income Percentile'], income_df['Dollar Range']])
)])

# Update layout
table.update_layout(title='Income Percentile and Dollar Ranges',
                    margin=dict(l=0, r=0, t=40, b=0))

# Show the table
table.show()


In [7]:
# Per-school shares; the file is read once and only `percents` is used below
percents = pd.read_csv('percents.csv')

# Every column after the first is treated as a school
schools = percents.columns[1:]
# Rows are picked by position: row 7 is plotted as 'Bottom 20%' and row 2 as
# 'Top 0.1%'. NOTE(review): confirm these positions against percents.csv.
percentBottom20 = percents.iloc[7, 1:]
percentTopTenth = percents.iloc[2, 1:]

# Order the schools by their bottom-20% share, largest first, keeping both
# series paired with the school they belong to
sorted_data = sorted(zip(schools, percentBottom20, percentTopTenth), key=lambda row: row[1], reverse=True)
sorted_schools = [row[0] for row in sorted_data]
sorted_percentBottom20 = [row[1] for row in sorted_data]
sorted_percentTopTenth = [row[2] for row in sorted_data]

fig = go.Figure()

fig.add_trace(go.Bar(
    x=sorted_schools,
    y=sorted_percentBottom20,
    name='Bottom 20%',
    marker_color=monochrome_colors[1]
))

fig.add_trace(go.Bar(
    x=sorted_schools,
    y=sorted_percentTopTenth,
    name='Top 0.1%',
    marker_color=monochrome_colors[3]
))

fig.update_layout(
    title='Share of Students from the Bottom 20% vs. the Top 0.1% at Ivy-Plus Universities',
    xaxis=dict(title='University'),
    yaxis=dict(title='Percent of Student Body', type='linear')
)

fig.show()


In [8]:
# Per-school shares; the file is read once and only `percents` is used below
percents = pd.read_csv('percents.csv')

# Every column after the first is treated as a school
schools = percents.columns[1:]
# Rows are picked by position: row 7 is plotted as 'Bottom 20%' and row 6 as
# 'Top 20%'. NOTE(review): confirm these positions against percents.csv.
percentBottom20 = percents.iloc[7, 1:]
percentTop20 = percents.iloc[6, 1:]

# Order the schools by their bottom-20% share, largest first, keeping both
# series paired with the school they belong to
sorted_data = sorted(zip(schools, percentBottom20, percentTop20), key=lambda row: row[1], reverse=True)
sorted_schools = [row[0] for row in sorted_data]
sorted_percentBottom20 = [row[1] for row in sorted_data]
sorted_percentTop20 = [row[2] for row in sorted_data]

fig = go.Figure()

fig.add_trace(go.Bar(
    x=sorted_schools,
    y=sorted_percentBottom20,
    name='Bottom 20%',
    marker_color=monochrome_colors[1]
))

fig.add_trace(go.Bar(
    x=sorted_schools,
    y=sorted_percentTop20,
    name='Top 20%',
    marker_color=monochrome_colors[2]
))

# Dashed reference line at 20 percent. Categories sit at integer positions
# 0..n-1, so -0.5 to n-0.5 covers the bars with equal overhang on both sides.
fig.add_shape(
    dict(
        type='line',
        x0=-0.5,
        x1=len(schools) - 0.5,
        y0=20,
        y1=20,
        line=dict(color=monochrome_colors[0], dash='dash'),
    )
)

fig.update_layout(
    title='Share of Students from the Bottom 20% vs. the Top 20% at Ivy-Plus Universities',
    xaxis=dict(title='University'),
    yaxis=dict(title='Percent of Student Body', type='linear')
)

fig.show()


In [9]:
# Unparseable rate entries become NaN so the averages and plots skip them
college['rel_attend_unwgt'] = pd.to_numeric(college['rel_attend_unwgt'], errors='coerce')

# Income brackets in display order, applied as an ordered categorical
order = ['0-20', '20-40', '40-60', '60-70', '70-80', '80-90', '90-95', '95-96', '96-97', '97-98', '98-99', '99-99.9','Top 1' , 'Top 0.1']
college['par_income_lab'] = pd.Categorical(college['par_income_lab'], categories=order, ordered=True)

# The table is sliced in consecutive chunks of one row per bracket; each chunk
# is treated as one school.
rows_per_school = len(order)

fig = go.Figure()

# Average line: mean rate per bracket over all rows. observed=False keeps
# every bracket in the result, in category order.
avg_values = college.groupby('par_income_lab', observed=False)['rel_attend_unwgt'].mean().reset_index()
x = avg_values['par_income_lab'].tolist()

# Legend entries are (label, sort value, row offset). The sort value is kept
# as a number next to the label rather than parsed back out of the text.
# The average uses len(college) as its offset, which no school chunk can have.
avg_top_value = round(avg_values['rel_attend_unwgt'].iloc[-1], 1)
legend_values = [(f"Average: ×{avg_top_value}", avg_top_value, len(college))]

# One legend entry per school, labelled with its rounded 'Top 0.1' rate
for i in range(0, len(college), rows_per_school):
    school_data = college.iloc[i:i + rows_per_school]
    legend_value = round(school_data.loc[school_data['par_income_lab'] == 'Top 0.1', 'rel_attend_unwgt'].values[0], 1)
    legend_values.append((f"{school_data['name'].iloc[0]}: ×{legend_value}", legend_value, i))

# Highest 'Top 0.1' rate first (the average is ranked along with the schools)
legend_values.sort(key=lambda entry: float(entry[1]), reverse=True)

# Add one trace per legend entry, in sorted order
for legend_text, _, i in legend_values:
    is_average = i == len(college)
    if is_average:
        x_values = x
        y_values = avg_values['rel_attend_unwgt']
    else:
        # Use the school's own bracket labels so x and y stay paired
        school_data = college.iloc[i:i + rows_per_school]
        x_values = school_data['par_income_lab'].tolist()
        y_values = school_data['rel_attend_unwgt'].tolist()
    fig.add_trace(go.Scatter(
        x=x_values,
        y=y_values,
        name=legend_text,
        # Cycle the palette by chunk number rather than by raw row offset
        marker_color=primary_colors[(i // rows_per_school) % len(primary_colors)],
        line=dict(dash='dash' if is_average else 'solid')
    ))


# Income brackets are plotted on x and the rates on y, so the axis titles
# are assigned accordingly.
fig.update_layout(title="Rate of Students Attending Ivy-Plus Colleges Based On Income Bracket", 
                xaxis={'title':{'text':'Parent Income Percentile'}}, 
                yaxis={'title':{'text':'Relative Attendance Rates'}}, 
                legend={'title':{'text':'Schools'}},
                yaxis_type="linear")

fig.show()


<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=12441a0d-cba4-41e8-b639-7c2c88b6450e' target="_blank">
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>