# This first block imports the libraries that we will use to analyze the data, loads the enrollment dataset, and previews its first rows.

In [None]:
# Cell 1: Import libraries and read data
import pandas as pd
import matplotlib.pyplot as plt

# Load the cleaned 2022 enrollment data; the path is relative to the
# directory the notebook is run from.
enrollment_csv = 'DiversityInStem/data/processed/CleanedEnrollmentData2022.csv'
df = pd.read_csv(enrollment_csv)

# Preview the first five rows as a quick check that the load worked
print(df.head(5))

In [None]:
# Cell 2: Data Preparation
# Step 1: total enrollment for every (University, IPEDS_Race) pair
enrollment_totals = df.groupby(['University', 'IPEDS_Race'])['Enrollment'].sum()

# Step 2: pivot the inner index level (IPEDS_Race) into columns, giving
# one row per university and one column per race
grouped_data = enrollment_totals.unstack()

# Step 3: divide each row by its own total so every university's row is
# expressed as percentages of that university's enrollment
university_totals = grouped_data.sum(axis=1)
percentage_data = grouped_data.div(university_totals, axis=0) * 100

# Show the resulting percentage table
print(percentage_data)

In [None]:

# Cell 3: Visualization 1 - Bar Chart Comparison
# Stacked bar chart: one bar per university, one segment per race/ethnicity.
#
# DataFrame.plot() opens its own figure when no `ax` is supplied, so the size
# is passed through `figsize` here. Calling plt.figure(figsize=...) beforehand
# would only leave an extra, empty figure behind while the chart itself kept
# the default size.
ax = percentage_data.plot(kind='bar', stacked=True, figsize=(12, 6))
ax.set_title('Racial Composition of Universities (2022)')
ax.set_xlabel('University')
ax.set_ylabel('Percentage')
# Anchor the legend outside the plotting area so it does not cover the bars
ax.legend(title='Race/Ethnicity', bbox_to_anchor=(1.05, 1), loc='upper left')
plt.tight_layout()
plt.show()

In [None]:
# Cell 4: Visualization 2 - Pie Charts
# Two side-by-side pies, each drawn from one row of the percentage table.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 7))

# (target axes, row label in percentage_data, chart title), left to right
pie_specs = (
    (ax1, 'Baccalaureate Colleges',
     'Racial Composition of Baccalaureate Colleges (2022)'),
    (ax2, 'Berea College',
     'Racial Composition of Berea College (2022)'),
)

for pie_ax, row_label, pie_title in pie_specs:
    percentage_data.loc[row_label].plot(kind='pie', ax=pie_ax, autopct='%1.1f%%')
    pie_ax.set_title(pie_title)
    # Blank out the y-axis label so only the title describes each pie
    pie_ax.set_ylabel('')

plt.tight_layout()
plt.show()

## Horizontal Bar Chart

In [None]:
# Cell 5: Visualization 3 - Horizontal Bar Chart for Comparison
# Horizontal stacked bars: one bar per university, one segment per
# race/ethnicity.
#
# DataFrame.plot() opens its own figure when no `ax` is supplied, so the size
# is passed through `figsize` here. Calling plt.figure(figsize=...) beforehand
# would only leave an extra, empty figure behind while the chart itself kept
# the default size.
ax = percentage_data.plot(kind='barh', stacked=True, figsize=(10, 8))
ax.set_title('Racial Composition Comparison (2022)')
ax.set_xlabel('Percentage')
ax.set_ylabel('University')
# Anchor the legend outside the plotting area so it does not cover the bars
ax.legend(title='Race/Ethnicity', bbox_to_anchor=(1.05, 1), loc='upper left')
plt.tight_layout()
plt.show()