In [4]:
import pandas as pd

# Load the dataset
data = pd.read_csv('College.csv')

# Remove the 'Unnamed: 0' column (college names) so that only the
# measured variables remain in the frame.
data = data.drop(columns=['Unnamed: 0'])

# Convert the 'Private' column to binary (Yes -> 1, anything else -> 0).
# A vectorized comparison replaces the per-row Python lambda; the result
# is the same 0/1 integer column.
data['Private'] = (data['Private'] == 'Yes').astype('int64')

# Human-readable description for each variable, keyed by column name.
# Insertion order is kept as-is because it determines the row order of
# any table built from this mapping.
variable_descriptions = {
    # Institution type
    'Private': 'Private or public college (1: Private, 0: Public)',
    # Admissions funnel
    'Apps': 'Number of applications received',
    'Accept': 'Number of students accepted',
    'Enroll': 'Number of students enrolled',
    # Incoming class quality
    'Top10perc': 'Percentage of enrolled students in top 10% of their high school class',
    'Top25perc': 'Percentage of enrolled students in top 25% of their high school class',
    # Undergraduate headcount
    'F.Undergrad': 'Number of full-time undergraduate students',
    'P.Undergrad': 'Number of part-time undergraduate students',
    # Costs to the student
    'Outstate': 'Tuition for out-of-state students',
    'Room.Board': 'Room and board costs',
    'Books': 'Estimated cost of books',
    'Personal': 'Estimated personal spending',
    # Faculty
    'PhD': 'Percentage of faculty with a Ph.D.',
    'Terminal': 'Percentage of faculty with terminal degree',
    'S.F.Ratio': 'Student-to-faculty ratio',
    # Outcomes and spending
    'perc.alumni': 'Percentage of alumni who donate',
    'Expend': 'Instructional expenditure per student',
    'Grad.Rate': 'Graduation rate',
}

# Generate basic statistics: one row per numeric variable, with the
# describe() summary (count, mean, std, min, quartiles, max) as columns.
numeric_stats = data.describe().transpose()

# Add median and IQR for further insight.
# numeric_only=True keeps the median restricted to the same numeric
# columns that describe() summarizes, so a leftover text column cannot
# raise a TypeError here.
numeric_stats['median'] = data.median(numeric_only=True)
# Interquartile range: spread of the middle 50% of the values.
numeric_stats['IQR'] = numeric_stats['75%'] - numeric_stats['25%']

# Build a one-column table of descriptions (indexed by variable name) and
# attach the statistics to it. The left join keeps every described
# variable and the order in which the descriptions were declared.
stats_with_description = pd.DataFrame.from_dict(
    variable_descriptions,
    orient='index',
    columns=['Description'],
).join(numeric_stats, how='left')

# Persist the combined table so it can be reused in the report.
stats_with_description.to_csv('college_data_with_descriptions.csv')

# Echo the combined table to the console.
print(stats_with_description)


                                                   Description  count  \
Private      Private or public college (1: Private, 0: Public)  777.0   
Apps                           Number of applications received  777.0   
Accept                             Number of students accepted  777.0   
Enroll                             Number of students enrolled  777.0   
Top10perc    Percentage of enrolled students in top 10% of ...  777.0   
Top25perc    Percentage of enrolled students in top 25% of ...  777.0   
F.Undergrad         Number of full-time undergraduate students  777.0   
P.Undergrad         Number of part-time undergraduate students  777.0   
Outstate                     Tuition for out-of-state students  777.0   
Room.Board                                Room and board costs  777.0   
Books                                  Estimated cost of books  777.0   
Personal                           Estimated personal spending  777.0   
PhD                         Percentage of faculty w