In [None]:
# Project 1 – SAT Math Analysis

import os

import pandas as pd

# Step 1. Load dataset
# The CSV lives at a machine-specific absolute path. Setting the SAT_RESULTS_CSV
# environment variable points the notebook at another copy without editing code;
# when the variable is unset the original location is used.
SAT_RESULTS_CSV = os.environ.get(
    "SAT_RESULTS_CSV",
    '/Users/zhanghongyuan/Desktop/hongyuan.github.io/2012_SAT_Results_20251105.csv',
)
sat_scores = pd.read_csv(SAT_RESULTS_CSV)
sat_scores.head()


Unnamed: 0,DBN,SCHOOL NAME,Num of SAT Test Takers,SAT Critical Reading Avg. Score,SAT Math Avg. Score,SAT Writing Avg. Score
0,01M292,HENRY STREET SCHOOL FOR INTERNATIONAL STUDIES,29,355,404,363
1,01M448,UNIVERSITY NEIGHBORHOOD HIGH SCHOOL,91,383,423,366
2,01M450,EAST SIDE COMMUNITY SCHOOL,70,377,402,370
3,01M458,FORSYTH SATELLITE ACADEMY,7,414,401,359
4,01M509,MARTA VALLE HIGH SCHOOL,44,390,433,384


In [4]:
# Step 2. Data Cleaning and Preparation
# Tally every distinct raw value in the math column so that any entry that is
# not a number shows up as its own row in the counts.
math_column = sat_scores["SAT Math Avg. Score"]
math_column.value_counts()

SAT Math Avg. Score
s      57
385     9
391     8
371     8
364     8
       ..
660     1
488     1
682     1
317     1
444     1
Name: count, Length: 173, dtype: int64

In [None]:
# Step 3 Pick math scores and handle missing/invalid data
# Parse the math column as numbers; anything unparseable (like 's') becomes NaN.
# NOTE(review): the parsed values overwrite the raw column in sat_scores, so
# re-running the Step 2 cell afterwards no longer shows the non-numeric entries.
math_numeric = pd.to_numeric(sat_scores["SAT Math Avg. Score"], errors="coerce")
sat_scores["SAT Math Avg. Score"] = math_numeric

# Keep only the rows whose math score survived the conversion.
has_math_score = sat_scores["SAT Math Avg. Score"].notna()
sat_scores_clean = sat_scores.loc[has_math_score]
print(sat_scores_clean.head())

      DBN                                    SCHOOL NAME  \
0  01M292  HENRY STREET SCHOOL FOR INTERNATIONAL STUDIES   
1  01M448            UNIVERSITY NEIGHBORHOOD HIGH SCHOOL   
2  01M450                     EAST SIDE COMMUNITY SCHOOL   
3  01M458                      FORSYTH SATELLITE ACADEMY   
4  01M509                        MARTA VALLE HIGH SCHOOL   

  Num of SAT Test Takers SAT Critical Reading Avg. Score  SAT Math Avg. Score  \
0                     29                             355                404.0   
1                     91                             383                423.0   
2                     70                             377                402.0   
3                      7                             414                401.0   
4                     44                             390                433.0   

  SAT Writing Avg. Score  
0                    363  
1                    366  
2                    370  
3                    359  
4                    384  

In [9]:
# Step 5. Compute mean, median, mode using pandas

# Pull the cleaned math column out once and reuse it for every statistic.
math_scores = sat_scores_clean["SAT Math Avg. Score"]

mean_value_pandas = math_scores.mean()
median_value_pandas = math_scores.median()
# mode() hands back a Series rather than a scalar, so .values is printed below.
mode_value_pandas = math_scores.mode()

print(
    f"[pandas] Mean:   {mean_value_pandas}",
    f"[pandas] Median: {median_value_pandas}",
    f"[pandas] Mode:   {mode_value_pandas.values}",
    sep="\n",
)

[pandas] Mean:   413.3681710213777
[pandas] Median: 395.0
[pandas] Mode:   [385.]


In [None]:
# Step 6. Traditional way to compute mean, median, mode
# Re-derives the three statistics with plain Python so they can be compared
# against the pandas results from the previous step.

scores = sat_scores_clean["SAT Math Avg. Score"]
scores_list = list(scores)

# Every statistic below divides by or indexes into the data, so fail early
# with a clear message instead of a ZeroDivisionError / IndexError.
if not scores_list:
    raise ValueError("No valid SAT Math scores available to summarise.")

# Mean: total of all scores divided by how many there are
mean_value_trad = sum(scores_list) / len(scores_list)

# Median: middle value of the sorted scores
scores_sorted = sorted(scores_list)
n = len(scores_sorted)

if n % 2 == 1:
    # odd count -> a single middle element
    median_value_trad = scores_sorted[n // 2]
else:
    # even count -> average of the two middle elements
    median_value_trad = (scores_sorted[n // 2 - 1] + scores_sorted[n // 2]) / 2

# Mode: tally occurrences, then keep every value that reaches the top tally
freq = {}
for value in scores_list:
    freq[value] = freq.get(value, 0) + 1

max_count = max(freq.values())
# Sorted so that ties are reported in ascending order regardless of row order.
mode_value_trad = sorted(k for k, v in freq.items() if v == max_count)

# Labels carry the same "[...]" prefix style as the pandas cell so the two
# sets of results can be told apart in the output.
print("[traditional] Mean:", mean_value_trad)
print("[traditional] Median:", median_value_trad)
print("[traditional] Mode:", mode_value_trad)



[traditional] Mean: 413.3681710213777
[traditional] Median: 395.0
[traditional] Mode: [385.0]


In [11]:
# Step 7 Data Visualization
# Prints one row of '*' characters per distinct math score, with the bar length
# proportional to how many schools reported that score.

# Count how often each math score appears, ordered by score
counts = sat_scores_clean["SAT Math Avg. Score"].value_counts().sort_index()

print("\nSAT Math Score Distribution (ASCII Chart)\n")

# Longest bar in characters; the most frequent score gets exactly this many.
BAR_WIDTH = 40
max_count = counts.max()

for score, count in counts.items():
    # int() truncates, which would draw an empty bar for a rare score whenever
    # max_count exceeds BAR_WIDTH; every score present gets at least one star.
    bar_length = max(1, int((count / max_count) * BAR_WIDTH))
    bar = "*" * bar_length
    print(f"{int(score)}: {bar}")



SAT Math Score Distribution (ASCII Chart)

312: ****
315: ****
317: ****
318: ****
320: ****
322: ****
323: ********
324: ****
333: ****
335: ********
337: ****
338: ****
339: ****
341: ****
342: ****
344: ****
346: ********
349: **************************
350: ****
351: ********
353: *****************
355: *************
356: **********************
357: **************************
358: *************
359: ********
360: *************
361: *****************
362: *************
363: ********
364: ***********************************
365: **********************
366: ********
367: *****************
368: *****************
369: **********************
370: **************************
371: ***********************************
372: **********************
373: *************
374: *************
375: **********************
376: **********************
377: *****************
378: *******************************
379: *******************************
380: *************
381: ***********************************