PANDAS

In [1]:
import pandas as pd

# Load the dataset
df = pd.read_csv("data.csv")
col = "SCORE"

mean = df[col].mean()
median = df[col].median()
mode = df[col].mode()[0]

print("Mean:", mean)
print("Median:", median)
print("Mode:", mode)

Mean: 25.014655828294558
Median: 21.0
Mode: 12.0


HARDWAY

In [3]:
import csv

values = []
skipped = 0
# Open with UTF-8 and be explicit about newline handling to avoid decoding errors on Windows
with open("data.csv", newline="", encoding="utf-8") as f:
    reader = csv.DictReader(f)
    for row in reader:
        # replace with your column name
        raw = row.get("SCORE", "")
        if raw is None:
            skipped += 1
            continue
        raw = raw.strip()
        if raw == "":
            skipped += 1
            continue
        # remove common thousands separators and handle percent signs if present
        try:
            cleaned = raw.replace(',', '')
            if cleaned.endswith('%'):
                cleaned = cleaned[:-1]
            value = float(cleaned)
            values.append(value)
        except Exception:
            # couldn't parse this row as a number; skip it
            skipped += 1
            continue

# Mean
if len(values) == 0:
    print("No numeric SCORE values found in data.csv. Skipped rows:", skipped)
    raise SystemExit(0)

mean = sum(values) / len(values)

# Median
sorted_vals = sorted(values)
n = len(sorted_vals)
if n % 2 == 1:
    median = sorted_vals[n // 2]
else:
    median = (sorted_vals[n//2 - 1] + sorted_vals[n//2]) / 2

# Mode
counts = {}
for v in values:
    counts[v] = counts.get(v, 0) + 1
mode = max(counts, key=counts.get)

print("Mean:", mean)
print("Median:", median)
print("Mode:", mode)
print(f"(Processed {len(values)} numeric rows, skipped {skipped} rows)")

# Bucket scores into ranges of 10
buckets = {}
for s in values:
    bucket = int(s // 10) * 10
    buckets[bucket] = buckets.get(bucket, 0) + 1

print("\nScore Distribution:\n")
for bucket in sorted(buckets.keys()):
    bar = "游비" * buckets[bucket]
    print(f"{bucket:3d}-{bucket+9:3d}: {bar}")

Mean: 25.014655828294558
Median: 21.0
Mode: 12.0
(Processed 274703 numeric rows, skipped 16309 rows)

Score Distribution:

  0-  9: 游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비游비