In [None]:
import pandas as pd

# Load a file of yellow taxi data
df = pd.read_csv('yellow_taxi.csv')

# Choose one numeric column
col = 'trip_distance'
data = df[col].dropna().tolist()

[0.97, 1.1, 2.51, 1.9, 1.43, 1.84, 1.66, 11.7, 2.95, 3.01, 1.8, 7.3, 3.23, 11.43, 2.95, 1.52, 2.23, 4.5, 1.2, 2.5, 1.4, 17.8, 0.8, 1.7, 5.7, 1.18, 2.42, 2.3, 0.7, 1.2, 4.1, 9.5, 2.02, 2.17, 2.41, 0.6, 5.01, 3.24, 2.7, 1.2, 1.3, 2.32, 2.35, 0.61, 2.27, 0.82, 1.72, 1.75, 11.11, 0.2, 2.4, 2.9, 1.2, 1.6, 3.3, 2.35, 3.4, 1.15, 0.86, 16.02, 3.31, 0.03, 1.38, 2.04, 11.3, 3.31, 9.0, 3.0, 0.4, 0.3, 1.3, 0.3, 1.7, 2.79, 2.91, 1.69, 0.82, 0.52, 1.97, 3.1, 0.86, 0.64, 4.41, 0.64, 1.08, 3.04, 1.71, 3.05, 1.29, 11.19, 11.03, 13.54, 1.4, 1.1, 3.72, 1.6, 0.08, 6.22, 4.56, 1.41, 1.7, 1.14, 6.19, 1.0, 3.0, 3.7, 3.15, 1.75, 0.71, 0.67, 6.14, 9.7, 1.16, 1.44, 2.73, 3.53, 0.7, 1.1, 9.0, 4.53, 1.02, 2.86, 1.91, 5.62, 0.74, 3.07, 7.05, 2.61, 6.0, 1.7, 1.9, 0.8, 4.3, 0.42, 0.42, 1.21, 0.8, 4.59, 6.81, 3.47, 0.44, 1.93, 1.89, 0.7, 1.98, 1.82, 6.08, 4.81, 0.98, 1.5, 2.58, 1.46, 3.55, 2.13, 1.97, 0.48, 0.75, 0.9, 1.72, 1.32, 1.04, 0.41, 1.28, 0.8, 2.1, 1.53, 1.55, 1.27, 2.7, 19.2, 20.37, 0.76, 1.2, 1.47, 1.69, 2

In [None]:
#Calculate mean, median, and mode using pandas

mean_val = df[col].mean()
median_val = df[col].median()
mode_val = df[col].mode()[0]

print(f"Mean: {mean_val:.2f}")
print(f"Median: {median_val:.2f}")
print(f"Mode: {mode_val:.2f}")

Mean: 2.96
Median: 1.93
Mode: 1.60


In [12]:
#Calculate mean, median, and mode manually
import csv

#Read numeric column manually
values = []
with open('yellow_taxi.csv', newline='') as f:
    reader = csv.DictReader(f)
    for row in reader:
        try:
            val = float(row['trip_distance'])
            values.append(val)
        except (ValueError, KeyError):
            continue

#Compute mean
def compute_mean(nums):
    total = 0
    count = 0
    for n in nums:
        total += n
        count += 1
    return total / count if count != 0 else None

#Compute median
def compute_median(nums):
    nums_sorted = sorted(nums)
    n = len(nums_sorted)
    if n == 0:
        return None
    mid = n // 2
    if n % 2 == 0:
        return (nums_sorted[mid - 1] + nums_sorted[mid]) / 2
    else:
        return nums_sorted[mid]

# Compute mode
def compute_mode(nums):
    freq = {}
    for n in nums:
        freq[n] = freq.get(n, 0) + 1
    max_count = max(freq.values())
    # return the smallest/largest if multiple modes
    modes = [k for k, v in freq.items() if v == max_count]
    if len(modes) == 1:
        return modes[0]
    else:
        return modes  # return list if multiple modes


print("Mean:", compute_mean(values))
print("Median:", compute_median(values))
print("Mode:", compute_mode(values))

Mean: 2.9619400000000016
Median: 1.93
Mode: 1.6


In [11]:
# --- Create ASCII histogram (no external packages) ---
# Bin the data manually

min_val = 0
max_val = 20  # ignore long outliers
bin_size = 2
bins = range(min_val, max_val + bin_size, bin_size)
counts = [0] * (len(bins)-1)

# Count how many values fall into each bin
for v in data:
    if v < max_val:
        index = int(v // bin_size)
        counts[index] += 1

# Normalize to make bars fit the screen width (max 50 chars)
max_count = max(counts)
scale = 50 / max_count

# Print histogram
print("\nASCII Histogram of Trip Distances (miles)")
print("-" * 60)
for i, count in enumerate(counts):
    bar = "*" * int(count * scale)
    label = f"{bins[i]:2d}-{bins[i+1]:2d} mi"
    print(f"{label:10} | {bar}")


ASCII Histogram of Trip Distances (miles)
------------------------------------------------------------
 0- 2 mi   | **************************************************
 2- 4 mi   | ***************************
 4- 6 mi   | *********
 6- 8 mi   | ***
 8-10 mi   | *
10-12 mi   | *
12-14 mi   | 
14-16 mi   | 
16-18 mi   | 
18-20 mi   | 
