# Project 1: COVID-19 Daily Counts of Cases, Hospitalizations, and Deaths

## Load Data and Preview

In [36]:
import pandas as pd

url = "https://data.cityofnewyork.us/resource/rc75-m7u3.csv"
df = pd.read_csv(url)

In [37]:
# Preview the head rows
df.head()

# Preview the variables (Column Names)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 55 columns):
 #   Column                           Non-Null Count  Dtype 
---  ------                           --------------  ----- 
 0   date_of_interest                 1000 non-null   object
 1   case_count                       1000 non-null   int64 
 2   probable_case_count              1000 non-null   int64 
 3   hospitalized_count               1000 non-null   int64 
 4   death_count                      1000 non-null   int64 
 5   case_count_7day_avg              1000 non-null   int64 
 6   all_case_count_7day_avg          1000 non-null   int64 
 7   hosp_count_7day_avg              1000 non-null   int64 
 8   death_count_7day_avg             1000 non-null   int64 
 9   bx_case_count                    1000 non-null   int64 
 10  bx_probable_case_count           1000 non-null   int64 
 11  bx_hospitalized_count            1000 non-null   int64 
 12  bx_death_count                   10

## Calculate the Mean, Median, and Mode

In [38]:
df["avg_death"] = pd.to_numeric(df["death_count"], errors="coerce")

mean_val = df["avg_death"].mean()
median_val = df["avg_death"].median()
mode_val = df["avg_death"].mode()[0]

# Calculate the Mean, Median, and Mode
print("Mean AVG death:  ", mean_val)
print("Median AVG death:", median_val)
print("Mode AVG death:  ", mode_val)

Mean AVG death:   43.116
Median AVG death: 13.0
Mode AVG death:   5


# The Hard Way

In [53]:
# Extract target value from the dataset and convert into a list
death_counts = []

with open("COVID-19 Daily Counts.csv") as f:
    header = True
    for line in f:
        line = line.strip()        # get rid of newline character

        if header:
            header = False         # skip header
            continue

        parts = line.split(",")    # use "," to separate every column

        deaths = parts[4].strip()
        clean = ''.join(ch for ch in deaths if ch.isdigit())
        if clean != "":
            death_counts.append(int(clean))

# Step 2: Mean
mean_death = sum(death_counts) / len(death_counts)
print("Mean:", mean_death)

# Median
sorted_deaths = sorted(death_counts)
n = len(sorted_deaths)
if n % 2 == 1:
    median_death = sorted_deaths[n // 2]
else:
    median_death = (sorted_deaths[n // 2 - 1] + sorted_deaths[n // 2]) / 2
print("Median:", median_death)

# Mode
counts = {}
for s in death_counts:
    counts[s] = counts.get(s, 0) + 1
max_count = max(counts.values())
modes = [v for v, c in counts.items() if c == max_count]
print("Mode(s):", modes[0])


Mean: 43.116
Median: 13.0
Mode(s): 5


# Visualization

In [54]:
import pandas as pd

# ------------------------------
# Step 1: Read CSV using pandas
# ------------------------------

df = pd.read_csv("COVID-19 Daily Counts.csv")

# Convert SAT math column to numeric
# invalid values like "s" become NaN
death_counts = pd.to_numeric(df["death_count"], errors="coerce")

# Remove NaN values
death_counts = death_counts.dropna()

# Convert to Python list
death_counts = death_counts.tolist()

print("Loaded death counts:", death_counts[:10])

# ------------------------------
# Step 2: ASCII histogram
# ------------------------------

def ascii_hist(values, bin_size=20):
    """
    Draw a simple ASCII histogram using only Python standard library.
    """
    bins = {}

    # Count values into bins
    for v in values:
        bin_label = (int(v) // bin_size) * bin_size  # e.g., 432 -> 420
        bins[bin_label] = bins.get(bin_label, 0) + 1

    # Print histogram sorted
    for b in sorted(bins.keys()):
        bar = "#" * bins[b]
        print(f"{b:3d}-{b+bin_size-1:3d}: {bar}")

# ------------------------------
# Step 3: Call histogram
# ------------------------------

print("\nASCII Histogram of Death Counts:\n")
ascii_hist(death_counts)

Loaded death counts: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]

ASCII Histogram of Death Counts:

  0- 19: ###########################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################
 20- 39: #############################################################################################
 40- 59: ##############################################################################
 60- 79: ####################################