In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import ipaddress

# Step 1: Read the CSV. header=None tells pandas not to use the first line as
# column names, so the columns get integer labels instead.
print("Step 1: Load all columns and show head")
df = pd.read_csv(
    "asn-ipv4.csv",
    header=None,
)

print(df.head(5))
print("Data shape:", df.shape)

# Step 2: Take the first column (by position), coerce it to text and trim
# surrounding whitespace; the result is stored in a new "ip" column.
print("\nStep 2: Extract the first column as IP, clean and validate")
df["ip"] = (
    df.iloc[:, 0]
    .astype(str)
    .str.strip()
)

def is_valid_ipv4(x):
    """Return True if *x* is a string holding a dotted-quad IPv4 address.

    ``ipaddress.IPv4Address`` also accepts integers that fit in 32 bits and
    4-byte ``bytes`` objects, so passing the value straight through would
    report inputs such as ``5``, ``True`` or ``b"\\x01\\x02\\x03\\x04"`` as
    valid. Those are rejected here: only textual addresses count as valid.

    Args:
        x: Candidate value. Anything that is not a ``str`` is invalid.

    Returns:
        bool: True when *x* is a ``str`` that parses as an IPv4 address,
        False otherwise.
    """
    # Guard first: ints/bytes would be interpreted as packed addresses.
    if not isinstance(x, str):
        return False
    try:
        ipaddress.IPv4Address(x)
    except ValueError:  # AddressValueError is a ValueError subclass
        return False
    return True

# Keep only the rows whose "ip" value passes is_valid_ipv4.
valid_rows = df["ip"].apply(is_valid_ipv4)
df = df[valid_rows]

print(df[["ip"]].head())

# Step 3: Extract /16 prefix (first two octets)
print("\nStep 3: Extract /16 prefix")
# Split each address on ".", keep the first two pieces and glue them back
# together, e.g. "a.b.c.d" -> "a.b".
octets = df["ip"].str.split(".")
df["prefix_16"] = octets.str[:2].str.join(".")
print(df[["ip", "prefix_16"]].head())

# Step 4: Count /16 prefixes
print("\nStep 4: Count /16 prefixes")
# value_counts() yields one count per distinct prefix, most frequent first.
prefix_counts = df["prefix_16"].value_counts()
# Name the index "Prefix_16" and move it into a regular column next to "Count".
prefix_df = prefix_counts.rename_axis("Prefix_16").reset_index(name="Count")

print(prefix_df.head())

# Step 5: Visualize top 20 /16 prefixes
print("\nStep 5: Visualize top 20 /16 prefixes")
# The first 20 rows of prefix_df are the ones plotted.
top20 = prefix_df.head(20)

# Explicit Figure/Axes objects instead of the implicit pyplot "current axes".
fig, ax = plt.subplots(figsize=(14, 6))
ax.bar(top20["Prefix_16"], top20["Count"], color="steelblue")
ax.set_title("Top 20 /16 Prefixes by Count")
ax.set_xlabel("/16 Prefix")
ax.set_ylabel("Count")
# Slant the x tick labels and right-align them under their bars.
plt.setp(ax.get_xticklabels(), rotation=45, ha="right")
fig.tight_layout()
plt.show()
