Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
172 changes: 172 additions & 0 deletions plots/manhattan-gwas/implementations/matplotlib.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
""" pyplots.ai
manhattan-gwas: Manhattan Plot for GWAS
Library: matplotlib 3.10.8 | Python 3.13.11
Quality: 92/100 | Created: 2025-12-31
"""

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd


# Data - Simulate GWAS results for 22 chromosomes
np.random.seed(42)

# Define chromosome sizes (approximate in Mb, scaled down for simulation)
chrom_sizes = {
1: 249,
2: 243,
3: 198,
4: 191,
5: 182,
6: 171,
7: 159,
8: 146,
9: 141,
10: 136,
11: 135,
12: 134,
13: 115,
14: 107,
15: 103,
16: 90,
17: 81,
18: 78,
19: 59,
20: 63,
21: 48,
22: 51,
}

# Generate SNPs for each chromosome
chromosomes = []
positions = []
p_values = []

for chrom, size in chrom_sizes.items():
n_snps = int(size * 40) # ~40 SNPs per Mb
chrom_positions = np.sort(np.random.randint(1, size * 1_000_000, n_snps))

# Generate p-values (mostly non-significant, with some significant peaks)
chrom_pvals = np.random.uniform(0, 1, n_snps)

# Add some significant SNPs in certain chromosomes (simulating real signals)
if chrom in [2, 6, 11, 16]:
peak_idx = np.random.choice(n_snps, size=np.random.randint(3, 8), replace=False)
chrom_pvals[peak_idx] = 10 ** (-np.random.uniform(8, 15, len(peak_idx)))

# Add suggestive hits in more chromosomes
if chrom in [1, 3, 8, 12, 19]:
suggestive_idx = np.random.choice(n_snps, size=np.random.randint(2, 5), replace=False)
chrom_pvals[suggestive_idx] = 10 ** (-np.random.uniform(5, 7.5, len(suggestive_idx)))

chromosomes.extend([chrom] * n_snps)
positions.extend(chrom_positions)
p_values.extend(chrom_pvals)

# Create DataFrame
df = pd.DataFrame({"chromosome": chromosomes, "position": positions, "p_value": p_values})

# Calculate -log10(p-value)
df["-log10p"] = -np.log10(df["p_value"])

# Calculate cumulative position for x-axis
df["chrom_num"] = df["chromosome"]
df = df.sort_values(["chrom_num", "position"])

# Add cumulative position offset
cumulative_offset = 0
chrom_centers = {}
for chrom in sorted(df["chrom_num"].unique()):
chrom_mask = df["chrom_num"] == chrom
df.loc[chrom_mask, "cumulative_pos"] = df.loc[chrom_mask, "position"] + cumulative_offset
chrom_centers[chrom] = cumulative_offset + df.loc[chrom_mask, "position"].median()
cumulative_offset += df.loc[chrom_mask, "position"].max() + 10_000_000 # Gap between chromosomes

# Define thresholds
genome_wide_threshold = -np.log10(5e-8) # ~7.3
suggestive_threshold = -np.log10(1e-5) # 5

# Define colors
colors = ["#306998", "#6699CC"] # Python Blue and lighter blue for alternating

# Create plot
fig, ax = plt.subplots(figsize=(16, 9))

# Plot points by chromosome with alternating colors
for i, chrom in enumerate(sorted(df["chrom_num"].unique())):
chrom_data = df[df["chrom_num"] == chrom]
color = colors[i % 2]

# Smaller markers for dense data, slightly larger for significant hits
significant_mask = chrom_data["-log10p"] >= genome_wide_threshold
regular_data = chrom_data[~significant_mask]
significant_data = chrom_data[significant_mask]

# Plot regular points
ax.scatter(
regular_data["cumulative_pos"],
regular_data["-log10p"],
c=color,
s=15,
alpha=0.6,
edgecolors="none",
rasterized=True,
)

# Plot significant points with emphasis
if len(significant_data) > 0:
ax.scatter(
significant_data["cumulative_pos"],
significant_data["-log10p"],
c="#E74C3C", # Red for significant hits
s=50,
alpha=0.9,
edgecolors="white",
linewidths=0.5,
zorder=5,
rasterized=True,
)

# Add threshold lines
ax.axhline(
y=genome_wide_threshold,
color="#E74C3C",
linestyle="--",
linewidth=2,
label="Genome-wide significance (p < 5×10⁻⁸)",
alpha=0.8,
)
ax.axhline(
y=suggestive_threshold,
color="#FFD43B",
linestyle="--",
linewidth=2,
label="Suggestive threshold (p < 1×10⁻⁵)",
alpha=0.8,
)

# Set x-axis with chromosome labels
ax.set_xticks([chrom_centers[c] for c in sorted(chrom_centers.keys())])
ax.set_xticklabels([str(c) for c in sorted(chrom_centers.keys())], fontsize=14)
ax.set_xlim(0, df["cumulative_pos"].max() * 1.01)

# Set y-axis
ax.set_ylim(0, df["-log10p"].max() * 1.1)

# Labels and styling
ax.set_xlabel("Chromosome", fontsize=20)
ax.set_ylabel("-log₁₀(p-value)", fontsize=20)
ax.set_title("manhattan-gwas · matplotlib · pyplots.ai", fontsize=24)
ax.tick_params(axis="y", labelsize=16)
ax.tick_params(axis="x", labelsize=14)

# Legend
ax.legend(fontsize=14, loc="upper right", framealpha=0.9)

# Remove top and right spines for cleaner look
ax.spines["top"].set_visible(False)
ax.spines["right"].set_visible(False)

plt.tight_layout()
plt.savefig("plot.png", dpi=300, bbox_inches="tight")
27 changes: 27 additions & 0 deletions plots/manhattan-gwas/metadata/matplotlib.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
library: matplotlib
specification_id: manhattan-gwas
created: '2025-12-31T05:31:23Z'
updated: '2025-12-31T05:37:51Z'
generated_by: claude-opus-4-5-20251101
workflow_run: 20612787947
issue: 2925
python_version: 3.13.11
library_version: 3.10.8
preview_url: https://storage.googleapis.com/pyplots-images/plots/manhattan-gwas/matplotlib/plot.png
preview_thumb: https://storage.googleapis.com/pyplots-images/plots/manhattan-gwas/matplotlib/plot_thumb.png
preview_html: null
quality_score: 92
review:
strengths:
- Excellent chromosome visualization with alternating blue colors providing clear
visual distinction between chromosomes
- Proper highlighting of significant SNPs with larger red markers and white edges
that stand out clearly
- Realistic GWAS data simulation with appropriate chromosome sizes, SNP densities,
and significant peaks on specific chromosomes
- Clean code structure with good use of rasterized=True for performance optimization
with 110k+ data points
- Both genome-wide and suggestive threshold lines included with clear legend
weaknesses:
- No grid lines present (minor - acceptable for Manhattan plots to reduce clutter)
- Y-axis label could include clearer notation