Read data

In [2]:
# The merged table is tab-separated and has no header row (header=None),
# so the ten column labels are assigned explicitly after loading.
merged_columns = [
    "scaf", "start", "end",
    "rrate", "cpgi", "prdm9", "spot",
    "spot_id", "species", "kind",
]
v = pd.read_csv("../input_tab/merged.tab.gz", sep="\t", header=None)
v.columns = merged_columns

Select some random spots to plot

In [125]:
# Maximum number of distinct spots drawn from each (species, kind) group.
n = 50

# Dedicated, seeded generator: re-running the cell (or the whole notebook)
# selects the same spots, without touching the global `random` state.
spot_rng = random.Random(0)

ran_spots = []
# groupby yields the keys in the order they were requested: (species, kind).
for (sp, kind), df in v.groupby(["species", "kind"]):
    # random.sample needs a real sequence, hence the conversion to a list.
    spot_ids = df["spot_id"].unique().tolist()
    # Sample WITHOUT replacement so no spot is picked twice within a group;
    # groups holding fewer than n spots contribute all of them.
    ran_spots.extend(spot_rng.sample(spot_ids, min(n, len(spot_ids))))

Plotting

In [126]:
def plot_spot(df, sp, kind, spot_id, flank=20e3, out_dir="../pdfs"):
    """Plot the relative recombination rate around one spot and save it as a PDF.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows belonging to a single spot. The columns ``start``, ``end``,
        ``rrate``, ``cpgi`` and ``prdm9`` are read.
    sp, kind : str
        Used in the figure title and in the output file name.
    spot_id : str
        Identifier whose last ``:``-separated field has the form
        ``<start>-<end>``; these coordinates give the spot bar and, padded
        by ``flank``, the x-axis window.
    flank : float, optional
        Padding added on each side of the spot for the x-axis limits.
    out_dir : str, optional
        Directory in which ``<sp>.<kind>.<spot_id>.pdf`` is written.
    """
    df = df.sort_values(by="start")

    # Spot coordinates are encoded in the identifier itself.
    start, end = map(int, spot_id.split(":")[-1].split("-"))

    # Every interval contributes both of its end points with the same rate:
    # xs = [s1, e1, s2, e2, ...], rates = [r1, r1, r2, r2, ...].
    xs = np.column_stack([df["start"].values, df["end"].values]).ravel()
    rates = np.repeat(df["rrate"].values.astype(float), 2)

    # Relative rate: divide by the mean of the finite values. A zero or
    # undefined mean would turn the whole profile into NaN/inf, so in that
    # case the rates are left unnormalised.
    finite = np.isfinite(rates)
    mean_rate = rates[finite].mean() if finite.any() else 0.0
    ys = rates / mean_rate if mean_rate != 0 else rates

    # Height used to place the feature tracks and the top tick. Fall back to
    # 1 when there is no positive peak, otherwise all tracks collapse on y=0.
    peak = ys[finite].max() if finite.any() else 0.0
    scale = peak if peak > 0 else 1.0

    fig, ax = plt.subplots()
    try:
        # Recombination rates
        ax.step(xs, ys,
                color="gray",
                alpha=0.7)
        ax.scatter(xs, ys,
                   color="gray",
                   alpha=0.7,
                   s=3)
        ax.axhline(y=0,
                   linestyle="dotted",
                   color="lightgray",
                   linewidth=1)

        # Feature tracks, drawn as horizontal bars below the zero line.
        feats = ["cpgi", "prdm9", "spot"]
        yfeats = np.linspace(-scale*0.1, -scale*0.3, len(feats))
        for idx, (feature, ycor) in enumerate(zip(feats, yfeats)):
            color = "C{}".format(idx)
            if feature == "spot":
                # The spot itself spans the coordinates parsed from spot_id.
                segments = [(start, end)]
            else:
                # One bar per interval in which the feature value is positive.
                hits = df[df[feature] > 0]
                segments = zip(hits["start"], hits["end"])
            for seg_start, seg_end in segments:
                ax.plot([seg_start, seg_end],
                        [ycor, ycor],
                        c=color)

        ax.set_xlim([start - flank, end + flank])
        ax.set_yticks(list(yfeats) + [0, scale])
        ax.set_yticklabels(feats + [0, "{:.2f}".format(scale)])
        ax.set_title("{} | {}\n{}".format(sp, kind, spot_id), fontsize=10)
        ax.set_ylabel("             Relative recombination rate")
        sns.despine(ax=ax)
        fig.tight_layout()
        fig.savefig("{}/{}.{}.{}.pdf".format(out_dir, sp, kind, spot_id))
    finally:
        # Always release this figure, even if plotting or saving failed,
        # so a long loop does not accumulate open figures.
        plt.close(fig)


# Keep only the rows that belong to the randomly selected spots.
subd = v[v.spot_id.isin(ran_spots)]

# One figure (and one PDF) per (kind, species, spot) combination.
for (kind, sp, spot_id), df in subd.groupby(["kind", "species", "spot_id"]):
    plot_spot(df, sp, kind, spot_id)