In [7]:
# Load the data
import os
import pandas as pd
import pickle

# Mass grid of the scan and the Monte Carlo samples read for every grid point.
xu_masses = [500, 750, 1000, 1500, 2000]  # GeV
phi_masses = [1, 5, 10, 50, 100, 325]  # GeV
mc_names = ["signal", "ttbarmumu", "ttbarmumumunu"]

# Histogram files are looked up relative to the current working directory.
data_path = os.path.join(os.getcwd(), "data")
print("Loading data from", data_path)


def _read_hist(xu_mass, phi_mass, mc):
    """Read one histogram file into a DataFrame.

    The file is located at
    ``<data_path>/xu_<xu_mass>_GeV/phi_<phi_mass>_GeV/<mc>_hist.dat`` and is
    parsed as two space-separated, header-less columns named ``bin_center``
    and ``bin_content``.
    """
    hist_file = os.path.join(
        data_path,
        f"xu_{xu_mass}_GeV",
        f"phi_{phi_mass}_GeV",
        f"{mc}_hist.dat",
    )
    return pd.read_csv(
        hist_file,
        sep=" ",
        names=["bin_center", "bin_content"],
        header=None,
    )


# data["xu_<m>_phi_<m>"][<sample name>] -> DataFrame(bin_center, bin_content)
data = {
    "xu_{}_phi_{}".format(xu_mass, phi_mass): {
        mc: _read_hist(xu_mass, phi_mass, mc) for mc in mc_names
    }
    for xu_mass in xu_masses
    for phi_mass in phi_masses
}
print("Done!")

Loading data from /workspaces/stats_intro/data
Done!


In [8]:
from itertools import product

# This transformation was necessary due to a data storage error.
# Originally, the data was stored in a way that the 'bin_center' values were in
# reverse order and the 'bin_content' values were stored as their complements
# (i.e., 1 - original value), and in the wrong bin order.
#
# Only the non-signal samples are rewritten; "signal" is left untouched.
# NOTE: this cell overwrites the entries of `data` in place, so it is not
# idempotent -- re-run the loading cell before running it a second time.
for key, mc in product(data.keys(), mc_names):
    if mc == "signal":
        continue
    stored = data[key][mc]
    data[key][mc] = pd.DataFrame(
        {
            # Positional reversal: no hard-coded bin count, so this works for
            # histograms of any length (the original assumed exactly 50 bins).
            "bin_center": stored["bin_center"].to_numpy()[::-1],
            # Contents keep their stored order and are un-complemented.
            "bin_content": 1 - stored["bin_content"].to_numpy(),
        }
    )

In [9]:
# Build one wide table per mass point: bin_center plus one column per sample.
# The joins use pandas' default inner merge on 'bin_center', so only bins
# present in all three samples are kept.
merged_data = {}
for key, samples in data.items():
    merged_df = (
        samples["signal"]
        .merge(samples["ttbarmumu"], on="bin_center")
        .merge(samples["ttbarmumumunu"], on="bin_center")
    )
    # Replace the auto-suffixed 'bin_content*' names with the sample names.
    merged_df.columns = ["bin_center", "signal", "ttbarmumu", "ttbarmumumunu"]
    merged_data[key] = merged_df

# Quick look at both ends of one mass point.
preview = merged_data["xu_500_phi_1"]
print(preview.head())
print(preview.tail())

   bin_center    signal  ttbarmumu  ttbarmumumunu
0        0.01  0.996261   0.000021   1.151861e-07
1        0.03  0.001830   0.000009   1.175761e-07
2        0.05  0.000550   0.000010   1.200624e-07
3        0.07  0.000270   0.000010   1.226509e-07
4        0.09  0.000250   0.000010   1.253482e-07
    bin_center    signal  ttbarmumu  ttbarmumumunu
45        0.91  0.000002   0.000044       0.000001
46        0.93  0.000002   0.000040       0.000001
47        0.95  0.000002   0.000170       0.000002
48        0.97  0.000002   0.000440       0.000010
49        0.99  0.000002   0.998468       0.999973


In [10]:
from io import StringIO

# Tab-separated cross-section table: one row per phi mass, one column per xu
# mass (values are used as pb by the scaling step).
# Kept under its own name so it does not overwrite `data`, the histogram dict
# that the earlier cells read and modify.
xs_table_text = """
0.1292000000	0.0613100000	0.0299900000	0.0073040000	0.0016790000
0.1291000000	0.0612500000	0.0299600000	0.0072960000	0.0016760000
0.1291000000	0.0612400000	0.0299500000	0.0072960000	0.0016740000
0.1262000000	0.0606400000	0.0298000000	0.0072800000	0.0016710000
0.1172000000	0.0589500000	0.0293400000	0.0072260000	0.0016650000
0.0089690000	0.0388800000	0.0237400000	0.0066090000	0.0015850000
"""

# Read the data into a DataFrame
xs_df = pd.read_csv(StringIO(xs_table_text), sep="\t", header=None)

# Set the column names to the xu masses
xs_df.columns = [f"xu_{int(x)}_GeV" for x in xu_masses]

# Set the index to the phi masses
xs_df.index = [f"phi_{x}_GeV" for x in phi_masses]

# Save the DataFrame to a pickle file
with open(os.path.join(data_path, "cross_sections.pkl"), "wb") as f:
    pickle.dump(xs_df, f)

In [11]:
# Scale every sample column by (cross section * luminosity * efficiency) and
# add the summed background column.
# NOTE: the scaling modifies the frames in `merged_data` in place, so running
# this cell twice applies the factor twice -- rebuild `merged_data` first.
lumino = 150 / 1e-3  # 1/fb to 1/pb
eff = 1.0  # Assume 100% efficiency for now
mc_names = ["signal", "ttbarmumu", "ttbarmumumunu"]
for xu_mass, phi_mass in product(xu_masses, phi_masses):
    hists = merged_data[f"xu_{xu_mass}_phi_{phi_mass}"]

    # Cross sections in pb: signal from the table, backgrounds fixed.
    xs_dict = {
        "signal": xs_df.loc[f"phi_{phi_mass}_GeV", f"xu_{xu_mass}_GeV"],
        "ttbarmumu": 0.002574,
        "ttbarmumumunu": 0.0004692,
    }

    for mc in mc_names:
        hists[mc] *= (
            xs_dict[mc] * lumino * eff
        )
    hists["background"] = hists["ttbarmumu"] + hists["ttbarmumumunu"]

# Save the data
with open(os.path.join(data_path, "histograms.pkl"), "wb") as f:
    pickle.dump(merged_data, f)

{'xu_500_phi_1':     bin_center        signal   ttbarmumu  ttbarmumumunu
 0         0.01  19307.539347    0.008118       0.000008
 1         0.03     35.464012    0.003630       0.000008
 2         0.05     10.658584    0.003684       0.000008
 3         0.07      5.232395    0.003698       0.000009
 4         0.09      4.844811    0.003776       0.000009
 5         0.11      2.906886    0.005699       0.000009
 6         0.13      1.389715    0.003920       0.000009
 7         0.15      1.681564    0.003985       0.000009
 8         0.17      1.301758    0.004053       0.000010
 9         0.19      0.853907    0.004215       0.000010
 10        0.21      0.682818    0.005938       0.000010
 11        0.23      0.720583    0.004417       0.000010
 12        0.25      1.088648    0.004351       0.000011
 13        0.27      0.773572    0.004434       0.000011
 14        0.29      0.401257    0.006167       0.000011
 15        0.31      0.369434    0.004874       0.000012
 16        0.33