In [1]:
# Load the data
import os
import pandas as pd
import pickle

xu_masses = [500, 750, 1000, 1500, 2000]  # GeV
phi_masses = [1, 5, 10, 50, 100, 325]  # GeV
mc_names = ["signal", "ttbarmumu", "ttbarmumumunu"]

data_path = os.path.join(os.getcwd(), "data")
print("Loading data from", data_path)


def _read_histogram(xu_mass, phi_mass, mc):
    """Read one two-column histogram file for a mass point and sample.

    The file is looked up at
    ``<data_path>/xu_<xu_mass>_GeV/phi_<phi_mass>_GeV/<mc>_hist.dat`` and is
    parsed as space-separated values without a header row.

    Returns a DataFrame with the columns ``bin_center`` and ``bin_content``.
    """
    hist_file = os.path.join(
        data_path,
        f"xu_{xu_mass}_GeV",
        f"phi_{phi_mass}_GeV",
        f"{mc}_hist.dat",
    )
    return pd.read_csv(
        hist_file,
        sep=" ",
        names=["bin_center", "bin_content"],
        header=None,
    )


# data["xu_<xu>_phi_<phi>"][<sample name>] -> histogram DataFrame,
# for every (xu mass, phi mass) combination and every sample in mc_names.
data = {
    f"xu_{xu_mass}_phi_{phi_mass}": {
        mc: _read_histogram(xu_mass, phi_mass, mc) for mc in mc_names
    }
    for xu_mass in xu_masses
    for phi_mass in phi_masses
}
print("Done!")

Loading data from /workspaces/stats_intro/data
Done!


In [2]:
from itertools import product

# Undo a data storage error in every sample except "signal" (which is skipped).
# The affected files hold the 'bin_center' values in reverse order, and the
# 'bin_content' values as their complements (i.e., 1 - original value) in the
# wrong bin order. The fix pairs the reversed 'bin_center' column with
# 1 - 'bin_content'.
#
# The reversal is sized from each frame instead of a hard-coded 50 bins, and
# is done on whole columns rather than element by element.
#
# NOTE: this cell overwrites the entries of `data` in place, so it is not
# idempotent. Running it a second time re-applies the transformation to
# already-fixed frames; reload `data` first if the cell has to be re-run.
for key, mc in product(data.keys(), mc_names):
    if mc == "signal":
        continue
    stored = data[key][mc]
    data[key][mc] = pd.DataFrame(
        {
            # Positional reverse: row i receives the bin center of row n-1-i.
            "bin_center": stored["bin_center"].to_numpy()[::-1],
            "bin_content": 1 - stored["bin_content"].to_numpy(),
        }
    )

In [3]:
# Build one table per mass point: a shared 'bin_center' column plus one
# content column for each of the three samples.
merged_data = {}
for key, samples in data.items():
    combined = (
        samples["signal"]
        .merge(samples["ttbarmumu"], on="bin_center")
        .merge(samples["ttbarmumumunu"], on="bin_center")
    )
    # Every input calls its value column 'bin_content', so the merged columns
    # are renamed positionally after the sample they came from.
    combined.columns = ["bin_center", "signal", "ttbarmumu", "ttbarmumumunu"]
    merged_data[key] = combined


# Persist the merged tables next to the input data.
histograms_file = os.path.join(data_path, "histograms.pkl")
with open(histograms_file, "wb") as out:
    pickle.dump(merged_data, out)

# Show the first rows of one mass point as a quick visual check.
merged_data["xu_500_phi_1"].head()

Unnamed: 0,bin_center,signal,ttbarmumu,ttbarmumumunu
0,0.01,0.996261,2.1e-05,1.151861e-07
1,0.03,0.00183,9e-06,1.175761e-07
2,0.05,0.00055,1e-05,1.200624e-07
3,0.07,0.00027,1e-05,1.226509e-07
4,0.09,0.00025,1e-05,1.253482e-07


In [4]:
from io import StringIO

# Cross-section table as literal text: one row per phi mass and one column per
# xu mass (see the labels assigned below).
# It is stored under its own name so that the `data` dictionary of histograms
# built earlier in the notebook is not overwritten by a string, which would
# break any re-run of the cells that read `data`.
xs_table_text = """
0.1292000000	0.0613100000	0.0299900000	0.0073040000	0.0016790000
0.1291000000	0.0612500000	0.0299600000	0.0072960000	0.0016760000
0.1291000000	0.0612400000	0.0299500000	0.0072960000	0.0016740000
0.1262000000	0.0606400000	0.0298000000	0.0072800000	0.0016710000
0.1172000000	0.0589500000	0.0293400000	0.0072260000	0.0016650000
0.0089690000	0.0388800000	0.0237400000	0.0066090000	0.0015850000
"""

# Read the table into a DataFrame. Splitting on any run of whitespace parses
# the tab-separated text above and keeps working if an editor converts the
# tabs to spaces.
xs_df = pd.read_csv(StringIO(xs_table_text), sep=r"\s+", header=None)

# Column labels: xu masses in GeV.
columns = "500 750 1000 1500 2000"
xs_df.columns = [f"xu_{int(x)}_GeV" for x in columns.split()]

# Row labels: phi masses in GeV.
row_titles = [f"phi_{x}_GeV" for x in [1, 5, 10, 50, 100, 325]]
xs_df.index = row_titles

# Save the DataFrame to a pickle file
with open(os.path.join(data_path, "cross_sections.pkl"), "wb") as f:
    pickle.dump(xs_df, f)