In [7]:
import plotly.graph_objects as go
import plotly.express as px

import os
import pandas as pd
import numpy as np

from src.curve_fitting import (
    OutlierRemover,
    DataBinner,
    Fitter,
    ModelSelector,
    )

# Configure NumPy array printing in a SINGLE call.
# `formatter` is reset on every call to np.set_printoptions, so setting
# `threshold` in a separate, later call would silently discard the float
# formatter configured here.
np.set_printoptions(
    formatter={"float": lambda x: "{0:0.3f}".format(x)},  # 3 decimals per float
    linewidth=100,
    suppress=True,
    threshold=5,  # arrays with more than 5 elements are printed summarized
)

In [2]:
import pickle

def _load_pickle(path):
    """Open ``path`` in binary mode and return the single object pickled in it."""
    # NOTE: pickle.load can execute arbitrary code while deserializing;
    # only load files that come from a trusted source.
    with open(path, "rb") as handle:
        return pickle.load(handle)


# Pre-computed calculation results, one pickle file per molecule type.
protein_calculations = _load_pickle("data/protein_calculations.pkl")
ssdna_calculations = _load_pickle("data/ssdna_calculations.pkl")

In [8]:
# Build one instance of each curve-fitting stage (OutlierRemover, DataBinner,
# Fitter, ModelSelector); the same instances are reused by the analysis cells.
outlier_remover, data_binner, fitter, model_select = (
    OutlierRemover(),
    DataBinner(),
    Fitter(),
    ModelSelector(),
)

# NOTE(review): `maxfev` is not referenced in the cells shown here; presumably
# an upper bound on function evaluations for the fitter -- confirm where it is used.
maxfev = 1_000_000

In [9]:
# df = pd.read_csv(data_file)
# outlier_remover = OutlierRemover()
# df = outlier_remover.remove_outliers(df, angle_col)

# data_binner = DataBinner()
# x, y = data_binner.get_bins_U_norm(df, angle_col)

# fitter = Fitter()

# fits = {}
# for num_gaussians in range(1, 6):
#     fits[num_gaussians] = fitter.fit_func(x, y, num_gaussians)

# model_selector = ModelSelector()
# (
#     best_fit,
#     best_func,
#     best_score,
#     best_num_gaussians,
# ) = model_selector.select_best_model(x, y, fits)

# print(f"Best model has {best_num_gaussians} Gaussians with a score of {best_score}")

# plt.plot(x, y, label="Data")
# plt.plot(x, best_func, label=f"Best fit ({best_num_gaussians} Gaussians)")
# plt.legend()
# plt.show()

In [17]:
# Fit multi-Gaussian models to the protein "bbb" angle data, plot the best
# fit against the binned data, and save the figure as PNG and HTML.
data_bbb = protein_calculations["angles"]["bbb"]
# Wrap the raw values in a single-column dataframe, as the pipeline helpers
# are called with a dataframe plus a column name.
df = pd.DataFrame(data_bbb, columns=["angle"])
# Remove outliers
df = outlier_remover.remove_outliers(df, "angle")
# Get binned data
# NOTE(review): the saved run of this cell emitted "divide by zero encountered
# in log"; presumably from empty bins inside the binning/fitting helpers -- confirm.
x, y = data_binner.get_bins_U_norm(df, "angle")
# Fit one candidate model per Gaussian count, keyed by that count
fits = {}
for num_gaussians in [2, 4, 6]:
    fits[num_gaussians] = fitter.fit_func(x, y, num_gaussians)
# Select the best model among the candidates
(
    best_fit,
    best_func,
    best_score,
    best_num_gaussians,
) = model_select.select_best_model(x, y, fits)
print(f"Best model has {best_num_gaussians} Gaussians with a score of {best_score}")


# Plot the binned data (markers) together with the selected fit (line)
fig = go.Figure()
fig.add_trace(go.Scatter(x=x, y=y, mode="markers", name="Data"))
fig.add_trace(
    go.Scatter(
        x=x,
        y=best_func,
        mode="lines",
        name=f"Best fit ({best_num_gaussians} Gaussians)",
    )
)
# NOTE(review): y comes from `get_bins_U_norm`; confirm that "Probability"
# (and "degrees" for x) are the right axis labels for that quantity.
fig.update_layout(
    title="Protein BBB Angles",
    xaxis_title="Angle (degrees)",
    yaxis_title="Probability",
    legend_title="Legend",
    font=dict(
        family="Courier New, monospace",
        size=18,
        color="RebeccaPurple",
    ),
)
fig.show()

# Save the best fit plot.
# The figure writers do not create missing parent directories, so make sure
# the output folders exist first (a fresh checkout may not have them).
png_path = "plots/protein_bbb_angles_fit.png"
html_path = "htmls/protein_bbb_angles_fit.html"
for out_path in (png_path, html_path):
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
fig.write_image(png_path)
fig.write_html(html_path)


divide by zero encountered in log


Covariance of the parameters could not be estimated



Best model has 6 Gaussians with a score of 0.4282832287308752
