In [38]:
import pandas as pd
import matplotlib.pyplot as plt
from Bio.Data.IUPACData import protein_letters_3to1
import numpy as np
import nmr_prot_assignment as npa
import os
import seaborn as sns

In [39]:
# Load the full and the anonymised assignment tables. In both, a column named
# 'Unnamed: 0' (if present) is relabelled 'Original_Index'.
assignment_df = (
    pd.read_csv("All_Assignments.csv")
    .rename(columns={'Unnamed: 0': 'Original_Index'})
)
anon_assignment_df = (
    pd.read_csv("All_Anonymised_Assignments.csv")
    .rename(columns={'Unnamed: 0': 'Original_Index'})
)

### Generate a file for Mike to check

In [45]:
def get_entry_info(anon_assignment_df, assignment_df, entry_id):
    """Export the anonymised assignment and the sequence of one entry.

    Two files are written into a directory named ``str(entry_id)`` (a path
    relative to the current working directory, created if missing):

    * ``Anonymised_assignment.csv`` -- the entry's rows sorted by
      ``Dummy_Comp_index_ID``, after which
      ``add_metadata_to_anonymised_assignment_csv`` is run on the file.
    * ``Sequence.txt`` -- ``"Sequence: "`` followed by the
      ``sequence_from_assignment`` value of the first sorted row.

    Parameters
    ----------
    anon_assignment_df : pandas.DataFrame
        Must provide the columns ``Entry_ID``, ``Dummy_Comp_index_ID``,
        ``Atom_ID``, ``Val``, ``Comp_index_ID``, ``Comp_ID_sl`` and
        ``sequence_from_assignment``.
    assignment_df : pandas.DataFrame
        Currently unused; kept so existing callers keep working (the export of
        the non-anonymised table is disabled).
    entry_id
        Value compared against the ``Entry_ID`` column; also used as the
        output directory name.

    Raises
    ------
    IndexError
        If no row of ``anon_assignment_df`` matches ``entry_id``.
    """
    entry_rows = anon_assignment_df[anon_assignment_df['Entry_ID'] == entry_id]
    if entry_rows.empty:
        # Fail before touching the filesystem so an unknown entry does not
        # leave behind an empty directory and a header-only CSV. IndexError is
        # the type the previous `.iloc[0]` lookup raised in this situation.
        raise IndexError(f"No anonymised assignment rows found for Entry_ID {entry_id!r}")

    entry_rows = entry_rows.sort_values('Dummy_Comp_index_ID')

    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(str(entry_id), exist_ok=True)

    csv_path = f'{entry_id}/Anonymised_assignment.csv'
    # NOTE(review): Comp_index_ID and Comp_ID_sl are exported next to the dummy
    # index -- confirm that is intended for a file labelled "Anonymised".
    exported_columns = ['Dummy_Comp_index_ID', 'Atom_ID', 'Val', 'Comp_index_ID', 'Comp_ID_sl']
    entry_rows[exported_columns].to_csv(csv_path, index=False)

    with open(f"{entry_id}/Sequence.txt", "w") as text_file:
        text_file.write(f"Sequence: {entry_rows['sequence_from_assignment'].iloc[0]}")

    # Prepend the explanatory '#' lines to the CSV that was just written.
    add_metadata_to_anonymised_assignment_csv(csv_path)

def add_metadata_to_anonymised_assignment_csv(csv_file):
    """Prepend ``#`` lines describing the columns to an anonymised CSV file.

    The file is read in full and rewritten in place with three description
    lines (``Dummy_Comp_index_ID``, ``Atom ID``, ``Val``) ahead of its
    original content. A file that already starts with exactly those lines is
    left untouched, so calling this again does not stack duplicate headers.

    Parameters
    ----------
    csv_file : str or path-like
        Path of an existing file; it is overwritten in place.

    Returns
    -------
    None
    """
    header = (
        '# Dummy_Comp_index_ID: Random integer assigned to residue \n'
        '# Atom ID: Symbol for atom \n'
        '# Val: Chemical shift for corresponding atom \n'
    )

    with open(csv_file, "r") as csv_in:
        text = csv_in.read()

    # Idempotency guard: skip the rewrite when the header is already present.
    if text.startswith(header):
        return

    with open(csv_file, "w") as csv_out:
        csv_out.write(header)
        csv_out.write(text)

def add_metadata_to_assignment_csv(csv_file):
    """Prepend ``#`` lines describing the columns to an assignment CSV file.

    The file is read in full and rewritten in place with four description
    lines (``Comp_ID_sl``, ``Comp_index_ID``, ``Atom ID``, ``Val``) ahead of
    its original content. A file that already starts with exactly those lines
    is left untouched, so calling this again does not stack duplicate headers.

    Parameters
    ----------
    csv_file : str or path-like
        Path of an existing file; it is overwritten in place.

    Returns
    -------
    None
    """
    header = (
        '# Comp_ID_sl: Single Letter Amino Acid \n'
        '# Comp_index_ID: Residue Index \n'
        '# Atom ID: Symbol for atom \n'
        '# Val: Chemical shift for corresponding atom \n'
    )

    with open(csv_file, "r") as csv_in:
        text = csv_in.read()

    # Idempotency guard: skip the rewrite when the header is already present.
    if text.startswith(header):
        return

    with open(csv_file, "w") as csv_out:
        csv_out.write(header)
        csv_out.write(text)


In [46]:
# Write the export files for entry 4081 into the relative directory "4081".
get_entry_info(
    anon_assignment_df=anon_assignment_df,
    assignment_df=assignment_df,
    entry_id=4081,
)

### Plot the chemical shift distribution of each atom type

In [None]:
import plotly.express as px

# Reference shift per atom name, drawn below as dashed vertical guide lines.
# NOTE(review): the provenance of these numbers is not recorded here -- confirm
# which BMRB statistics they were taken from.
bmrb_mean_shifts = {
    'H': 8.193,
    'C': 177.750,
    'CA': 53.158,
    'CB': 19.033,
    'N': 123.351,
}

# Histogram of all 'Val' entries, coloured by 'Atom_ID', with a box-plot marginal.
fig = px.histogram(
    assignment_df,
    x='Val',
    color='Atom_ID',
    opacity=0.75,
    marginal='box',
)

# One labelled guide line per reference value.
for atom_name, mean_shift in bmrb_mean_shifts.items():
    fig.add_vline(
        x=mean_shift,
        line_width=1,
        line_dash="dash",
        line_color="black",
        annotation_text=atom_name,
    )

fig

## Define the inputs and outputs of the model

In [None]:
# NOTE(review): the result is stored as `input_df` although the helper is named
# `get_model_outputs_...` -- confirm against the `npa` module that this naming
# is intentional.
input_df = npa.get_model_outputs_from_assignment_df(assignment_df)

In [None]:
# NOTE(review): the result is stored as `output_df` although the helper is named
# `get_model_inputs_...`, and a helper named "from_anonymised_assignments_df"
# is given `assignment_df` rather than `anon_assignment_df` -- confirm both
# against the `npa` module.
output_df = npa.get_model_inputs_from_anonymised_assignments_df(assignment_df)

In [None]:
# Bare expression: shows `output_df` as this cell's output.
output_df