## Imports / Helper Functions

### Imports

In [1]:
import pandas as pd
import reciprocalspaceship as rs
import matplotlib.pyplot as plt
from sklearn.metrics import r2_score, auc
from sklearn.model_selection import train_test_split
import numpy as np
import json
from scipy import stats
import random
import math
import gemmi
from sklearn.decomposition import PCA
import time
import sys
from scipy.stats import pearsonr, skew, kurtosis
import pickle
from tqdm.notebook import tqdm

In [4]:
# Load the sample lists from JSON. `samples` is iterated further down to build
# per-sample MTZ file names; the apo/bound lists are presumably subsets of it
# (TODO confirm against the JSON files).
# Context managers make sure each file handle is closed right after parsing
# instead of being left for the garbage collector.
with open('../PTP1B_DK/samples.json') as samples_file:
    samples = json.load(samples_file)
with open('../PTP1B_DK/apo_samples.json') as apo_file:
    apo_samples = json.load(apo_file)
with open('../PTP1B_DK/bound_samples.json') as bound_file:
    bound_samples = json.load(bound_file)

## Adding Weights

In [27]:
def compute_weights(df, sigdf, alpha=0):
    """
    Return a per-element weight from a difference amplitude and its uncertainty.

    The weight is

        1 / (1 + sigdf**2 / mean(sigdf**2) + alpha * df**2 / mean(df**2))

    so elements with a large uncertainty (and, when ``alpha`` is non-zero,
    a large difference) receive a smaller weight.

    Parameters
    ----------
    df : array-like
        Difference values; must support ``**`` and ``.mean()``.
    sigdf : array-like
        Uncertainties of ``df``; must support ``**`` and ``.mean()``.
    alpha : float, optional
        Scale of the ``df**2`` term. The default of 0 removes that term, so
        the weight then depends on ``sigdf`` only.

    Returns
    -------
    array-like
        The weights, one per element of the inputs.
    """
    sig_squared = sigdf**2
    diff_squared = df**2

    # Each squared quantity is normalised by its own mean.
    uncertainty_term = sig_squared / sig_squared.mean()
    magnitude_term = alpha*(diff_squared / diff_squared.mean())

    inverse_weight = (1 + uncertainty_term + magnitude_term)
    return inverse_weight**-1

In [28]:
# For every sample, read its reconstruction MTZ, derive an uncertainty for the
# difference amplitudes, weight the differences, and write the result to a new
# MTZ file. Samples whose reconstruction file cannot be read are skipped.
skipped_samples = []

for sample in tqdm(samples):

    try:
        recons = rs.read_mtz(f'../PTP1B_DK/recons_mtzs/PTP1B-y{sample}_mrflagsref_idxs_recons.mtz')
    except Exception:
        # Best-effort: a missing or unreadable file just skips this sample.
        # `except Exception` (rather than a bare `except:`) lets
        # KeyboardInterrupt / SystemExit propagate so the loop can be stopped.
        skipped_samples.append(sample)
        continue

    # Rescale the observed sigmas by the same factor that maps F-obs to F-obs-scaled.
    # NOTE(review): `.astype("Stddev")` applies to the ratio only, not to the
    # product -- confirm the product still carries the intended MTZ dtype.
    recons['SIGF-scaled'] = recons['SIGF-obs'] * (recons['F-obs-scaled'] / recons['F-obs']).astype("Stddev")
    # Combine the scaled and reconstructed sigmas in quadrature.
    recons['SIGF-diff'] = np.sqrt(recons["SIGF-scaled"]**2 + recons["SIGF-recons"]**2).astype("Stddev")
    recons['weights'] = compute_weights(recons["F-obs-diff"], recons["SIGF-diff"], alpha=0.05)
    recons['weighted_F-obs-diff'] = (recons["weights"]*recons["F-obs-diff"]).astype("F")

    recons.write_mtz(f'../PTP1B_DK/weighted_mtzs/PTP1B-y{sample}_mrflagsref_idxs_diff_weighted.mtz')

# Make skipped samples visible instead of dropping them silently.
if skipped_samples:
    print(f"Skipped {len(skipped_samples)} sample(s) whose reconstruction MTZ could not be read")

  0%|          | 0/1958 [00:00<?, ?it/s]