In [7]:
import pandas as pd
import json

def analyze_output_archcandy(csv_file, protein_seq, protein_name):
    """Convert ArchCandy region hits into a per-residue score table.

    Reads ``csv_file`` (which must have ``Start``, ``Stop`` and ``Score``
    columns), treats every row as a 1-based inclusive residue range and,
    for each residue of ``protein_seq``, records the highest score of any
    range covering it together with a 0/1 coverage flag. The table is
    written to ``{protein_name}_ArchCandy_parsed.csv`` and also returned.

    Parameters
    ----------
    csv_file : path or file-like
        Anything accepted by ``pandas.read_csv``.
    protein_seq : sized
        The protein sequence; only its length is used.
    protein_name : str
        Prefix of the output file name.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``(result_df, amyloid_df)``: the per-residue table with columns
        ``Position``, ``Amyloid_Score`` and ``Amyloid_Status``, and the raw
        table as read from ``csv_file``.
    """
    amyloid_df = pd.read_csv(csv_file)

    seq_len = len(protein_seq)
    positions = list(range(1, seq_len + 1))
    scores = [0.0] * seq_len
    amyloid_status = [0] * seq_len

    # Walk the columns directly instead of using iterrows(): iterrows()
    # upcasts a mixed int/float row to float, and range() rejects floats.
    hits = zip(amyloid_df['Start'], amyloid_df['Stop'], amyloid_df['Score'])
    for start, stop, score in hits:
        # Convert to 0-based indices and clamp to the sequence, so that an
        # out-of-range Start (<= 0) cannot wrap around to the end of the
        # lists through negative indexing.
        first = max(int(start) - 1, 0)
        last = min(int(stop) - 1, seq_len - 1)

        for pos in range(first, last + 1):
            # Keep the best score among overlapping regions.
            if score > scores[pos]:
                scores[pos] = score
            amyloid_status[pos] = 1

    result_df = pd.DataFrame({
        'Position': positions,
        'Amyloid_Score': scores,
        'Amyloid_Status': amyloid_status
    })

    # Save the per-residue table.
    output_file = f"{protein_name}_ArchCandy_parsed.csv"
    result_df.to_csv(output_file, index=False)

    return result_df, amyloid_df

In [9]:
def analyze_output_crossbeta(json_file, threshold, protein_name):
    """Flatten a Cross-Beta JSON result into a per-residue CSV.

    The JSON document is expected to be a mapping; the value under its
    first key must be a list whose first element holds an ``'AA_list'`` of
    per-residue records with ``'index'``, ``'amino_acid'`` and
    ``'mean_confidence'`` entries.

    Parameters
    ----------
    json_file : str
        Path of the JSON file to read.
    threshold : number
        Residues whose score is greater than or equal to this value get
        ``amyloid = 1``, all others ``0``.
    protein_name : str
        Prefix of the output file name.

    Writes ``{protein_name}_crossbeta_parsed.csv`` with the columns
    ``aa``, ``score`` and ``amyloid``. Nothing is returned.
    """
    with open(json_file, 'r') as handle:
        payload = json.load(handle)

    # Only the first entry stored under the first key (the ID) is used.
    first_id = tuple(payload.keys())[0]
    residues = payload[first_id][0]['AA_list']

    records = []
    for residue in residues:
        records.append({
            # Shift the stored index by one (presumably 0-based -> 1-based).
            'aa': residue['index'] + 1,
            'amino_acid': residue['amino_acid'],
            'score': residue['mean_confidence'],
        })
    table = pd.DataFrame(records)

    flagged = table.assign(amyloid=(table['score'] >= threshold).astype(int))

    # The residue letter is not part of the exported table.
    parsed = flagged[['aa', 'score', 'amyloid']]

    parsed.to_csv(f"{protein_name}_crossbeta_parsed.csv", index=False)

In [11]:
def analyze_output_pasta(pasta_file, protein_name, threshold=-5):
    """Turn a single-column PASTA output file into a per-residue CSV.

    ``pasta_file`` is read without a header as one column of values. Rows
    are numbered from 1 in file order, and a row is flagged as amyloid when
    its value is strictly below ``threshold``.

    Parameters
    ----------
    pasta_file : path or file-like
        Anything accepted by ``pandas.read_csv``.
    protein_name : str
        Prefix of the output file name.
    threshold : number, optional
        Cut-off below which a value is flagged (default ``-5``, the value
        that was previously hard-coded).

    Writes ``{protein_name}_pasta_parsed.csv`` with the columns ``aa``,
    ``energy`` and ``amyloid``. Nothing is returned.
    """
    energies = pd.read_csv(pasta_file, header=None, names=['value'])['value']

    parsed = pd.DataFrame({
        'aa': range(1, len(energies) + 1),
        'energy': energies.values,
        'amyloid': (energies < threshold).astype(int)
    })

    output_file = f"{protein_name}_pasta_parsed.csv"

    parsed.to_csv(output_file, index=False)

In [13]:
def analyze_output_aggreprot(aggreprot_file, protein_name, threshold=0.25):
    """Reduce an AggreProt CSV export to a per-residue score table.

    The file is read with its second line as the header row (the first
    line is skipped). The ``struct_position``, ``amino_acid``, ``sasa`` and
    ``transmembrane`` columns are removed, ``position`` is renamed to
    ``aa`` and ``aggregation`` to ``score``, and an ``amyloid`` column is
    added that is 1 where ``score >= threshold`` and 0 otherwise.

    Parameters
    ----------
    aggreprot_file : path or file-like
        Anything accepted by ``pandas.read_csv``.
    protein_name : str
        Prefix of the output file name.
    threshold : number, optional
        Cut-off at or above which a residue is flagged (default ``0.25``,
        the value that was previously hard-coded).

    Raises
    ------
    KeyError
        If any of the four columns to remove is missing from the file.

    Writes ``{protein_name}_aggreprot_parsed.csv``. Nothing is returned.
    """
    parsed = (
        pd.read_csv(aggreprot_file, header=1)
        .drop(columns=['struct_position', 'amino_acid', 'sasa', 'transmembrane'])
        .rename(columns={'position': 'aa', 'aggregation': 'score'})
    )
    parsed['amyloid'] = (parsed['score'] >= threshold).astype(int)

    output_file = f"{protein_name}_aggreprot_parsed.csv"

    parsed.to_csv(output_file, index=False)

In [15]:
def analyze_output_waltz(waltz_file, protein_name):
    """Convert a tab-separated, header-less WALTZ file into a parsed CSV.

    The first column of ``waltz_file`` is exported as ``aa`` and the second
    as ``score``; ``amyloid`` is 1 wherever the score differs from zero and
    0 otherwise.

    Parameters
    ----------
    waltz_file : path or file-like
        Anything accepted by ``pandas.read_csv``.
    protein_name : str
        Prefix of the output file name.

    Writes ``{protein_name}_waltz_parsed.csv``. Nothing is returned.
    """
    raw = pd.read_csv(waltz_file, sep='\t', header=None)

    residue_numbers = raw[0]
    raw_scores = raw[1]
    # Any non-zero score counts as a positive call.
    is_amyloid = raw_scores.ne(0).astype(int)

    parsed = pd.DataFrame({
        'aa': residue_numbers,
        'score': raw_scores.values,
        'amyloid': is_amyloid
    })

    parsed.to_csv(f"{protein_name}_waltz_parsed.csv", index=False)

In [16]:
# Parse the WALTZ output stored in 'APP_human.dat'; the result is written to
# 'APP_human_waltz_parsed.csv' (a relative path, so it lands in the current
# working directory).
analyze_output_waltz('APP_human.dat', 'APP_human')