In [None]:
import json
import pandas as pd

# Helper: turn labels such as "bin12" (or plain numbers) into integer ids.
def _extract_numeric_id(column, label):
    """Return the first run of digits in each value of *column* as integers.

    Args:
        column: pandas Series of node labels (strings or numbers).
        label: Column name, used only to build the error message.

    Returns:
        A pandas Series of ints aligned with *column*.

    Raises:
        ValueError: If any value contains no digits at all.
    """
    # astype(str) lets already-numeric ids go through the same path as
    # string labels; expand=False yields a Series instead of a 1-col frame.
    digits = column.astype(str).str.extract(r'(\d+)', expand=False)
    missing = digits.isna()
    if missing.any():
        examples = column[missing].unique()[:5].tolist()
        raise ValueError(
            f"{label} contains values without a numeric part: {examples}"
        )
    return digits.astype(int)


# Function to process the JSON data into a format suitable for a heatmap
def process_json_for_heatmap(json_filepath, output_filepath):
    """Convert an edge-list JSON file into sparse heatmap records.

    The input is loaded into a DataFrame and must provide ``Source``,
    ``Target`` and ``Weight`` fields. The numeric part of each ``Source`` /
    ``Target`` label becomes its integer id, weights of repeated
    (Source, Target) pairs are averaged, and only pairs whose mean weight is
    strictly positive are written out, ordered by Source then Target.

    Args:
        json_filepath: Path of the JSON file holding the edge records.
        output_filepath: Path the processed records are written to, as a
            JSON list of ``{"Source", "Target", "Weight"}`` objects.

    Raises:
        ValueError: If a Source/Target value has no numeric part.
        KeyError: If a non-empty input lacks one of the required fields.
    """
    # Load the data
    with open(json_filepath, 'r') as f:
        data = json.load(f)

    df = pd.DataFrame(data)

    if df.empty:
        # Nothing to aggregate; still produce a valid (empty) output file.
        processed_data = []
    else:
        df['Source'] = _extract_numeric_id(df['Source'], 'Source')
        df['Target'] = _extract_numeric_id(df['Target'], 'Target')

        # Average duplicate pairs directly on the edge list. A dense square
        # matrix is not needed: every zero cell it would add is dropped by
        # the Weight > 0 filter below, so building it only costs memory.
        mean_weights = df.groupby(['Source', 'Target'], as_index=False)['Weight'].mean()

        # Normalize the weights by sqrt here if needed, e.g.
        # mean_weights['Weight'] ** 0.5

        positive = mean_weights[mean_weights['Weight'] > 0]
        processed_data = positive.to_dict(orient='records')

    # Save the processed data to a JSON file
    with open(output_filepath, 'w') as f:
        json.dump(processed_data, f, indent=4)

# Run the conversion on the raw edge list.
# Adjust json_filepath to point at your input JSON file and output_filepath
# to wherever the processed heatmap records should be written.
process_json_for_heatmap(
    json_filepath='/content/WT_BS_Edge.json',
    output_filepath='/content/WT_BS_Edge_processed.json',
)


Including the Interaction attribute

In [9]:
import json
import pandas as pd

# Helper: label each (source, target) pair as intra (1) or inter (2).
def _classify_interactions(source, target, bins):
    """Return 1 where both ids fall inside the same bin range, else 2.

    Args:
        source: pandas Series of integer source ids.
        target: pandas Series of integer target ids, aligned with *source*.
        bins: Iterable of ``(start, end)`` ranges, inclusive on both ends.

    Returns:
        A pandas Series of ints (1 = intra-interaction, 2 = inter-interaction).
    """
    same_bin = pd.Series(False, index=source.index)
    for start, end in bins:
        # Series.between is inclusive on both ends by default.
        same_bin |= source.between(start, end) & target.between(start, end)
    return same_bin.map({True: 1, False: 2})


# Function to process the JSON data into a format suitable for a heatmap
def process_json_for_heatmap(json_filepath, output_filepath, chromosome_bins):
    """Convert an edge-list JSON file into heatmap records with interaction type.

    The input is loaded into a DataFrame and must provide ``Source``,
    ``Target`` and ``Weight`` fields, with Source/Target convertible to int.
    Weights of repeated (Source, Target) pairs are averaged and each pair is
    written exactly once, provided its mean weight is strictly positive.
    Records are ordered by Source then Target.

    Args:
        json_filepath: Path of the JSON file holding the edge records.
        output_filepath: Path the processed records are written to, as a JSON
            list of ``{"Source", "Target", "Weight", "Interaction"}`` objects.
        chromosome_bins: Iterable of inclusive ``(start, end)`` id ranges. A
            pair whose Source and Target both lie in one range gets
            ``Interaction == 1``; every other pair gets ``2``.

    Raises:
        KeyError: If a non-empty input lacks one of the required fields.
        ValueError: If a Source/Target value cannot be converted to int.
    """
    # Load the data
    with open(json_filepath, 'r') as f:
        data = json.load(f)

    df = pd.DataFrame(data)

    if df.empty:
        # Nothing to aggregate; still produce a valid (empty) output file.
        processed_data = []
    else:
        # Ids are expected to already be numeric (ints or digit strings).
        df['Source'] = df['Source'].astype(int)
        df['Target'] = df['Target'].astype(int)

        df['Interaction'] = _classify_interactions(
            df['Source'], df['Target'], chromosome_bins
        )

        # One aggregation yields one row per pair. Interaction depends only on
        # (Source, Target), so 'first' is exact. Merging the labels back from
        # the raw edge list would instead repeat a pair once per duplicate row.
        pairs = df.groupby(['Source', 'Target'], as_index=False).agg(
            {'Weight': 'mean', 'Interaction': 'first'}
        )

        # Zero-weight cells carry no information for the heatmap, so no dense
        # square matrix is built; only positive pairs are kept.
        positive = pairs[pairs['Weight'] > 0]
        processed_data = positive.to_dict(orient='records')

    # Save the processed data to a JSON file
    with open(output_filepath, 'w') as f:
        json.dump(processed_data, f, indent=4)

# Inclusive (start, end) id ranges, one per chromosome. This single range is
# only an example and must be replaced with the ranges of the actual data.
chromosome_bins = [(1, 404)]

# Run the conversion on the previously processed edge list.
# Adjust json_filepath to point at your input JSON file and output_filepath
# to wherever the records with the Interaction field should be written.
process_json_for_heatmap(
    json_filepath='/content/WT_BS_Edge_processed.json',
    output_filepath='/content/WT_BS_Edge_processed_with_interaction.json',
    chromosome_bins=chromosome_bins,
)
