In [None]:
#Install pyliftover if you haven’t yet:
!pip install pyliftover

In [9]:
# Chromatin State Analysis
!pip install pybedtools



In [18]:
import os
os.environ["BEDTOOLS_PATH"] = "/share/lasallelab/Ensi/anaconda3/allcools/bin/bedtools"  # Adjust this path to where BEDTools is installed on your system.
import pybedtools


In [None]:
# downalod the liftover bedfile

!wget https://hgdownload.soe.ucsc.edu/gbdb/rheMac10/liftOver/rheMac10ToHg38.over.chain.gz
!wget https://encodeproject.org/files/ENCFF001XFG/@@download/ENCFF001XFG.bed.gz

In [1]:
import pandas as pd

# Load the data
file_path = 'global_bycluster_all_regions.csv'
data = pd.read_csv(file_path)

# Filter regions by p-value (adjust the threshold as needed)
p_value_threshold = 0.05
filtered_data = data[data['p_value'] < p_value_threshold]

# Save the filtered data for further processing
filtered_file_path = 'filtered_regions.csv'
filtered_data.to_csv(filtered_file_path, index=False)


#Convert the coordinates in Python:

from pyliftover import LiftOver


# Load the chain file
lo = LiftOver('rheMac10ToHg38.over.chain.gz')

# Create a list to store human coordinates
human_coords = []

# Iterate over filtered regions and perform lift-over
for idx, row in filtered_data.iterrows():
    chrom = row['chromosome']
    start = row['start']
    end = row['end']

    # Convert each position
    human_start = lo.convert_coordinate(chrom, start)
    human_end = lo.convert_coordinate(chrom, end)

    if human_start and human_end:
        human_coords.append([human_start[0][0], human_start[0][1], human_end[0][1]])
    else:
        print(f"Region {chrom}:{start}-{end} could not be lifted over.")

# Create a DataFrame for the lifted regions
lifted_df = pd.DataFrame(human_coords, columns=['chromosome', 'start', 'end'])
lifted_df.to_csv('lifted_regions.csv', index=False)

# download chromatin state file

#https://www.encodeproject.org/files/ENCFF001XFG/@@download/ENCFF001XFG.bed.gz


import requests

# URL of the chromatin state annotation file
url = 'https://www.encodeproject.org/files/ENCFF001XFG/@@download/ENCFF001XFG.bed.gz'

# Path to save the file
output_path = 'chromatin_state_annotations.bed.gz'

# Download the file
response = requests.get(url, stream=True)
with open(output_path, 'wb') as file:
    for chunk in response.iter_content(chunk_size=1024):
        if chunk:
            file.write(chunk)

print("Chromatin state annotation file downloaded successfully.")


#unzip the file 

import gzip
import shutil

# Unzip the downloaded file
with gzip.open('chromatin_state_annotations.bed.gz', 'rb') as f_in:
    with open('chromatin_state_annotations.bed', 'wb') as f_out:
        shutil.copyfileobj(f_in, f_out)

print("File unzipped successfully.")



import pybedtools

# Load lifted regions as a BedTool object
lifted_bed = pybedtools.BedTool.from_dataframe(lifted_df)

# Load chromatin state annotations (assuming it's in BED format)
chromatin_file = 'chromatin_state_annotations.bed'
chromatin_bed = pybedtools.BedTool(chromatin_file)

# Intersect lifted regions with chromatin states
intersected = lifted_bed.intersect(chromatin_bed, wa=True, wb=True)

# Convert the result to a DataFrame
intersected_df = intersected.to_dataframe(names=['chrom', 'start', 'end', 'chromatin_state'])

# Save the chromatin state annotated regions
intersected_df.to_csv('chromatin_state_annotated_regions.csv', index=False)


FileNotFoundError: [Errno 2] No such file or directory: 'global_bycluster_all_regions.csv'

In [23]:
!source ~/.bashrc

In [24]:
!conda activate /share/lasallelab/Ensi/anaconda3/allcools/

usage: conda [-h] [--no-plugins] [-V] COMMAND ...
conda: error: argument COMMAND: invalid choice: 'activate' (choose from 'clean', 'compare', 'config', 'create', 'info', 'init', 'install', 'list', 'notices', 'package', 'remove', 'uninstall', 'rename', 'run', 'search', 'update', 'upgrade', 'build', 'content-trust', 'convert', 'debug', 'develop', 'doctor', 'index', 'inspect', 'metapackage', 'render', 'repoquery', 'skeleton', 'verify', 'repo', 'env', 'server', 'token', 'pack')


In [25]:
# the same code to fix the error;
import pandas as pd
from pyliftover import LiftOver
import requests
import gzip
import shutil
import pybedtools

In [33]:
#creating lifover files for signiifcanr regions by cluster and content

import pandas as pd
from pyliftover import LiftOver

# Define your clusters and methylation contexts
clusters = ['c0', 'c1', 'c2', 'c3']
contexts = ['mCG', 'mCH']

# Initialize the liftover converter
lo = LiftOver('rheMac10ToHg38.over.chain.gz')

# Function to perform liftover and export to BED
def process_file(cluster, context):
    file_path = f'../significant_{context}_{cluster}.csv'
    data = pd.read_csv(file_path)
    human_coords = []

    for idx, row in data.iterrows():
        chrom = row['chromosome']
        start = row['start']
        end = row['end']

        human_start = lo.convert_coordinate(chrom, start)
        human_end = lo.convert_coordinate(chrom, end)

        if human_start and human_end:
            start_new = min(human_start[0][1], human_end[0][1])
            end_new = max(human_start[0][1], human_end[0][1])
            human_coords.append([human_start[0][0], start_new, end_new])
        else:
            print(f"Region {chrom}:{start}-{end} could not be lifted over.")

    lifted_df = pd.DataFrame(human_coords, columns=['chromosome', 'start', 'end'])
    output_bed_file = f'lifted_regions_{context}_{cluster}.bed'
    lifted_df.to_csv(output_bed_file, sep='\t', index=False, header=False)
    print(f"File processed and saved as {output_bed_file}")

# Loop over all combinations of clusters and contexts
for cluster in clusters:
    for context in contexts:
        process_file(cluster, context)




Region chr1:54000000-54100000 could not be lifted over.
Region chr1:54100000-54200000 could not be lifted over.
Region chr1:54400000-54500000 could not be lifted over.
Region chr1:54500000-54600000 could not be lifted over.
Region chr1:54800000-54900000 could not be lifted over.
Region chr1:54900000-55000000 could not be lifted over.
Region chr1:55100000-55200000 could not be lifted over.
Region chr1:55200000-55300000 could not be lifted over.
Region chr1:55900000-56000000 could not be lifted over.
Region chr1:56000000-56100000 could not be lifted over.
Region chr1:56600000-56700000 could not be lifted over.
Region chr1:56700000-56800000 could not be lifted over.
Region chr1:57600000-57700000 could not be lifted over.
Region chr1:57700000-57800000 could not be lifted over.
Region chr1:58000000-58100000 could not be lifted over.
Region chr1:58100000-58200000 could not be lifted over.
Region chr1:59400000-59500000 could not be lifted over.
Region chr1:59500000-59600000 could not be lifte

In [27]:
# Step 3: Download and unzip the chromatin state annotation file


url = 'https://www.encodeproject.org/files/ENCFF001XFG/@@download/ENCFF001XFG.bed.gz'
output_path = 'chromatin_state_annotations.bed.gz'
response = requests.get(url, stream=True)

with open(output_path, 'wb') as file:
    for chunk in response.iter_content(chunk_size=1024):
        if chunk:
            file.write(chunk)
print("Chromatin state annotation file downloaded successfully.")

# Unzip the chromatin state file
with gzip.open(output_path, 'rb') as f_in:
    with open('chromatin_state_annotations.bed', 'wb') as f_out:
        shutil.copyfileobj(f_in, f_out)
print("File unzipped successfully.")

# Step 4: Intersect lifted regions with chromatin state annotations
lifted_bed = pybedtools.BedTool.from_dataframe(lifted_df)
chromatin_bed = pybedtools.BedTool('chromatin_state_annotations.bed')
intersected = lifted_bed.intersect(chromatin_bed, wa=True, wb=True)

# Convert the intersected result to DataFrame and save
intersected_df = intersected.to_dataframe(names=['chrom', 'start', 'end', 'chromatin_state'])
intersected_df.to_csv('chromatin_state_annotated_regions.csv', index=False)
print("Chromatin state annotated regions saved successfully.")

Chromatin state annotation file downloaded successfully.
File unzipped successfully.


NotImplementedError: "intersectBed" does not appear to be installed or on the path, so this method is disabled.  Please install a more recent version of BEDTools and re-import to use this method.