# CleavAI

© 2025 JForCell Corporation. All Rights Reserved.

**Furin mernums** (MEROPS identifiers used to filter the `cleavage` table below):

```plaintext
MER0000375
MER0000381
MER0000377
MER0000383
MER0002984
MER0002578
MER0000964
MER0004695
```

In [11]:
import os

import mysql.connector
import pandas as pd
from sqlalchemy import create_engine

In [13]:
# The connection URL is read from the environment so database credentials are
# never stored in (or committed with) the notebook. Expected form:
#   mysql+pymysql://<user>:<password>@<host>/merops
db_url = os.environ.get("MEROPS_DB_URL")
if not db_url:
    raise RuntimeError(
        "Set the MEROPS_DB_URL environment variable before running this notebook."
    )
engine = create_engine(db_url)

In [22]:
furin_mernums = (
    'MER0000375', 'MER0000381', 'MER0000377', 'MER0000383',
    'MER0002984', 'MER0002578', 'MER0000964', 'MER0004695',
)

# Bind each mernum as a named driver parameter instead of pasting the tuple's
# Python repr into the SQL text: the repr of a one-element tuple ("('X',)") is
# not valid SQL, and quoting/escaping is left to the database driver.
mernum_params = {f"mernum_{i}": mernum for i, mernum in enumerate(furin_mernums)}
placeholders = ", ".join(f"%({name})s" for name in mernum_params)

# Only the placeholder names are interpolated here; the values travel via `params`.
query = f"""
SELECT c.uniprot_acc, s.sequence, c.p1, c.mernum
FROM cleavage c
JOIN substrate s ON LOWER(c.uniprot_acc) = LOWER(s.uniprot_acc)
WHERE c.mernum IN ({placeholders})
  AND CHAR_LENGTH(s.sequence) > 0;
"""

df = pd.read_sql(query, engine, params=mernum_params)
df.head()

Unnamed: 0,uniprot_acc,sequence,p1,mernum
0,P03956,MHSFPPLLLLLFWGVVSHSFPATLETQEQDVDLVQKYLEKYYNLKN...,91,MER0000383
1,P24347,MAPAAWLRSAAARALLPPMLLLLLQPPPLLARALPPDAHHLHAERR...,97,MER0000383
2,P03956,MHSFPPLLLLLFWGVVSHSFPATLETQEQDVDLVQKYLEKYYNLKN...,91,MER0000377
3,P24347,MAPAAWLRSAAARALLPPMLLLLLQPPPLLARALPPDAHHLHAERR...,97,MER0000377
4,P03956,MHSFPPLLLLLFWGVVSHSFPATLETQEQDVDLVQKYLEKYYNLKN...,91,MER0002984


In [23]:
# Print the column dtypes and non-null counts of the query result.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 810 entries, 0 to 809
Data columns (total 4 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   uniprot_acc  810 non-null    object
 1   sequence     810 non-null    object
 2   p1           810 non-null    int64 
 3   mernum       810 non-null    object
dtypes: int64(1), object(3)
memory usage: 25.4+ KB


In [24]:
def extract_window(seq: str, p1: int, window: int = 8) -> str | None:
    "Return 8-mer sequence at cleavage(P4–P4′)"
    start = p1 - 4
    end = p1 + 4
    if start < 0 or end > len(seq):
        return None
    return seq[start:end]

# Compute the window for every cleavage record by iterating the two input
# columns directly. Row-wise DataFrame.apply(axis=1) is slow and returns an
# empty DataFrame (not a Series) when `df` has no rows, which makes the column
# assignment fail.
df["cleavage_window"] = [
    extract_window(sequence, p1)
    for sequence, p1 in zip(df["sequence"], df["p1"])
]

# extract_window returns None when the window would run past either end of
# the sequence; drop those rows and renumber the index.
df = df.dropna(subset=["cleavage_window"]).reset_index(drop=True)
df[["cleavage_window"]].head()

Unnamed: 0,cleavage_window
0,KQPRCGVP
1,RQKRFVLS
2,KQPRCGVP
3,RQKRFVLS
4,KQPRCGVP


In [25]:
# Write the window, uniprot_acc and mernum columns to CSV, omitting the index.
df.loc[:, ["cleavage_window", "uniprot_acc", "mernum"]].to_csv(
    "furin_cleavage_windows.csv",
    index=False,
)