In [22]:
!pip install pandas scikit-learn gradio



In [18]:
# Import libraries
import pandas as pd
import gradio as gr
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

In [19]:
# Path of the gene-interaction CSV to load; replace with your actual file name.
DATASET_PATH = '/content/gene_interaction_dataset.csv'

df = pd.read_csv(DATASET_PATH)

In [20]:
# Trim surrounding whitespace in every text (object-dtype) column so that
# otherwise-identical labels do not end up as distinct categories.
df = df.apply(lambda col: col.str.strip() if col.dtype == "object" else col)

# Keep a human-readable snapshot (taken before encoding) for the disease lookup.
original_df = df.copy()

# One independent encoder per categorical feature; they are reused at
# prediction time to translate user input into the same integer codes.
le_geneA, le_geneB, le_coexp, le_ppi, le_pathway = (LabelEncoder() for _ in range(5))

# Fit each encoder on its column and replace the column with its integer codes.
df = df.assign(**{
    column: encoder.fit_transform(df[column])
    for column, encoder in (
        ('Gene A', le_geneA),
        ('Gene B', le_geneB),
        ('Co-expression', le_coexp),
        ('PPI Link', le_ppi),
        ('Shared Pathways', le_pathway),
    )
})

# Feature matrix (encoded categorical columns) and target column.
X = df[[
    'Gene A',
    'Gene B',
    'Co-expression',
    'PPI Link',
    'Shared Pathways',
]]
y = df['Output']

# Hold out 20% of the rows for evaluation; the seed is fixed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# Fit the random-forest classifier on the training split (fit() returns the estimator).
model = RandomForestClassifier(
    n_estimators=200,
    max_depth=10,
    random_state=42,
).fit(X_train, y_train)

# Score the model on the held-out split.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print(f"\n✅ Accuracy Score: {accuracy:.2f}")
print("\n✅ Classification Report:", report, sep="\n")

# Prediction function
def predict_disease(gene_a, gene_b, coexp, ppi, pathway):
    """Predict whether a gene pair is linked to a disease and, if so, name it.

    Each argument is the raw (unencoded) category value for one model feature.
    The values are whitespace-stripped, encoded with the fitted label encoders
    and passed to the trained classifier. When the classifier predicts class 1,
    the disease name is looked up in ``original_df`` by exact match on all five
    feature values.

    Args:
        gene_a: Value for the 'Gene A' feature.
        gene_b: Value for the 'Gene B' feature.
        coexp: Value for the 'Co-expression' feature.
        ppi: Value for the 'PPI Link' feature.
        pathway: Value for the 'Shared Pathways' feature.

    Returns:
        str: A human-readable result message. Missing or unencodable inputs
        yield a message starting with "Invalid input values:" instead of
        raising.
    """
    # Column names, in the same order as the feature matrix the model was fitted on.
    feature_names = ['Gene A', 'Gene B', 'Co-expression', 'PPI Link', 'Shared Pathways']
    raw_values = [gene_a, gene_b, coexp, ppi, pathway]

    # Report missing fields by name instead of surfacing a raw AttributeError text.
    # NOTE(review): presumably an unselected Gradio dropdown arrives as None — confirm.
    missing = [name for name, value in zip(feature_names, raw_values) if value is None]
    if missing:
        return f"Invalid input values: please select a value for {', '.join(missing)}"

    try:
        gene_a = gene_a.strip()
        gene_b = gene_b.strip()
        coexp = coexp.strip()
        ppi = ppi.strip()
        pathway = pathway.strip()

        # transform() raises for labels the encoder was not fitted on.
        geneA_encoded = le_geneA.transform([gene_a])[0]
        geneB_encoded = le_geneB.transform([gene_b])[0]
        coexp_encoded = le_coexp.transform([coexp])[0]
        ppi_encoded = le_ppi.transform([ppi])[0]
        pathway_encoded = le_pathway.transform([pathway])[0]
    except Exception as e:
        return f"Invalid input values: {str(e)}"

    # The model was fitted on a DataFrame, so predict on a DataFrame carrying the
    # same column names; a bare nested list triggers a feature-name mismatch warning.
    features = pd.DataFrame(
        [[geneA_encoded, geneB_encoded, coexp_encoded, ppi_encoded, pathway_encoded]],
        columns=feature_names,
    )
    prediction = model.predict(features)[0]

    if prediction != 1:
        return "❌ No disease found."

    # Look for a dataset row whose five feature values all equal the inputs.
    match = original_df[
        (original_df['Gene A'].str.strip() == gene_a) &
        (original_df['Gene B'].str.strip() == gene_b) &
        (original_df['Co-expression'].str.strip() == coexp) &
        (original_df['PPI Link'].str.strip() == ppi) &
        (original_df['Shared Pathways'].str.strip() == pathway)
    ]
    if match.empty:
        return "✅ Disease likely, but specific name not available in dataset."
    # Several rows may match; the first one's disease is reported.
    return f"✅ Predicted Disease: {match['Shared Diseases'].values[0]}"

# Build the web UI: one dropdown per model feature, each listing the distinct
# values found in the corresponding column of the unencoded data.
interface = gr.Interface(
    fn=predict_disease,
    inputs=[
        gr.Dropdown(choices=original_df[column].unique().tolist(), label=column)
        for column in ('Gene A', 'Gene B', 'Co-expression', 'PPI Link', 'Shared Pathways')
    ],
    outputs="text",
    title="🧬 Gene Disease Predictor",
    description=f"Predict if Gene A and Gene B are linked with a disease.\nModel Accuracy: {accuracy:.2f}",
)

# Start serving the app
interface.launch()


✅ Accuracy Score: 0.94

✅ Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.92      0.96        24
           1       0.86      1.00      0.92        12

    accuracy                           0.94        36
   macro avg       0.93      0.96      0.94        36
weighted avg       0.95      0.94      0.95        36

It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://6c496746e6855fa2d2.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


