<a href="https://colab.research.google.com/github/Cypherjac/genome-navigator/blob/main/Genome_Navigator_GWAS_iter_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import tensorflow as tf
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# GWAS summary statistics data (CSV).
# TODO: extract the GWAS data from the available APIs instead of a local file.
gwas_data_path = "gwas_data.csv"
df = pd.read_csv(gwas_data_path)

# Binary label: 1 when the variant's p-value is below the 5e-8 threshold.
y = (df["p-value"] < 5e-8).astype(int).values

# Features: drop the identifier columns AND the "p-value" column. The label
# above is computed directly from "p-value", so leaving it in X would hand the
# model its own target (label leakage) and make the reported accuracy
# meaningless.
X = df.drop(columns=["SNP_ID", "Chromosome", "Position", "p-value"]).values

# Split BEFORE scaling so the scaler never sees the held-out rows.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the scaler on the training split only, then apply the same transform to
# every matrix. Fitting on the full dataset would leak test-set mean/variance
# into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
# Keep the module-level X as the scaled feature matrix, as it was before.
X = scaler.transform(X)

def create_model(input_shape):
    """Build and compile the feed-forward binary classifier.

    Args:
        input_shape: Number of feature columns in each input row; it is
            wrapped into the 1-tuple ``(input_shape,)`` for the first layer.

    Returns:
        A compiled ``tf.keras.Sequential`` model: three ReLU dense layers
        (128, 64, 32 units) followed by a single sigmoid output unit,
        compiled with the Adam optimizer, binary cross-entropy loss and an
        accuracy metric.
    """
    dense = tf.keras.layers.Dense

    network = tf.keras.Sequential()
    # First hidden layer also declares the expected input width.
    network.add(dense(128, activation='relu', input_shape=(input_shape,)))
    # Remaining hidden layers narrow towards the output.
    for units in (64, 32):
        network.add(dense(units, activation='relu'))
    # One sigmoid unit yields a value in (0, 1) for the positive class.
    network.add(dense(1, activation='sigmoid'))

    network.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return network

# MODEL TRAINING
# Size the network to the number of feature columns, then train it.
# NOTE: the held-out split doubles as validation data here, so the per-epoch
# validation metrics are computed on the same rows as the evaluation below.
model = create_model(X_train.shape[1])
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    batch_size=32,
    epochs=50,
)

# MODEL EVALUATION
# evaluate() yields the loss followed by the compiled metric (accuracy).
test_loss, test_acc = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {test_acc:.4f}")

# Persist the trained model to disk so inference can reload it later.
model.save("gwas_model.h5")

# MODEL INFERENCE
# Loaded models keyed by file path, so repeated predictions do not re-read the
# model file from disk on every call.
_GWAS_MODEL_CACHE = {}


def _load_gwas_model(model_path="gwas_model.h5"):
    """Return the Keras model stored at *model_path*, loading it at most once."""
    try:
        return _GWAS_MODEL_CACHE[model_path]
    except KeyError:
        loaded = tf.keras.models.load_model(model_path)
        _GWAS_MODEL_CACHE[model_path] = loaded
        return loaded


def predict_gwas_variant(features):
    """Classify a single variant as significant or not.

    The saved model ("gwas_model.h5") is loaded on the first call and reused
    afterwards. If the file is re-saved later, clear ``_GWAS_MODEL_CACHE``
    (or re-run this definition) to pick up the new weights.

    Args:
        features: Numeric feature values for ONE variant. The input is
            flattened into a single row, so it must contain exactly as many
            values as the module-level ``scaler`` was fitted on, in the same
            column order.

    Returns:
        "Significant Variant" if the model output exceeds 0.5, otherwise
        "Not Significant".
    """
    # Distinct local name: avoids shadowing the module-level training `model`.
    classifier = _load_gwas_model()
    # Shape the input as a single-row 2-D array, as the scaler and model expect.
    row = np.asarray(features).reshape(1, -1)
    # Apply the same standardization that was used on the training data.
    row = scaler.transform(row)
    probability = classifier.predict(row)[0][0]
    return "Significant Variant" if probability > 0.5 else "Not Significant"

