<a href="https://colab.research.google.com/github/Govindest/Machine-Learning-Model-for-Predicting-Liver-Cancer-Using-miRNA-Expression-Profiles/blob/main/MicroRNAs.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Load and preprocess the dataset


In [None]:
# Read the raw spreadsheet export containing the miRNA measurements.
file_name = 'miRNA Project Data - Sheet1.csv'
data = pd.read_csv(file_name)

# Keep every column whose name contains 'MIMAT', plus the column holding the
# health label (read from the CSV under the name 'Unnamed: 1').
relevant_columns = [name for name in data.columns if 'MIMAT' in name]
relevant_columns.append('Unnamed: 1')

# Build the modelling table in one chain: select, rename the label column,
# then encode the label as 0 for 'Healthy' and 1 for anything else.
cleaned_data = (
    data[relevant_columns]
    .rename(columns={'Unnamed: 1': 'Health_Status'})
    .assign(
        Health_Status=lambda frame: frame['Health_Status'].ne('Healthy').astype('int64')
    )
)

# Target vector and feature matrix.
y = cleaned_data['Health_Status']
X = cleaned_data.drop(columns='Health_Status')



In [None]:
# Splitting the data into training and testing sets.
# 20% of the samples are held out; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Hyperparameter tuning: candidate values sampled by the randomized search.
param_dist = {
    'n_estimators': [100, 200, 300, 400, 500],
    # 'auto' was deprecated in scikit-learn 1.1 and removed in 1.3, where it
    # makes every fit that samples it fail. For classifiers it was an alias of
    # 'sqrt', so it is replaced with 'log2' to keep two distinct options.
    'max_features': ['sqrt', 'log2'],
    'max_depth': [10, 20, 30, 40, 50, None],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'bootstrap': [True, False]
}



In [None]:
# Base estimator; the seed makes each forest reproducible.
rf = RandomForestClassifier(random_state=42)

# Sample 100 configurations from param_dist, score each with 3-fold
# cross-validation, and use every available core.
random_search = RandomizedSearchCV(
    estimator=rf,
    param_distributions=param_dist,
    n_iter=100,
    cv=3,
    verbose=2,
    random_state=42,
    n_jobs=-1,
)
random_search.fit(X_train, y_train)

# The best parameters found by the search are read from random_search.best_params_ in the next cell.


Fitting 3 folds for each of 100 candidates, totalling 300 fits


In [None]:
# Best hyperparameter combination found by the randomized search.
best_params = random_search.best_params_

# RandomizedSearchCV uses refit=True by default, so it has already retrained a
# clone of the base estimator (random_state=42) with best_params on the full
# training set. Reusing that model avoids a redundant second training pass.
best_rf = random_search.best_estimator_
best_rf



In [None]:
# Prediction function
def predict_cancer(mirna_levels, model=None, feature_names=None):
    """
    Predicts liver cancer based on miRNA levels.

    Args:
    mirna_levels (list): miRNA expression levels for one sample, ordered
        like ``feature_names``.
    model: Fitted estimator exposing ``predict``. Defaults to the notebook's
        ``best_rf``.
    feature_names: Column names the model was trained on. Defaults to
        ``X.columns``.

    Returns:
    int: Predicted class (0 for Healthy, 1 for Cancer).

    Raises:
    ValueError: If the number of values does not match the number of features.
    """
    # Fall back to the notebook-level model and training columns when the
    # caller does not supply its own.
    if model is None:
        model = best_rf
    if feature_names is None:
        feature_names = X.columns
    feature_names = list(feature_names)

    values = list(mirna_levels)
    if len(values) != len(feature_names):
        raise ValueError(
            f"Expected {len(feature_names)} miRNA expression levels, got {len(values)}."
        )

    # A single-row frame with the training column names, so the estimator
    # receives the features under the names it was fitted with.
    sample = pd.DataFrame([values], columns=feature_names)

    # Convert the NumPy integer to a plain int, as the docstring promises.
    return int(model.predict(sample)[0])




In [None]:
# Example usage of the function.
# Placeholder input: the same value repeated once per feature column, so the
# example runs whatever the number of miRNA columns in the dataset.
# Replace with actual miRNA expression levels.
example_data = [2.612] * len(X.columns)
prediction = predict_cancer(example_data)
print("Cancer Prediction (0 for Healthy, 1 for Cancer):", prediction)

In [None]:
# Score the tuned model on the held-out test split.
y_pred = best_rf.predict(X_test)

# Accuracy is the fraction of test samples whose predicted label matches the true one.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Test Set Accuracy:", accuracy)