<a href="https://colab.research.google.com/github/AakashPuthussery/Drug-Severity-Prediction-RF/blob/main/ML_Model_Generation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score

# Load the dataset
file_path = '/content/synthetic_genetic_data.csv'
data = pd.read_csv(file_path)

# Data Preprocessing
# Encode 'Gender' using LabelEncoder; the fitted encoder is kept in
# `label_encoder` so the same mapping can be applied to new inputs later.
label_encoder = LabelEncoder()
data['Gender'] = label_encoder.fit_transform(data['Gender'])

# Split the data into features and target variable
X = data.drop('Addiction_Risk', axis=1)
y = data['Addiction_Risk']

# Split the dataset into training and testing sets BEFORE scaling.
# Fitting the scaler on the full dataset would let the test rows influence the
# mean/std used for preprocessing (data leakage), so the hold-out set would no
# longer be truly unseen. The split uses the same test_size and random_state
# as before, so the same rows end up in each partition.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Scale numerical features (optional, but recommended for some models).
# The scaler learns its statistics from the training rows only and is then
# applied unchanged to the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Scaled copy of the full feature matrix, kept under its original name in case
# other cells still refer to it. It must not be used for fitting or evaluation.
X_scaled = scaler.transform(X)

# Model Training
# Using Random Forest Classifier (seeded for reproducible results)
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Model Evaluation
# Make predictions on the test set
y_pred = model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print(f"Accuracy: {accuracy:.2f}")
print("Classification Report:")
print(report)


Accuracy: 0.66
Classification Report:
              precision    recall  f1-score   support

           0       0.68      0.91      0.78       132
           1       0.50      0.18      0.26        68

    accuracy                           0.66       200
   macro avg       0.59      0.54      0.52       200
weighted avg       0.62      0.66      0.60       200



In [5]:
# Function to predict risk based on new input data
def predict_risk(new_data):
    """Predict the target class for one or more new records.

    The input is encoded with the module-level ``label_encoder``, scaled with
    the module-level ``scaler`` and passed to the module-level ``model``.

    Parameters
    ----------
    new_data : dict, list or pandas.DataFrame
        A single record as a dict, several records as a list (anything
        ``pd.DataFrame`` accepts), or a ready-made DataFrame. It must contain
        a ``'Gender'`` column holding labels known to ``label_encoder`` plus
        every feature column the scaler was fitted on. Extra columns are
        ignored when the scaler exposes its fitted column names.

    Returns
    -------
    The value returned by ``model.predict`` for the prepared rows.

    Raises
    ------
    KeyError
        If the ``'Gender'`` column is missing.
    ValueError
        If a feature column the scaler was fitted on is missing.
    """
    # Always work on a private frame: the Gender column is overwritten below,
    # and doing that on the caller's DataFrame would corrupt it and make a
    # second call with the same frame fail on the already-encoded labels.
    if isinstance(new_data, dict):
        features = pd.DataFrame([new_data])
    elif isinstance(new_data, list):
        features = pd.DataFrame(new_data)
    else:
        features = new_data.copy()

    # Encode the categorical column with the encoder fitted during training
    features['Gender'] = label_encoder.transform(features['Gender'])

    # Put the columns in the order the scaler was fitted on. A dict or frame
    # built with a different key order would otherwise be rejected or scaled
    # with the wrong per-column statistics. The attribute is only present when
    # the scaler was fitted on a DataFrame, hence the getattr fallback.
    expected_columns = getattr(scaler, 'feature_names_in_', None)
    if expected_columns is not None:
        missing = [name for name in expected_columns if name not in features.columns]
        if missing:
            raise ValueError(f"Missing required feature column(s): {missing}")
        features = features[list(expected_columns)]

    new_data_scaled = scaler.transform(features)

    # Predict risk
    predictions = model.predict(new_data_scaled)
    return predictions

# Example usage: a single record supplied as a plain dict.
# Every value is numeric except 'Gender', which is passed as its text label
# and encoded inside predict_risk.
new_input = dict(
    DRD2=2,
    OPRM1=0,
    SLC6A4=2,
    COMT=2,
    MAOA=0,
    Age=52,
    Gender='Female',
)

# predict_risk returns one prediction per input row; here that is one value.
predicted_risk = predict_risk(new_input)
print("Predicted Addiction Risk:", predicted_risk)

Predicted Addiction Risk: [1]


In [6]:
import joblib

# Persist the three fitted objects to the current working directory so that
# they can be reloaded together later.

# Preprocessing objects: the feature scaler ...
joblib.dump(value=scaler, filename='scaler.pkl')

# ... and the trained classifier itself
joblib.dump(value=model, filename='addiction_risk_model.pkl')

# The label encoder is written last; as the final expression of the cell, its
# return value is what the notebook displays.
joblib.dump(value=label_encoder, filename='label_encoder.pkl')


['label_encoder.pkl']