In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from gplearn.genetic import SymbolicRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

In [None]:
# Read the survival CSV, then discard its first column
# (presumably a saved index column -- TODO confirm against the file).
dataset_survival = pd.read_csv(
    "/home/kidneyData/GP_Survival/dataset_regfinaldata_livingDon_2022_redoMAR30.csv",
    encoding="latin1",
    low_memory=False,
)
dataset_survival = dataset_survival.drop(columns=dataset_survival.columns[0])
dataset_survival.head()

In [None]:
# Small hand-written sample that mirrors a few columns of the provided dataset.
# Each key is a column name; each list holds that column's values, one per row.
data = {
    # Numeric predictor
    "CAN_LAST_SRTR_PEAK_PRA": [0.0, 3.0, 7.0, 17.0, 0.0],
    # Categorical predictors (one-hot encoded further down)
    "DON_RACE": ["Whites", "Blacks", "Whites", "Blacks", "Whites"],
    "DON_GENDER": ["M", "M", "F", "F", "F"],
    # ... add other features and target variables here ...
    # Target variable (assumed to be 'Avg_EMS_30')
    "Avg_EMS_30": [1.8, 3.0, 1.8, 3.0, 1.8],
}

In [None]:
# Turn the column -> values mapping into the working DataFrame
df = pd.DataFrame(data=data)

In [None]:
# Pull out the regression target; every remaining column is a predictor
target = df["Avg_EMS_30"]
features = df.drop(columns=["Avg_EMS_30"])

In [None]:
# One-hot encode the categorical columns DON_RACE and DON_GENDER
features_encoded = pd.get_dummies(
    features,
    columns=["DON_RACE", "DON_GENDER"],
)


In [None]:
# Hold out 20% of the rows as a test set; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    features_encoded,
    target,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Configure the genetic-programming symbolic regressor (not fitted yet)
est_gp = SymbolicRegressor(
    # Search budget and early-stop threshold
    population_size=5000,
    generations=20,
    stopping_criteria=0.01,
    # Probabilities of the genetic operations
    p_crossover=0.7,
    p_subtree_mutation=0.1,
    p_hoist_mutation=0.05,
    p_point_mutation=0.1,
    # Subsampling fraction, size penalty, logging and seed
    max_samples=0.9,
    parsimony_coefficient=0.01,
    verbose=1,
    random_state=0,
)

In [None]:
# Train the symbolic regressor on the training split
est_gp.fit(X=X_train, y=y_train)

In [None]:
# Predict on the held-out test set, but only once the regressor has been fitted.
# gplearn creates the `_program` attribute during fit(), so on an unfitted
# estimator a plain `est_gp._program` access raises AttributeError instead of
# evaluating to None. getattr() with a default handles both the "attribute
# missing" and "attribute is None" cases, so the message branch is reachable.
if getattr(est_gp, "_program", None) is None:
    print("Model is not fitted. Please fit the model before visualization.")
else:
    # Predictions for the test rows; used by the evaluation and plotting cells
    y_pred = est_gp.predict(X_test)

In [None]:
# Score the test-set predictions with mean squared error and report it
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
print("Mean Squared Error:", mse)

In [None]:
# Show the fitted program in its textual form
print("Symbolic Expression:", str(est_gp._program))

In [None]:
# Actual-vs-predicted scatter for the held-out rows.
# Requires `matplotlib.pyplot` imported as `plt` (it was previously used here
# without ever being imported, which fails on a fresh kernel).
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(y_test, y_pred, color="blue")

# Dashed y = x reference line spanning the observed target range:
# points on this line are perfect predictions.
value_range = [y_test.min(), y_test.max()]
ax.plot(value_range, value_range, linestyle="--", color="red", linewidth=2)

ax.set_xlabel("Actual Avg_EMS_30")
ax.set_ylabel("Predicted Avg_EMS_30")
ax.set_title("Actual vs Predicted Avg_EMS_30")
ax.grid(True)
plt.show()

In [None]:
# Install pydot into the kernel's environment; the next cell imports it to draw the program tree.
%pip install pydot

In [None]:
from IPython.display import Image
import pydot

# Take the fitted program and export it as Graphviz DOT text
symbolic_expression = est_gp._program
dot_source = symbolic_expression.export_graphviz()

# Parse the DOT text with pydot; the result is indexed and its first graph
# is rendered to PNG bytes
graph = pydot.graph_from_dot_data(dot_source)
png_bytes = graph[0].create_png()
image = Image(png_bytes)

# Show the rendered tree inline in the notebook
display(image)
