In [2]:
#Name: Amrutha Paalathara
#Class: 5 Msc DSA
#Register Number: 23122004

In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    confusion_matrix,
    classification_report,
)

# Load your dataset
data = pd.read_csv('cancerAllv3.csv')

# Define features and target variable
features = data.drop('diagnosis', axis=1)
target = data['diagnosis']

# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.3, random_state=42)

# Train the Decision Tree model
decision_tree = DecisionTreeClassifier()
decision_tree.fit(X_train, y_train)

# Make predictions and evaluate the model
y_pred = decision_tree.predict(X_test)
print("Decision Tree Evaluation Metrics:")
print(f"Accuracy: {accuracy_score(y_test, y_pred):.2f}")
print(f"Precision: {precision_score(y_test, y_pred):.2f}")
print(f"Recall: {recall_score(y_test, y_pred):.2f}")
print(f"F1-Score: {f1_score(y_test, y_pred):.2f}")
print(f"Confusion Matrix:\n{confusion_matrix(y_test, y_pred)}")
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

Decision Tree Evaluation Metrics:
Accuracy: 0.94
Precision: 0.89
Recall: 0.94
F1-Score: 0.91
Confusion Matrix:
[[101   7]
 [  4  59]]

Classification Report:
              precision    recall  f1-score   support

           0       0.96      0.94      0.95       108
           1       0.89      0.94      0.91        63

    accuracy                           0.94       171
   macro avg       0.93      0.94      0.93       171
weighted avg       0.94      0.94      0.94       171



In [4]:
import numpy as np
import gym
from gym import spaces

class CancerEnv(gym.Env):
    def __init__(self, decision_tree, current_conditions):
        super(CancerEnv, self).__init__()
        self.decision_tree = decision_tree
        self.current_conditions = current_conditions
        self.action_space = spaces.Discrete(2)  # Actions: 0 = maintain, 1 = override
        self.observation_space = spaces.Box(low=0, high=1, shape=(len(current_conditions) - 1,), dtype=np.float32)
        self.max_steps = 1  # End episode after one step
        self.current_step = 0

    def reset(self):
        self.current_step = 0
        return np.array(list(self.current_conditions.values())[:-1])  # Exclude diagnosis

    def step(self, action):
        # Prepare input data with feature names
        features = list(self.current_conditions.keys())[:-1]
        input_data = pd.DataFrame([list(self.current_conditions.values())[:-1]], columns=features)

        # Get decision tree prediction
        dt_prediction = self.decision_tree.predict(input_data)[0]

        # Override or maintain prediction based on action
        final_prediction = dt_prediction if action == 0 else (1 - dt_prediction)

        # Reward: +1 for correct prediction, -1 for incorrect
        reward = 1 if final_prediction == self.current_conditions['diagnosis'] else -1

        # Increment step and check if the episode is done
        self.current_step += 1
        done = self.current_step >= self.max_steps

        return np.array(list(self.current_conditions.values())[:-1]), reward, done, {}

In [5]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.optimizers import Adam

# Function to build the model
def build_model(state_size, action_size):
    model = Sequential([
        Input(shape=(state_size,)),
        Dense(24, activation='relu'),
        Dense(24, activation='relu'),
        Dense(action_size, activation='linear')
    ])
    model.compile(loss='mse', optimizer=Adam(learning_rate=0.001))
    return model

In [None]:
# Define the state size based on the features used for prediction
state_size = len(features.columns)  # Match the number of features used in the Decision Tree
action_size = 2  # Maintain or override

# Build the model
model = build_model(state_size, action_size)

# Training parameters
episodes = 50
gamma = 0.95
epsilon = 1.0
epsilon_min = 0.01
epsilon_decay = 0.995

# Example conditions to train the RL environment
current_conditions = dict(zip(features.columns, X_test.iloc[0]))
current_conditions['diagnosis'] = y_test.iloc[0]
env = CancerEnv(decision_tree, current_conditions)

# Train the RL model
for e in range(episodes):
    state = env.reset()
    state = np.reshape(state, (1, model.input_shape[1]))  # Reshape after reset
    done = False
    total_reward = 0

    while not done:
        # Epsilon-greedy action selection
        if np.random.rand() <= epsilon:
            action = np.random.choice(action_size)  # Random action
        else:
            action = np.argmax(model.predict(state, verbose=0))  # Predicted action

        # Take action and observe result
        next_state, reward, done, _ = env.step(action)
        next_state = np.reshape(next_state, (1, model.input_shape[1]))  # Reshape next_state
        total_reward += reward
        #action = np.argmax(model.predict(state, verbose=0))  # Correctly reshaped state

        # Update Q-value
        target = reward
        if not done:
            target = reward + gamma * np.max(model.predict(next_state, verbose=0))

        target_f = model.predict(state, verbose=0)
        target_f[0][action] = target

        model.fit(state, target_f, epochs=1, verbose=0)  # Train the model
        state = next_state

    # Decay epsilon to reduce exploration over time
    if epsilon > epsilon_min:
        epsilon *= epsilon_decay

    print(f"Episode {e + 1}/{episodes} - Total Reward: {total_reward}")

print("RL model training completed.")


In [7]:
# Define the hybrid model prediction function (evaluation only, no training here)
def hybrid_model_prediction(decision_tree, current_conditions, model):
    """
    Predict using a hybrid model combining a Decision Tree and RL-based adjustments.

    Args:
        decision_tree: Trained Decision Tree model.
        current_conditions: Dictionary of current feature values (only features, no 'diagnosis').
        model: Trained RL model.

    Returns:
        str: Final prediction ('Cancer' or 'No Cancer').
    """
    # Prepare the state to match the model's input shape (Features only, excluding 'diagnosis')
    required_features = list(features.columns)  # Features used for training
    state = np.array([current_conditions[feature] for feature in required_features])
    state = np.reshape(state, (1, len(required_features)))  # Reshape to (1, 30)

    # Use RL model to determine the best action (exploration/exploitation)
    best_action = np.argmax(model.predict(state, verbose=0))

    # Convert current_conditions to a DataFrame for Decision Tree prediction
    input_data = pd.DataFrame([state[0]], columns=required_features)

    # Use Decision Tree prediction if action is 0, otherwise override the prediction
    if best_action == 0:
        final_prediction = decision_tree.predict(input_data)[0]
    else:
        final_prediction = 1 - decision_tree.predict(input_data)[0]

    # Return the final prediction as a label
    return 'Cancer' if final_prediction == 1 else 'No Cancer'


# Example evaluation:
# Define a test case with feature values (Example: all features set to 0.5)
new_conditions = dict(zip(features.columns, [0.5] * len(features.columns)))

# Evaluate the hybrid model
final_prediction = hybrid_model_prediction(decision_tree, new_conditions, model)
print("Hybrid Model Prediction:", final_prediction)


Hybrid Model Prediction: No Cancer


In [8]:
from sklearn.metrics import accuracy_score

# Prepare test data and true labels
X_test_data = X_test.to_dict(orient='records')  # Convert test features to list of dictionaries
true_labels = y_test.values  # True labels (1 for Cancer, 0 for No Cancer)

# Generate predictions using the hybrid model
predictions = []

for test_case in X_test_data:
    prediction = hybrid_model_prediction(decision_tree, test_case, model)
    predictions.append(1 if prediction == 'Cancer' else 0)  # Convert 'Cancer' to 1, 'No Cancer' to 0

# Calculate accuracy score
accuracy = accuracy_score(true_labels, predictions)


# Evaluate Hybrid Model
hybrid_predictions = []
for test_case in X_test.to_dict(orient='records'):
    prediction = hybrid_model_prediction(decision_tree, test_case, model)
    hybrid_predictions.append(1 if prediction == 'Cancer' else 0)

# RL Predictions (action-based)
rl_predictions = []
for test_case in X_test.to_dict(orient='records'):
    action = np.argmax(model.predict(np.array(list(test_case.values())).reshape(1, -1), verbose=0))
    rl_predictions.append(1 - action)  # Action 0 = maintain, 1 = override

# Evaluation Metrics
print("\nReinforcement Learning Model Evaluation Metrics:")
print(f"Accuracy: {accuracy_score(y_test, rl_predictions):.2f}")
print(f"Precision: {precision_score(y_test, rl_predictions):.2f}")
print(f"Recall: {recall_score(y_test, rl_predictions):.2f}")
print(f"F1-Score: {f1_score(y_test, rl_predictions):.2f}")
print(f"Confusion Matrix:\n{confusion_matrix(y_test, rl_predictions)}")
print("\nClassification Report:")
print(classification_report(y_test, rl_predictions))

print("\nHybrid Model Evaluation Metrics:")
print(f"Accuracy: {accuracy_score(y_test, hybrid_predictions):.2f}")
print(f"Precision: {precision_score(y_test, hybrid_predictions):.2f}")
print(f"Recall: {recall_score(y_test, hybrid_predictions):.2f}")
print(f"F1-Score: {f1_score(y_test, hybrid_predictions):.2f}")
print(f"Confusion Matrix:\n{confusion_matrix(y_test, hybrid_predictions)}")
print("\nClassification Report:")
print(classification_report(y_test, hybrid_predictions))


Reinforcement Learning Model Evaluation Metrics:
Accuracy: 0.37
Precision: 0.37
Recall: 1.00
F1-Score: 0.54
Confusion Matrix:
[[  0 108]
 [  0  63]]

Classification Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00       108
           1       0.37      1.00      0.54        63

    accuracy                           0.37       171
   macro avg       0.18      0.50      0.27       171
weighted avg       0.14      0.37      0.20       171


Hybrid Model Evaluation Metrics:
Accuracy: 0.94
Precision: 0.89
Recall: 0.94
F1-Score: 0.91
Confusion Matrix:
[[101   7]
 [  4  59]]

Classification Report:
              precision    recall  f1-score   support

           0       0.96      0.94      0.95       108
           1       0.89      0.94      0.91        63

    accuracy                           0.94       171
   macro avg       0.93      0.94      0.93       171
weighted avg       0.94      0.94      0.94       171



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
