<a href="https://colab.research.google.com/github/Adambay241/Adambay241/blob/main/Task_20.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Import the required libraries under their conventional aliases.
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt


In [None]:
# Columns used as model inputs; 'Survived' is the prediction target.
features = ['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare']

# Load the Titanic dataset and keep only the feature columns plus the target.
# The explicit .copy() makes the selection an independent frame, so the
# assignments below modify it directly instead of a slice of the loaded data
# (no SettingWithCopyWarning, and correct under pandas Copy-on-Write).
titanic_data = pd.read_csv('Titanic.csv')[features + ['Survived']].copy()

# Handle missing values.
# Fill missing ages with the median age. Plain assignment is used instead of
# `titanic_data['Age'].fillna(..., inplace=True)`: that chained in-place call
# acts on a temporary Series and leaves the frame unchanged under
# Copy-on-Write, which would make the dropna() below discard every row with a
# missing age.
# NOTE: the median is computed over the whole dataset, i.e. before any
# train/dev/test split is made.
titanic_data['Age'] = titanic_data['Age'].fillna(titanic_data['Age'].median())

# Drop any row that still has a missing value in one of the kept columns.
titanic_data = titanic_data.dropna()

# Convert categorical variable 'Sex' to numerical (male -> 0, female -> 1).
titanic_data['Sex'] = titanic_data['Sex'].map({'male': 0, 'female': 1})


In [None]:
# Three-way split: 60% of the rows go to training; the held-out 40% is then
# halved into a development set and a test set (20% of the rows each).
# Both splits use a fixed random_state so the partition is reproducible.
train_data, temp_data = train_test_split(
    titanic_data, test_size=0.4, random_state=42
)
dev_data, test_data = train_test_split(
    temp_data, test_size=0.5, random_state=42
)

# Separate the inputs (every column except 'Survived') from the target.
X_train, y_train = train_data.drop(columns='Survived'), train_data['Survived']
X_dev, y_dev = dev_data.drop(columns='Survived'), dev_data['Survived']
X_test, y_test = test_data.drop(columns='Survived'), test_data['Survived']


In [None]:
# Baseline: fit a tree with no max_depth limit on the training set.
# fit() returns the estimator itself, so construction and fitting are chained.
decision_tree = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)

# Draw the fitted tree on a wide figure, colouring nodes by majority class.
fig, ax = plt.subplots(figsize=(20, 10))
plot_tree(
    decision_tree,
    feature_names=features,
    class_names=['Not Survived', 'Survived'],
    filled=True,
    ax=ax,
)
ax.set_title('Decision Tree without Depth Restriction')
plt.show()


In [None]:
# Score the unrestricted tree on the development set: predict the labels,
# then compare them with the true 'Survived' values.
y_dev_pred = decision_tree.predict(X_dev)
dev_accuracy = accuracy_score(y_true=y_dev, y_pred=y_dev_pred)

# Report the fraction of correctly classified development rows.
print(f"Development Set Accuracy: {dev_accuracy:.4f}")


In [None]:
# Experiment with different values of max_depth.
# For every depth in depth_range a tree is trained on the training set and its
# accuracy on the training and development sets is recorded; position i of
# train_accuracies / dev_accuracies corresponds to depth_range[i].
depth_range = range(2, 10)
train_accuracies = []
dev_accuracies = []

for depth in depth_range:
    # Train a decision tree with the current max_depth
    model = DecisionTreeClassifier(max_depth=depth, random_state=42)
    model.fit(X_train, y_train)

    # Loop-specific names are used on purpose: reusing `y_dev_pred` and
    # `dev_accuracy` here would silently overwrite the results computed for
    # the unrestricted tree.
    depth_train_accuracy = accuracy_score(y_train, model.predict(X_train))
    depth_dev_accuracy = accuracy_score(y_dev, model.predict(X_dev))
    train_accuracies.append(depth_train_accuracy)
    dev_accuracies.append(depth_dev_accuracy)

    # Plot the tree for each depth on its own figure
    fig, ax = plt.subplots(figsize=(20, 10))
    plot_tree(
        model,
        filled=True,
        feature_names=features,
        class_names=['Not Survived', 'Survived'],
        ax=ax,
    )
    ax.set_title(f"Decision Tree with max_depth={depth}")
    plt.show()
    # Release the figure so the large per-depth figures do not pile up in memory.
    plt.close(fig)


In [None]:
# Compare training and development accuracy as a function of max_depth,
# one line per data set, drawn on an explicit Axes object.
fig, ax = plt.subplots()
ax.plot(depth_range, train_accuracies, label='Training Accuracy')
ax.plot(depth_range, dev_accuracies, label='Development Accuracy')
ax.set(xlabel='Max Depth', ylabel='Accuracy', title='Accuracy vs Max Depth')
ax.legend()
plt.show()


In [None]:
# Choose the max_depth whose tree scored highest on the development set.
# max() returns the first maximal position, so ties go to the smallest depth.
best_position = max(range(len(dev_accuracies)), key=dev_accuracies.__getitem__)
optimal_depth = depth_range[best_position]

# Refit a tree at that depth on the training set (fit() returns the estimator).
final_model = DecisionTreeClassifier(
    max_depth=optimal_depth, random_state=42
).fit(X_train, y_train)

# Evaluate once on the held-out test set and report the accuracy.
y_test_pred = final_model.predict(X_test)
test_accuracy = accuracy_score(y_true=y_test, y_pred=y_test_pred)
print(f"Test Set Accuracy: {test_accuracy:.4f}")


In [None]:
# The training accuracy increases roughly linearly with max_depth: the model fits
# the training data more accurately as it becomes more complex (deeper).
# This was read as the model not overfitting; note, however, that overfitting shows up as a
# widening gap between the training and development curves, not in the training curve alone.
# The development accuracy line is mostly flat with a few bumps, which indicates
# that changes in the model's complexity do not significantly affect performance on the validation set.
