In [29]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [27]:
# Function to load and preprocess the dataset
def load_and_preprocess_data(file_path):
    """Load a semicolon-delimited CSV file and one-hot encode it.

    The function reads the file, prints a structural summary and the first
    rows, drops any rows that contain missing values, and then converts the
    categorical columns to indicator columns with
    ``pd.get_dummies(..., drop_first=True)``.

    Parameters
    ----------
    file_path : str or path-like
        Location of the CSV file; fields must be separated by ``;``.

    Returns
    -------
    pandas.DataFrame or None
        The preprocessed frame, or ``None`` when loading or preprocessing
        raised an exception (the error message is printed instead of being
        propagated, so callers must check for ``None``).
    """
    try:
        # Load the dataset
        data = pd.read_csv(file_path, delimiter=';')
        print("Data loaded successfully.")

        # Display basic information and the first few rows.
        # DataFrame.info() writes its report to stdout itself and returns
        # None, so it must not be wrapped in print() -- doing so emits a
        # stray "None" line after the summary.
        data.info()
        print(data.head())

        # Handle missing values if any
        if data.isnull().sum().sum() > 0:
            print("Missing values found. Dropping rows with missing values.")
            data = data.dropna()

        # Convert categorical columns to numeric using one-hot encoding.
        # drop_first=True removes the first level of every categorical
        # column, so a two-level column collapses to a single indicator.
        data = pd.get_dummies(data, drop_first=True)
        print("Data preprocessing complete.")

        return data
    except Exception as e:
        # Best-effort by design: report the problem and signal failure with
        # None rather than raising.
        print(f"Error loading or preprocessing data: {e}")
        return None


In [None]:
# Function to build and evaluate the decision tree classifier
def build_and_evaluate_model(data, target_col='y_yes'):
    """Train a decision tree on ``data``, print its metrics and plot the tree.

    The frame is split into features (every column except ``target_col``)
    and target, divided 70/30 into training and test sets with a fixed
    seed, and a ``DecisionTreeClassifier`` is fitted on the training part.
    Accuracy, the classification report and the confusion matrix for the
    test part are printed, then the fitted tree is drawn.

    Parameters
    ----------
    data : pandas.DataFrame
        Fully numeric/boolean frame that contains ``target_col``.
    target_col : str, default 'y_yes'
        Name of the binary target column. The default matches the indicator
        column produced by one-hot encoding a ``y`` column with
        ``drop_first=True``.

    Returns
    -------
    None
        Results are printed and plotted. Any exception is caught and
        reported on stdout rather than raised.
    """
    try:
        # Define features and target
        X = data.drop(target_col, axis=1)
        y = data[target_col]

        # Split the data into training and test sets
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.3, random_state=42
        )

        # Initialize and train the classifier
        clf = DecisionTreeClassifier(random_state=42)
        clf.fit(X_train, y_train)

        # Make predictions
        y_pred = clf.predict(X_test)

        # Evaluate the model
        accuracy = accuracy_score(y_test, y_pred)
        print(f"Accuracy: {accuracy:.2f}")
        print("Classification Report:")
        print(classification_report(y_test, y_pred))
        print("Confusion Matrix:")
        print(confusion_matrix(y_test, y_pred))

        # Plot the decision tree.
        # feature_names is passed as a plain list: some scikit-learn
        # releases validate this argument strictly and reject a pandas
        # Index, which would abort the plot inside this try block.
        # class_names assumes the negative class sorts first
        # (False/0 -> 'No', True/1 -> 'Yes') -- TODO confirm for other targets.
        plt.figure(figsize=(20, 10))
        plot_tree(
            clf,
            feature_names=list(X.columns),
            class_names=['No', 'Yes'],
            filled=True,
        )
        plt.title("Decision Tree Visualization")
        plt.show()

    except Exception as e:
        print(f"Error building or evaluating model: {e}")

In [None]:
# Main function to execute the process
def main():
    """Run the pipeline: load and preprocess the CSV, then fit and evaluate.

    Model building is skipped when the loading step reports failure by
    returning ``None``.
    """
    # Update this path to your dataset file
    csv_path = 'bank-full.csv'
    prepared = load_and_preprocess_data(csv_path)
    if prepared is None:
        return
    build_and_evaluate_model(prepared)


# Only run the pipeline when executed directly (or as a notebook cell),
# not when this code is imported as a module.
if __name__ == "__main__":
    main()

Data loaded successfully.
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 45211 entries, 0 to 45210
Data columns (total 17 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   age        45211 non-null  int64 
 1   job        45211 non-null  object
 2   marital    45211 non-null  object
 3   education  45211 non-null  object
 4   default    45211 non-null  object
 5   balance    45211 non-null  int64 
 6   housing    45211 non-null  object
 7   loan       45211 non-null  object
 8   contact    45211 non-null  object
 9   day        45211 non-null  int64 
 10  month      45211 non-null  object
 11  duration   45211 non-null  int64 
 12  campaign   45211 non-null  int64 
 13  pdays      45211 non-null  int64 
 14  previous   45211 non-null  int64 
 15  poutcome   45211 non-null  object
 16  y          45211 non-null  object
dtypes: int64(7), object(10)
memory usage: 5.9+ MB
None
   age           job  marital  education default  balance housing lo