**Task-03**

**Build a decision tree classifier to predict whether a customer will purchase a product or service based on their demographic and behavioral data. Use a dataset such as the Bank Marketing dataset from the UCI Machine Learning Repository.**

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import LabelEncoder

In [98]:
df = pd.read_csv('/content/customer_dataset.csv')

In [99]:
data = df.drop(['Gender','Subscription Type','Contract Length'], axis=1)
data.head(10)

Unnamed: 0,CustomerID,Age,Tenure,Usage Frequency,Support Calls,Payment Delay,Total Spend,Last Interaction,Churn
0,1,22,25,14,4,27,598,9,1
1,2,41,28,28,7,13,584,20,0
2,3,47,27,10,2,29,757,21,0
3,4,35,9,12,5,17,232,18,0
4,5,53,58,24,9,2,533,18,0
5,6,30,41,14,10,10,500,29,0
6,7,47,37,15,9,28,574,14,1
7,8,54,36,11,0,18,323,16,0
8,9,36,20,5,10,8,687,8,0
9,10,65,8,4,2,23,995,10,0


In [100]:
data.duplicated().sum()

0

In [101]:
data.isnull().sum()

CustomerID          0
Age                 0
Tenure              0
Usage Frequency     0
Support Calls       0
Payment Delay       0
Total Spend         0
Last Interaction    0
Churn               0
dtype: int64

In [102]:
data.shape

(15000, 9)

In [103]:
threshold_spend = data["Total Spend"].mean()
threshold_interaction = data["Last Interaction"].mean()

def create_purchase_label(row):
    if row['Total Spend'] > threshold_spend and row['Last Interaction'] > threshold_interaction:
        return 1
    else:
        return 0


In [104]:
data['Purchase Label'] = data.apply(create_purchase_label, axis=1)

In [105]:
print(data[['Total Spend', 'Last Interaction', 'Purchase Label']])

       Total Spend  Last Interaction  Purchase Label
0              598                 9               0
1              584                20               1
2              757                21               1
3              232                18               0
4              533                18               0
...            ...               ...             ...
14995          802                20               1
14996          214                 4               0
14997          502                10               0
14998          599                13               0
14999          793                 2               0

[15000 rows x 3 columns]


In [106]:
(data['Purchase Label']).sum()

3779

In [107]:
y = data['Purchase Label']
x = data.drop(['Purchase Label'] , axis=1 )

In [108]:
x.head()

Unnamed: 0,CustomerID,Age,Tenure,Usage Frequency,Support Calls,Payment Delay,Total Spend,Last Interaction,Churn
0,1,22,25,14,4,27,598,9,1
1,2,41,28,28,7,13,584,20,0
2,3,47,27,10,2,29,757,21,0
3,4,35,9,12,5,17,232,18,0
4,5,53,58,24,9,2,533,18,0


In [109]:
y.head()

0    0
1    1
2    1
3    0
4    0
Name: Purchase Label, dtype: int64

In [110]:
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.35, random_state=42)


In [111]:
clf = DecisionTreeClassifier(random_state=42)


In [112]:
clf.fit(X_train, y_train)

In [113]:
y_pred = clf.predict(X_test)

In [114]:
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 1.0


In [115]:
print("Classification Report:\n", classification_report(y_test, y_pred))

Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00      3903
           1       1.00      1.00      1.00      1347

    accuracy                           1.00      5250
   macro avg       1.00      1.00      1.00      5250
weighted avg       1.00      1.00      1.00      5250



In [117]:
from sklearn.tree import export_graphviz
import graphviz

dot_data = export_graphviz(clf, out_file=None, feature_names=x.columns, class_names=["No", "Yes"], filled=True, rounded=True)
graph = graphviz.Source(dot_data)
graph.render("decision_tree")

'decision_tree.pdf'