In [None]:
import os
import shutil
import subprocess

import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import export_graphviz
from sklearn.tree import export_text




   

In [None]:

# Column labels applied to the data in place of the file's own first row
col_names = [
    'age', 'workclass', 'fnlwgt', 'education', 'educational-num',
    'marital-status', 'occupation', 'relationship', 'race', 'gender',
    'capital-gain', 'capital-loss', 'hours-per-week', 'native-country',
    'income',
]
# Read the CSV: the first line is skipped and col_names supplies the labels
df = pd.read_csv(
    "income.csv",
    names=col_names,
    skiprows=1,
)
# Show the resulting column index as a quick sanity check
print(df.columns)

In [None]:

# Data preprocessing
# Columns holding categorical values that will be one-hot encoded
categorical_cols = ['workclass', 'education', 'marital-status', 'occupation', 'relationship', 'race', 'native-country', 'gender']
# Handle missing values by filling each column with its most frequent value.
# This must run BEFORE encoding: get_dummies ignores NaN by default (the row
# just gets all-zero indicator columns), so imputing afterwards would never
# repair a missing categorical value.
df = df.fillna(df.mode().iloc[0])
# Convert categorical variables to numeric using one-hot encoding
# NOTE: this cell rebinds `df`; re-running it without re-running the load cell
# will fail because the categorical columns no longer exist.
df = pd.get_dummies(df, columns=categorical_cols)
# Print DataFrame
print(df)

In [None]:


# Split dataset into features (every column except the label) and target variable
X = df.drop('income', axis=1)
y = df['income']

# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Train an entropy-based decision tree with scikit-learn.
# NOTE: this is not Weka's J48 (C4.5); scikit-learn's tree is CART-based and
# criterion='entropy' only selects information gain as the split measure.
# random_state is fixed so the fitted tree (and therefore the predictions and
# accuracy reported later) is identical on every Restart & Run All.
clf = DecisionTreeClassifier(criterion='entropy', random_state=42)
clf.fit(X_train, y_train)

DecisionTreeClassifier(criterion='entropy')

In [None]:
# Run the fitted classifier over the held-out feature rows
y_pred = clf.predict(X_test)

# Show the predicted labels
print(y_pred)


In [None]:

# Score the predictions against the true test labels (fraction classified correctly)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Accuracy:", accuracy)




In [None]:

# Render the fitted tree's decision rules as indented plain text,
# labelling each split with its feature column name
tree_rules = export_text(clf, feature_names=X.columns.tolist())
print(tree_rules)



In [None]:

# Generate the decision tree visualization as DOT source
# (out_file=None makes export_graphviz return the DOT text as a string)
dot_data = export_graphviz(clf, out_file=None, filled=True, rounded=True, special_characters=True)

# Locate the Graphviz executable (dot). Prefer one found on PATH so the
# notebook runs on any platform; otherwise fall back to the default Windows
# install location.
dot_path = shutil.which("dot") or r"C:\Program Files\Graphviz\bin\dot.exe"  # Update the fallback path to match your Graphviz installation directory

# Pipe the DOT source through `dot -Tpng` and capture the PNG bytes.
# check=True raises CalledProcessError if Graphviz exits with an error, so a
# failed render is never written out as an empty or corrupt image.
graph = subprocess.run([dot_path, '-Tpng'], input=dot_data.encode(), stdout=subprocess.PIPE, check=True)

# Save the decision tree visualization to a file
with open('decision_tree.png', 'wb') as f:
    f.write(graph.stdout)

# Report which Graphviz executable was used
print(dot_path)