In [48]:
import pandas as pd
import matplotlib.pyplot as plt

In [29]:
# Load the salary dataset; the relative path is resolved against the current working directory.
csv_path = 'salaries.csv'
df = pd.read_csv(csv_path)

# Data Exploration

In [30]:
# Show the frame's dimensions as a (rows, columns) tuple.
df.shape

(16, 4)

In [31]:
# Preview the first five rows of the freshly loaded data.
df.head()

Unnamed: 0,company,job,degree,salary_more_then_100k
0,google,sales executive,bachelors,0
1,google,sales executive,masters,0
2,google,business manager,bachelors,1
3,google,business manager,masters,1
4,google,computer programmer,bachelors,0


In [32]:
# Count the missing values in each column.
df.isna().sum()

company                  0
job                      0
degree                   0
salary_more_then_100k    0
dtype: int64

# Data Preprocessing

In [33]:
# Encode the degree as a binary feature: 0 for 'bachelors', 1 for any other value.
# A vectorized comparison replaces the row-wise df.apply(axis=1), which ran a
# Python lambda once per row and built a full row object just to read one column.
# NOTE: this overwrites 'degree' in place, so re-running the cell on the
# already-encoded column would set every row to 1 (0/1 never equal 'bachelors').
df['degree'] = df['degree'].ne('bachelors').astype('int64')

In [34]:
# Preview the first five rows again to check the re-encoded 'degree' column.
df.head()

Unnamed: 0,company,job,degree,salary_more_then_100k
0,google,sales executive,0,0
1,google,sales executive,1,0
2,google,business manager,0,1
3,google,business manager,1,1
4,google,computer programmer,0,0


In [35]:
# One-hot encode the categorical columns. drop_first=True omits the first
# level of each column, leaving it as the implicit baseline category.
categorical_columns = ['job', 'company']
df = pd.get_dummies(data=df, columns=categorical_columns, drop_first=True)

In [36]:
# Preview the first five rows after one-hot encoding 'job' and 'company'.
df.head()

Unnamed: 0,degree,salary_more_then_100k,job_computer programmer,job_sales executive,company_facebook,company_google
0,0,0,False,True,False,True
1,1,0,False,True,False,True
2,0,1,False,False,False,True
3,1,1,False,False,False,True
4,0,0,True,False,False,True


# Model Training

In [53]:
# Separate the target column (y) from the remaining feature columns (x).
y = df.loc[:, 'salary_more_then_100k']
x = df.drop(columns=['salary_more_then_100k'])

In [54]:
from sklearn.tree import DecisionTreeClassifier, export_text

# Seed the estimator so the fitted tree is the same on every run. Per the
# scikit-learn docs, features are randomly permuted at each split, so without
# a fixed random_state equally good splits can be chosen differently between
# runs and the learned rules may change.
model = DecisionTreeClassifier(random_state=42)
model.fit(x, y)

In [55]:
# Predict on x, the same feature matrix the model was fitted on, so y_pred
# reflects training-set performance only.
y_pred = model.predict(x)

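# Model Evaluation

Note: the report below scores predictions made on the same rows the model was trained on, so it measures fit to the training data rather than performance on unseen data.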

In [56]:
from sklearn.metrics import classification_report

# Build the per-class precision / recall / F1 summary, comparing the true
# labels with the model's predictions, then print it as plain text.
report = classification_report(y_true=y, y_pred=y_pred)
print(report)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00         6
           1       1.00      1.00      1.00        10

    accuracy                           1.00        16
   macro avg       1.00      1.00      1.00        16
weighted avg       1.00      1.00      1.00        16



In [58]:

# Render the fitted tree's decision rules as indented text, labelling each
# split with the name of the feature column it tests.
feature_names = list(x.columns)
tree_rules = export_text(model, feature_names=feature_names)
print(tree_rules)


|--- company_facebook <= 0.50
|   |--- job_sales executive <= 0.50
|   |   |--- degree <= 0.50
|   |   |   |--- company_google <= 0.50
|   |   |   |   |--- class: 0
|   |   |   |--- company_google >  0.50
|   |   |   |   |--- job_computer programmer <= 0.50
|   |   |   |   |   |--- class: 1
|   |   |   |   |--- job_computer programmer >  0.50
|   |   |   |   |   |--- class: 0
|   |   |--- degree >  0.50
|   |   |   |--- class: 1
|   |--- job_sales executive >  0.50
|   |   |--- class: 0
|--- company_facebook >  0.50
|   |--- class: 1

