In [8]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_absolute_error, r2_score


In [9]:
df = pd.read_csv("salaries.csv")

print("Dataset loaded successfully!")
df.head()


Dataset loaded successfully!


Unnamed: 0,company,job,degree,salary
0,Google,Software Engineer,Bachelors,1
1,Google,Software Engineer,Masters,1
2,Google,Data Scientist,Masters,1
3,Google,HR,Bachelors,0
4,Amazon,Software Engineer,Bachelors,1


In [10]:
df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20 entries, 0 to 19
Data columns (total 4 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   company  20 non-null     object
 1   job      20 non-null     object
 2   degree   20 non-null     object
 3   salary   20 non-null     int64 
dtypes: int64(1), object(3)
memory usage: 768.0+ bytes


In [11]:
print("Columns:", df.columns.tolist())

salary_col = next(col for col in df.columns if 'salary' in col.lower())
print("Detected salary column:", salary_col)


Columns: ['company', 'job', 'degree', 'salary']
Detected salary column: salary


In [12]:
le = LabelEncoder()

for col in df.columns:
    if df[col].dtype == 'object':
        df[col] = le.fit_transform(df[col])


In [13]:
X = df.drop(salary_col, axis=1)
y = df[salary_col]

X.head(), y.head()


(   company  job  degree
 0        2    3       0
 1        2    3       1
 2        2    1       1
 3        2    2       0
 4        0    3       0,
 0    1
 1    1
 2    1
 3    0
 4    1
 Name: salary, dtype: int64)

In [14]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)


In [15]:
model = DecisionTreeRegressor(random_state=42)
model.fit(X_train, y_train)

print("Model trained successfully!")


Model trained successfully!


In [16]:
y_pred = model.predict(X_test)
y_pred[:5]


array([1., 1., 0., 1.])

In [17]:
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("Mean Absolute Error:", mae)
print("R2 Score:", r2)


Mean Absolute Error: 0.25
R2 Score: 0.0


In [18]:
sample = X.iloc[[0]]
prediction = model.predict(sample)

print("Predicted Salary:", prediction[0])


Predicted Salary: 1.0
