In [2]:
# Import Libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Create Employee Dataset
data = {
    'EmpID': ['A12X9', 'Q77B1', 'L55K2', 'M82P7', 'Z90R4', 'T28W6', 'K17N3', 'B91F2'],
    'Name': ['Arjun', 'Meera', 'Ravi', 'Sana', 'Karan', 'Diya', 'Vikram', 'Asha'],
    'Salary': [75000, 54000, 120000, 68000, 95000, 52000, 150000, 48000],
    'Experience': [5, 2, 10, 3, 7, 1, 12, 2],
    'PerformanceScore': [8, 6, 9, 7, 8, 5, 9, 4],
    'Promotion_Status': ['Yes', 'No', 'Yes', 'No', 'Yes', 'No', 'Yes', 'No']
}

df = pd.DataFrame(data)
print("Dataset:")
print(df)

le = LabelEncoder()
df['Promotion_Status'] = le.fit_transform(df['Promotion_Status'])

X = df[['Salary', 'Experience', 'PerformanceScore']]
y = df['Promotion_Status']


X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

log_model = LogisticRegression()
log_model.fit(X_train_scaled, y_train)

log_predictions = log_model.predict(X_test_scaled)

print("Logistic Regression Results:")
print("Accuracy:", accuracy_score(y_test, log_predictions))
print("Confusion Matrix:\n", confusion_matrix(y_test, log_predictions))
print("Classification Report:\n", classification_report(y_test, log_predictions))

dt_model = DecisionTreeClassifier()
dt_model.fit(X_train, y_train)

dt_predictions = dt_model.predict(X_test)

print("Decision Tree Results:")
print("Accuracy:", accuracy_score(y_test, dt_predictions))
print("Confusion Matrix:\n", confusion_matrix(y_test, dt_predictions))
print("Classification Report:\n", classification_report(y_test, dt_predictions))


log_acc = accuracy_score(y_test, log_predictions)
dt_acc = accuracy_score(y_test, dt_predictions)

print("Model Comparison:")
print("Logistic Regression Accuracy:", log_acc)
print("Decision Tree Accuracy:", dt_acc)

if log_acc > dt_acc:
    print("Logistic Regression performed better!")
elif dt_acc > log_acc:
    print("Decision Tree performed better!")
else:
    print("Both models performed equally!")



Dataset:
   EmpID    Name  Salary  Experience  PerformanceScore Promotion_Status
0  A12X9   Arjun   75000           5                 8              Yes
1  Q77B1   Meera   54000           2                 6               No
2  L55K2    Ravi  120000          10                 9              Yes
3  M82P7    Sana   68000           3                 7               No
4  Z90R4   Karan   95000           7                 8              Yes
5  T28W6    Diya   52000           1                 5               No
6  K17N3  Vikram  150000          12                 9              Yes
7  B91F2    Asha   48000           2                 4               No
Logistic Regression Results:
Accuracy: 1.0
Confusion Matrix:
 [[2]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00         2

    accuracy                           1.00         2
   macro avg       1.00      1.00      1.00         2
weighted avg       1.00      1.00

