# Task 12 - Classification Algorithms II

## 1. Predicting Employee Attrition using Logistic Regression

In [7]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import precision_score, recall_score, f1_score

df = pd.read_csv("employee_attrition.csv")

df = df.apply(lambda col: LabelEncoder().fit_transform(col) if col.dtypes == 'object' else col)

X = df.drop("Attrition", axis=1)
y = df["Attrition"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

print("Precision:", precision_score(y_test, y_pred))
print("Recall:", recall_score(y_test, y_pred))
print("F1 Score:", f1_score(y_test, y_pred))

Precision: 0.6842105263157895
Recall: 0.3333333333333333
F1 Score: 0.4482758620689655


## 2. Predicting Heart Disease using KNN

In [8]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, roc_auc_score

df = pd.read_csv("heart_disease.csv")
df = df.fillna(df.mean())
df = df.apply(lambda col: LabelEncoder().fit_transform(col) if col.dtypes == 'object' else col)

X = df.drop("target", axis=1)
y = df["target"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

model = KNeighborsClassifier(n_neighbors=5)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

print("Accuracy:", accuracy_score(y_test, y_pred))
print("ROC-AUC:", roc_auc_score(y_test, y_pred))

Accuracy: 0.9016393442622951
ROC-AUC: 0.9030172413793103


## 3. Predicting Hospital Readmission using Logistic Regression

In [12]:
df = pd.read_csv("hospital_readmissions.csv")

for col in df.columns:
    if df[col].dtype == 'object':
        df[col] = df[col].fillna(df[col].mode()[0])

df = df.apply(lambda col: LabelEncoder().fit_transform(col) if col.dtypes == 'object' else col)

X = df.drop("readmitted", axis=1)
y = df["readmitted"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

print("Precision:", precision_score(y_test, y_pred))
print("Recall:", recall_score(y_test, y_pred))
print("F1 Score:", f1_score(y_test, y_pred))

Precision: 0.628722700198544
Recall: 0.40563620836891545
F1 Score: 0.4931222424085128


## 4. Classifying Credit Card Fraud using Decision Tree

In [13]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import confusion_matrix

df = pd.read_csv("creditcard.csv")
df = df.fillna(df.median())

X = df.drop("Class", axis=1)
y = df["Class"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

model = DecisionTreeClassifier(random_state=42)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

print("ROC-AUC:", roc_auc_score(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))

ROC-AUC: 0.8976602247539421
Confusion Matrix:
 [[56830    34]
 [   20    78]]


## 7. Predicting Diabetes using Random Forest

In [19]:
from sklearn.ensemble import RandomForestClassifier

df = pd.read_csv("diabetes.csv")
df = df.fillna(df.median())

X = df.drop("Outcome", axis=1)
y = df["Outcome"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

print("Accuracy:", accuracy_score(y_test, y_pred))
print("ROC-AUC:", roc_auc_score(y_test, y_pred))
print("Feature Importances:", model.feature_importances_)

Accuracy: 0.7207792207792207
ROC-AUC: 0.697979797979798
Feature Importances: [0.07655143 0.25886402 0.08813384 0.06564625 0.0761219  0.1699835
 0.1237676  0.14093146]


## 8. Classifying Iris Flowers using SVM

In [21]:
from sklearn.svm import SVC

df = pd.read_csv("Iris.csv")

X = df.drop("Species", axis=1)
y = LabelEncoder().fit_transform(df["Species"])

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

model = SVC()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

print("Accuracy:", accuracy_score(y_test, y_pred))
print("Precision:", precision_score(y_test, y_pred, average='macro'))
print("Recall:", recall_score(y_test, y_pred, average='macro'))

Accuracy: 1.0
Precision: 1.0
Recall: 1.0
