In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score

# Load the raw table from adult.csv. It is kept under its own name so that
# re-running this cell always starts from the untouched file contents.
raw_data = pd.read_csv('adult.csv')

# Cells equal to '?' are treated as missing; any row with a missing value is
# dropped. Built as a new frame instead of mutating in place.
data = raw_data.replace('?', np.nan).dropna().copy()

categorical_columns = ['workclass', 'education', 'marital-status', 'occupation',
                       'relationship', 'race', 'gender', 'native-country']

# Integer-encode every categorical feature. The single encoder is refit per
# column, so after the final fit below it holds the mapping for 'income' only.
# NOTE(review): integer codes impose an arbitrary order on nominal categories
# when fed to a linear model; one-hot encoding may be a better fit.
label_encoder = LabelEncoder()
for col in categorical_columns:
    data[col] = label_encoder.fit_transform(data[col])

# Encode the target column to integer class labels.
data['income'] = label_encoder.fit_transform(data['income'])

X = data.drop('income', axis=1)
y = data['income']

# Split BEFORE scaling so that the scaler never sees the test rows. With the
# same random_state and number of samples the row partition is the same one
# the previous scale-then-split order produced.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Fit the scaler on the training partition only, then apply those training
# statistics to the held-out partition (avoids test-set leakage).
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Kept so that any later cell referring to X_scaled still works; it is the
# full feature matrix scaled with the training-set statistics.
X_scaled = scaler.transform(X)

logreg = LogisticRegression()
logreg.fit(X_train, y_train)

# Default decision rule of predict(): the most probable class per row.
y_pred = logreg.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
classification_report_output = classification_report(y_test, y_pred)

print(f"Accuracy: {accuracy}")
print("Classification Report:")
print(classification_report_output)


Accuracy: 0.820372963809243
Classification Report:
              precision    recall  f1-score   support

           0       0.84      0.94      0.89     10241
           1       0.71      0.45      0.55      3326

    accuracy                           0.82     13567
   macro avg       0.78      0.70      0.72     13567
weighted avg       0.81      0.82      0.81     13567



In [2]:
# Re-score the test set with a decision threshold lower than predict()'s
# default, trading precision on class 1 for recall.
threshold = 0.3

# predict_proba returns one column per class; column index 1 is taken as the
# probability of class 1.
class_probabilities = logreg.predict_proba(X_test)
y_pred_proba = class_probabilities[:, 1]

# A row is labelled 1 when its class-1 probability reaches the threshold.
meets_threshold = y_pred_proba >= threshold
y_pred_custom = meets_threshold.astype(int)

classification_report_custom = classification_report(y_test, y_pred_custom)
accuracy_custom = accuracy_score(y_test, y_pred_custom)

print(
    f"Accuracy with custom threshold: {accuracy_custom}",
    "Classification Report with custom threshold:",
    classification_report_custom,
    sep="\n",
)


Accuracy with custom threshold: 0.7956069875433036
Classification Report with custom threshold:
              precision    recall  f1-score   support

           0       0.89      0.83      0.86     10241
           1       0.57      0.70      0.63      3326

    accuracy                           0.80     13567
   macro avg       0.73      0.76      0.74     13567
weighted avg       0.81      0.80      0.80     13567



In [3]:
# Same evaluation as the previous cell's pattern, but with a much lower
# threshold so that far more rows are labelled as class 1.
threshold = 0.1

# Column index 1 of predict_proba is taken as the probability of class 1.
class_probabilities = logreg.predict_proba(X_test)
y_pred_proba = class_probabilities[:, 1]

# A row is labelled 1 when its class-1 probability reaches the threshold.
meets_threshold = y_pred_proba >= threshold
y_pred_custom = meets_threshold.astype(int)

classification_report_custom = classification_report(y_test, y_pred_custom)
accuracy_custom = accuracy_score(y_test, y_pred_custom)

print(
    f"Accuracy with aggressive threshold: {accuracy_custom}",
    "Classification Report with aggressive threshold:",
    classification_report_custom,
    sep="\n",
)


Accuracy with aggressive threshold: 0.589076435468416
Classification Report with aggressive threshold:
              precision    recall  f1-score   support

           0       0.96      0.48      0.64     10241
           1       0.37      0.93      0.53      3326

    accuracy                           0.59     13567
   macro avg       0.66      0.71      0.58     13567
weighted avg       0.81      0.59      0.61     13567

