# In [None]:

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

# Load the data
df = pd.read_csv('Test data.xlsx - Query result.csv')

# Rows without a target label cannot be used for supervised training.
# Imputing the label instead (e.g. with a median that can fall between two
# class ids) would invent classes that do not exist, so drop those rows.
df = df.dropna(subset=['status_id'])

# Columns with no observed values have nothing to impute from: mode() would
# be empty and median() would be NaN. Remove them before imputation.
df = df.dropna(axis=1, how='all')

# Work on an explicit copy so the column assignments below do not trigger
# SettingWithCopyWarning on the filtered frame.
df = df.copy()

# Resolve the column groups once, before encoding changes any dtype.
categorical_columns = list(df.select_dtypes(include=['object']).columns)
numerical_columns = list(df.select_dtypes(include=[np.number]).columns)

# Fill missing values for simplicity: mode for categorical columns and
# median for numerical columns.
# NOTE: these statistics (and the encoders below) are computed on the full
# dataset, i.e. before the train/test split further down in this script.
for column in categorical_columns:
    df[column] = df[column].fillna(df[column].mode()[0])

for column in numerical_columns:
    df[column] = df[column].fillna(df[column].median())

# Encode categorical variables as integer codes, keeping each fitted encoder
# so the codes can be mapped back to the original categories later.
label_encoders = {}
for column in categorical_columns:
    le = LabelEncoder()
    df[column] = le.fit_transform(df[column])
    label_encoders[column] = le

# Separate the target column from the predictor columns
y = df['status_id']
X = df.drop('status_id', axis=1)

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Standardize the features using statistics learned from the training rows
# only, then apply the same transformation to both partitions
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Fit the logistic regression classifier on the scaled training data
logistic_model = LogisticRegression(max_iter=1000).fit(X_train_scaled, y_train)

# Predict the status of every held-out row
y_pred_logistic = logistic_model.predict(X_test_scaled)

# Define the mapping from status ids to human-readable labels
output_mapping = {2: 'Paid', 5: 'Unpaid'}

# Label used for any status id that is not listed in output_mapping
default_label = 'Unpaid'

# Apply the mapping to the predictions
y_pred_logistic_mapped = [
    output_mapping.get(label, default_label) for label in y_pred_logistic
]

# Apply the mapping to the true labels with the SAME fallback as the
# predictions. A plain Series.map(dict) would turn unlisted ids into NaN,
# so true and predicted labels would be mapped inconsistently and NaN
# values would reach the metric functions.
y_test_mapped = y_test.map(
    lambda label: output_mapping.get(label, default_label)
)

# Evaluate the model with the mapped outputs
accuracy_logistic_mapped = accuracy_score(y_test_mapped, y_pred_logistic_mapped)
classification_report_logistic_mapped = classification_report(
    y_test_mapped, y_pred_logistic_mapped
)

print("Accuracy:", accuracy_logistic_mapped)
# The newline must be an escape sequence: a regular string literal cannot
# span two physical source lines.
print("Classification Report:\n", classification_report_logistic_mapped)
