In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score, roc_curve, accuracy_score
from sklearn.impute import SimpleImputer

# Step 1: Load the development data
dev_data = pd.read_csv('Dev_data_to_be_shared.csv')

# Separate features and target variable
X = dev_data.drop(columns=['account_number', 'bad_flag'])
y = dev_data['bad_flag']

# Step 2: Preprocess the data
# Handle missing values
imputer = SimpleImputer(strategy='mean')
X_imputed = imputer.fit_transform(X)

# Standardize the data
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_imputed)

# Step 3: Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Step 4: Train a logistic regression model
model = LogisticRegression(random_state=42, max_iter=500)
model.fit(X_train, y_train)

# Step 5: Evaluate the model on the test set
y_pred_prob = model.predict_proba(X_test)[:, 1]
auc_score = roc_auc_score(y_test, y_pred_prob)
print(f"AUC Score: {auc_score}")

# Step 6: Load the validation data
validation_data = pd.read_csv('validation_data_to_be_shared.csv')
validation_account_numbers = validation_data['account_number']
X_validation = validation_data.drop(columns=['account_number'])

# Preprocess the validation data
X_validation_imputed = imputer.transform(X_validation)
X_validation_scaled = scaler.transform(X_validation_imputed)

# Step 7: Predict probabilities on the validation data
validation_pred_prob = model.predict_proba(X_validation_scaled)[:, 1]

# Step 8: Save predictions to a CSV file
output = pd.DataFrame({
    'account_number': validation_account_numbers,
    'predicted_probability': validation_pred_prob
})
output.to_csv('validation_predictions.csv', index=False)

print("Predictions saved to 'validation_predictions.csv'")



AUC Score: 0.7488292556122014




Predictions saved to 'validation_predictions.csv'
