In [8]:
# Print a greeting to standard output.
print("Hello, World!")

Hello, World!


In [9]:
import pandas as pd

# Read bnpl.csv (path relative to the working directory) into the DataFrame `df`.
df = pd.read_csv('bnpl.csv')

In [10]:
# Function to find outliers using IQR method
def find_iqr_outliers(series, k=1.5):
    """Return the values of ``series`` that fall outside the IQR fences.

    The fences are ``Q1 - k * IQR`` and ``Q3 + k * IQR``, where ``Q1`` and
    ``Q3`` are the 25th and 75th percentiles of ``series`` and
    ``IQR = Q3 - Q1``.

    Parameters
    ----------
    series : pandas.Series
        Numeric values to inspect.
    k : float, default 1.5
        Fence multiplier. The default reproduces the previous hard-coded
        behaviour; a larger value (e.g. 3.0) flags only more extreme points.

    Returns
    -------
    pandas.Series
        The subset of ``series`` strictly below the lower fence or strictly
        above the upper fence, with the original index labels preserved.
        Empty when no value lies outside the fences.
    """
    q1 = series.quantile(0.25)
    q3 = series.quantile(0.75)
    iqr = q3 - q1
    lower, upper = q1 - k * iqr, q3 + k * iqr
    # Strict comparisons: a value sitting exactly on a fence is not an outlier.
    return series[(series < lower) | (series > upper)]

In [11]:
# Show the frame's schema: column names, non-null counts and dtypes.
df.info()

# Apply the IQR check to every int64/float64 column and print the flagged rows.
numeric_cols = df.select_dtypes(include=['float64', 'int64']).columns
for column in numeric_cols:
    outliers = find_iqr_outliers(df[column])
    print(f"Outliers in {column}:\n", outliers)


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 15 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   CustomerID                      1000 non-null   object 
 1   failed_traditional_credit       1000 non-null   int64  
 2   bnpl_usage_frequency            1000 non-null   int64  
 3   over_indebtedness_flag          1000 non-null   int64  
 4   financial_stress_score          1000 non-null   int64  
 5   external_repayment_loans        1000 non-null   int64  
 6   credit_card_interest_incidence  1000 non-null   int64  
 7   credit_limit_utilisation        1000 non-null   int64  
 8   payment_delinquency_count       1000 non-null   int64  
 9   impulsive_buying_score          1000 non-null   int64  
 10  financial_literacy_assessment   1000 non-null   int64  
 11  debt_accumulation_metric        1000 non-null   float64
 12  return_dispute_incidents        100

In [12]:
# Build a binary default flag: a row is flagged when at least
# MIN_RISK_SIGNALS of the five risk conditions below hold.
MIN_RISK_SIGNALS = 3
risk_signals = [
    df['payment_delinquency_count'] >= 3,
    df['over_indebtedness_flag'] == 1,
    df['financial_stress_score'] >= 9,
    df['bnpl_debt_ratio'] >= 1.8,
    df['credit_limit_utilisation'] >= 95,
]
# Each condition contributes 0 or 1; the sum is the number of conditions met.
default_flag = sum(signal.astype(int) for signal in risk_signals) >= MIN_RISK_SIGNALS

# Store the flag as a 0/1 integer column.
df['default_flag'] = default_flag.astype(int)

# Save the flagged data. On a first run this file still contains CustomerID;
# if the cell is re-run, `df` has already lost that column (see below).
df.to_csv('bnpl_with_default_flag.csv', index=False)

# Drop the identifier column. errors='ignore' keeps this cell re-runnable:
# `df` is rebound here, so on a second execution CustomerID is already gone
# and a plain drop would raise KeyError.
df = df.drop(columns=['CustomerID'], errors='ignore')

# Save the identifier-free version of the data.
df.to_csv('bnpl_cleaned.csv', index=False)

In [15]:
# Collect the names of all int64/float64 columns.
numeric_columns = list(df.select_dtypes(include=['float64', 'int64']).columns)

# Count distinct (non-null) values once per numeric column.
unique_counts = df[numeric_columns].nunique()

# Columns with exactly two distinct values are treated as binary; columns
# with more than two go into the "continuous" list. Columns with fewer than
# two distinct values end up in neither list.
binary_columns = [col for col in numeric_columns if unique_counts[col] == 2]
continuous_columns = [col for col in numeric_columns if unique_counts[col] > 2]

# Report the three lists.
print("Numeric columns:", numeric_columns)
print("Binary columns:", binary_columns)
print("Continuous columns (excluding binary):", continuous_columns)

Numeric columns: ['failed_traditional_credit', 'bnpl_usage_frequency', 'over_indebtedness_flag', 'financial_stress_score', 'external_repayment_loans', 'credit_card_interest_incidence', 'credit_limit_utilisation', 'payment_delinquency_count', 'impulsive_buying_score', 'financial_literacy_assessment', 'debt_accumulation_metric', 'return_dispute_incidents', 'demographic_risk_factor', 'bnpl_debt_ratio', 'default_flag']
Binary columns: ['failed_traditional_credit', 'over_indebtedness_flag', 'external_repayment_loans', 'credit_card_interest_incidence', 'default_flag']
Continuous columns (excluding binary): ['bnpl_usage_frequency', 'financial_stress_score', 'credit_limit_utilisation', 'payment_delinquency_count', 'impulsive_buying_score', 'financial_literacy_assessment', 'debt_accumulation_metric', 'return_dispute_incidents', 'demographic_risk_factor', 'bnpl_debt_ratio']
