# 1. Problem Information
- **Name:** [**Automated Loan Evaluation System**](https://platform.olimpiada-ai.ro/en/problems/42)
- **Date:** 12/02/2026
- **Type:** Binary classification (`loan_status` is a 0/1 label), approached here as regression on that label

# 2. Imports

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.pipeline import make_pipeline
from sklearn.compose import make_column_transformer,make_column_selector
from sklearn.preprocessing import StandardScaler,OneHotEncoder
from sklearn.linear_model import LogisticRegression,LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import cross_val_score

# 3. Data preparation

In [2]:
# Read the training and test splits from the local data/ folder.
train, test = (
    pd.read_csv(csv_path) for csv_path in ("data/train.csv", "data/test.csv")
)
# Report the (rows, columns) of the training frame, then preview its first five rows.
print(train.shape)
train.head()

(40000, 20)


Unnamed: 0,customer_id,age,occupation_status,years_employed,annual_income,credit_score,credit_history_years,savings_assets,current_debt,defaults_on_file,delinquencies_last_2yrs,derogatory_marks,product_type,loan_intent,loan_amount,interest_rate,debt_to_income_ratio,loan_to_income_ratio,payment_to_income_ratio,loan_status
0,CUST116708,40,Self-Employed,14.1,77335,679,4.4,67,17026,0,0,0,Personal Loan,Education,26200,12.04,0.22,0.339,0.113,1
1,CUST109237,18,Employed,0.0,34946,639,1.2,292,2620,0,1,0,Personal Loan,Education,37400,14.95,0.075,1.07,0.357,0
2,CUST102214,29,Employed,9.4,26778,595,5.5,191,4702,1,1,0,Personal Loan,Personal,52300,15.51,0.176,1.953,0.651,0
3,CUST114496,27,Student,1.5,25624,590,5.7,414,2609,0,0,0,Personal Loan,Debt Consolidation,4800,15.91,0.102,0.187,0.062,0
4,CUST127659,18,Employed,0.0,25676,568,1.8,19,3453,1,1,1,Personal Loan,Medical,24700,15.47,0.134,0.962,0.321,0


In [3]:
# Summary statistics of the numeric training columns, rounded to three decimals.
train.describe().round(decimals=3)

Unnamed: 0,age,years_employed,annual_income,credit_score,credit_history_years,savings_assets,current_debt,defaults_on_file,delinquencies_last_2yrs,derogatory_marks,loan_amount,interest_rate,debt_to_income_ratio,loan_to_income_ratio,payment_to_income_ratio,loan_status
count,40000.0,40000.0,40000.0,40000.0,40000.0,40000.0,40000.0,40000.0,40000.0,40000.0,40000.0,40000.0,40000.0,40000.0,40000.0,40000.0
mean,34.919,7.448,50015.069,643.441,8.16,3606.282,14276.75,0.054,0.555,0.147,33044.025,15.499,0.286,0.703,0.234,0.55
std,11.119,7.591,32591.91,64.587,7.203,13349.066,13205.892,0.226,0.844,0.413,26189.168,4.058,0.16,0.467,0.156,0.497
min,18.0,0.0,15000.0,348.0,0.0,0.0,76.0,0.0,0.0,0.0,500.0,6.0,0.002,0.008,0.003,0.0
25%,26.0,1.3,27206.0,599.75,2.0,130.0,5588.0,0.0,0.0,0.0,12300.0,12.19,0.161,0.332,0.111,0.0
50%,34.0,4.9,41549.0,643.0,6.1,568.0,10379.5,0.0,0.0,0.0,26000.0,15.44,0.265,0.622,0.207,1.0
75%,43.0,11.5,62656.5,687.0,12.6,2264.25,18467.25,0.0,1.0,0.0,48600.0,18.86,0.389,1.013,0.338,1.0
max,70.0,39.9,250000.0,850.0,30.0,300000.0,163344.0,1.0,9.0,4.0,100000.0,23.0,0.8,2.001,0.667,1.0


# 4. Models

In [4]:
# Target: the loan_status column.
Y = train.loc[:, 'loan_status']
# Features: every column except the first and the last one, selected by position
# (in the preview above these are customer_id and loan_status respectively).
X = train[train.columns[1:-1]]

In [5]:
# Preprocessing: one-hot encode every object-dtype column and standardise all
# remaining columns.
# handle_unknown='ignore' keeps transform() from raising when a category shows up
# in a validation fold or in the test set that was not present during fit.
transformer = make_column_transformer(
    (OneHotEncoder(handle_unknown='ignore'), make_column_selector(dtype_include=object)),
    remainder=StandardScaler(),
)
# A fixed random_state makes the forest (and therefore the CV score and the
# submitted predictions) reproducible across Restart & Run All; n_jobs=-1 only
# parallelises tree building and does not change the results.
pipeline = make_pipeline(
    transformer,
    RandomForestRegressor(random_state=42, n_jobs=-1),
)

In [6]:
# 4-fold cross-validation of the whole pipeline. No `scoring` is passed, so the
# estimator's own score method is used (R^2 for a regressor).
scores = cross_val_score(estimator=pipeline, X=X, y=Y, cv=4)
print(np.mean(scores))

0.7333876141802629


In [7]:
# Refit on the complete training set, then predict for the test rows.
# The first test column is dropped by position, mirroring how X was built.
# NOTE(review): assumes the test file has the same feature columns as X and no
# target column - confirm.
predictions = (
    pipeline
    .fit(X, Y)
    .predict(test.iloc[:, 1:])
)

# 5. Submission

In [8]:
def AgeClassification(value):
    """Map an age to an age-group label.

    Returns 'Young' for values below 30, 'Adult' for values below 60 and
    'Senior' for everything else.
    """
    # Upper bounds are checked in ascending order; the first one that the value
    # falls under decides the label.
    for upper_bound, label in ((30, 'Young'), (60, 'Adult')):
        if value < upper_bound:
            return label
    return 'Senior'
# Subtask 1: age-group label for every row of the test set.
task1 = test.age.apply(AgeClassification)

In [9]:
def DebtRiskClassification(value, low_threshold=0.20, medium_threshold=0.40):
    """Label a debt-to-income ratio as 'LowRisk', 'MediumRisk' or 'HighRisk'.

    Parameters
    ----------
    value : float
        Debt-to-income ratio expressed as a fraction (e.g. 0.25 for 25%).
    low_threshold : float, default 0.20
        Ratios strictly below this value are 'LowRisk'.
    medium_threshold : float, default 0.40
        Ratios strictly below this value (and not 'LowRisk') are 'MediumRisk'.

    Returns
    -------
    str
        'LowRisk', 'MediumRisk' or 'HighRisk'.
    """
    # debt_to_income_ratio is stored as a fraction (0.002-0.8 in the training
    # summary), so percentage cut-offs of 20 / 40 would label every row 'LowRisk'.
    # NOTE(review): 0.20 / 0.40 assume the task statement means 20% / 40% - confirm.
    if value < low_threshold:
        return 'LowRisk'
    if value < medium_threshold:
        return 'MediumRisk'
    return 'HighRisk'
# Subtask 2: risk label for every row of the test set. A dedicated function name
# is used so the age classifier defined earlier is not shadowed.
task2 = test['debt_to_income_ratio'].apply(DebtRiskClassification)

In [10]:
# Subtask 3: element-wise sum of three test-set columns.
# NOTE(review): current_debt is on a far larger scale (training mean ~14,277) than
# the two count columns it is added to - confirm against the task statement that
# these are the intended columns.
task3 = (
    test['current_debt']
    .add(test['derogatory_marks'])
    .add(test['delinquencies_last_2yrs'])
)

In [11]:
def _subtask_frame(subtask_id, answers):
    """Build the submission rows (subtaskID, datapointID, answer) for one subtask.

    `answers` must hold one value per row of `test`, in the same row order.
    """
    return pd.DataFrame({
        "subtaskID": subtask_id,            # scalar, broadcast to every row
        "datapointID": test['customer_id'],
        "answer": answers,
    })

# One frame per subtask, built by the same helper instead of four copies.
df_task1, df_task2, df_task3, df_task4 = (
    _subtask_frame(subtask_id, answers)
    for subtask_id, answers in enumerate((task1, task2, task3, predictions), start=1)
)

# Stack the four subtasks; ignore_index gives the combined frame a unique
# 0..n-1 index instead of repeating the test index four times.
submission = pd.concat([df_task1, df_task2, df_task3, df_task4], ignore_index=True)
submission.head()

Unnamed: 0,subtaskID,datapointID,answer
0,1,CUST146767,Adult
1,1,CUST136829,Adult
2,1,CUST119409,Young
3,1,CUST112393,Young
4,1,CUST143342,Adult


In [12]:
# Write the combined answers to disk without the DataFrame index column.
submission.to_csv(path_or_buf="submission.csv", index=False)