# **Exemples de comptes de carte de crédit**

Le nettoyage et l'imputation des valeurs manquantes de la table **`credit_card_balance`** nous ont donné du fil à retordre. De nombreux relevés sont incomplets, voire incohérents d'un point de vue comptable.

Ce cahier est un support de visualisation de cas, dans le but d'identifier des classes de problèmes et d'y opposer des stratégies ad hoc.

# Vue d'ensemble

Nombre de clients, nombre de crédits, **distribution du nombre de crédits par client**:

Il y a 104 307 comptes de carte de crédit pour 103 558 clients : la majorité des clients n'ont qu'un seul compte.

In [8]:
from home_credit.check import get_credit_card_balance_payment_balance

# Fetch the balance/payment table and give its ten columns short labels.
balance = get_credit_card_balance_payment_balance()
balance.columns = ["TGT", "PID", "CID", "M°", "BAL", "DRW", "RCV", "RCV_TOT", "PYT", "PYT_TOT"]
# Descending sort on (PID, CID, M°): within each loan, rows run from the
# highest M° down to the lowest.
balance = balance.sort_values(by=["PID", "CID", "M°"], ascending=False)
# Distinct PID values are reported as loans, distinct CID values as customers.
loans_count = len(balance.PID.unique())
loan_customers_count = len(balance.CID.unique())
print(f"Loans count: {loans_count}")
print(f"Loan customers count: {loan_customers_count}")
display(balance)

load C:/Users/franc/Projects/pepper_credit_scoring_tool\dataset\pqt\credit_card_balance.pqt
Loans count: 104307
Loan customers count: 103558


Unnamed: 0,TGT,PID,CID,M°,BAL,DRW,RCV,RCV_TOT,PYT,PYT_TOT
818129,-1,2843496,425374,15,0.000,0.000,0.000,0.000,,0.000
709719,-1,2843496,425374,14,0.000,1704.780,0.000,0.000,1800.000,945.675
2850284,-1,2843496,425374,13,117387.045,137109.555,115653.780,117387.045,24750.000,17835.300
3662596,-1,2843496,425374,12,179685.045,74143.350,175775.670,178114.545,9000.000,2686.230
1337878,-1,2843496,425374,11,189972.450,35983.845,186200.955,188401.950,31050.000,22459.140
...,...,...,...,...,...,...,...,...,...,...
1375415,0,1000018,394447,6,38879.145,51042.645,37542.645,37542.645,9000.000,9000.000
277653,0,1000018,394447,5,40934.070,2335.500,39576.780,40934.070,3900.870,3900.870
2559582,0,1000018,394447,4,44360.505,2032.560,43376.760,44360.505,3646.710,3646.710
2192275,0,1000018,394447,3,113862.285,69156.945,108091.800,109150.785,3190.635,3190.635


In [9]:
from home_credit.check import aggregate_loans

# Per-loan aggregates computed by the project helper.
# NOTE(review): judging by the displayed result this is one row per (PID, CID)
# with a month count and column sums — confirm against home_credit.check.
loans = aggregate_loans()
display(loans)

load C:/Users/franc/Projects/pepper_credit_scoring_tool\dataset\pqt\credit_card_balance.pqt


Unnamed: 0_level_0,Unnamed: 1_level_0,# M°,Σ BAL,Σ DRAW,Σ RCV,Σ RCV_TOT,Σ PYT,Σ PYT_TOT
PID,CID,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1000018,394447,5,374731.425,147394.980,361490.985,368012.925,27708.750,27708.750
1000030,361282,8,447928.515,138059.505,443795.625,447483.015,43320.420,21263.580
1000031,131335,16,838311.030,463353.840,822446.055,833599.530,354519.090,354519.090
1000035,436351,5,0.000,0.000,0.000,0.000,0.000,0.000
1000077,181153,11,0.000,0.000,0.000,0.000,0.000,0.000
...,...,...,...,...,...,...,...,...
2843476,197090,95,3604092.165,90000.000,3416069.475,3641557.320,329404.905,328513.500
2843477,168439,85,141361.515,58500.000,132619.005,139800.735,81197.550,67410.000
2843478,424526,90,460026.450,90000.000,440196.615,458796.780,122021.550,109980.000
2843493,337804,15,887098.905,108257.400,850963.500,875772.270,73137.060,61986.735


## V2 qui permet une analyse comptable (un bilan sommaire)

In [13]:
from home_credit.check import get_credit_card_balance_payment_balance

def aggregate_loans_v2(balance=None):
    """Aggregate the credit card balance table per loan for a summary balance sheet.

    Parameters
    ----------
    balance : pandas.DataFrame, optional
        A ten-column balance/payment table whose columns are, in order:
        target, loan id, customer id, month, balance, drawings, receivable,
        total receivable, payment, total payment. When omitted, the table is
        fetched with ``get_credit_card_balance_payment_balance()``.
        The frame passed in (or fetched) is not relabelled in place.

    Returns
    -------
    pandas.DataFrame
        One row per ``(PID, CID)`` pair with the columns
        ``# M°`` (number of monthly rows), ``Σ BAL``, ``BAL_i``, ``BAL_f``,
        ``Σ DRAW``, ``Σ PYT`` and ``Σ PYT_TOT``.
        ``BAL_i`` / ``BAL_f`` are the first / last ``BAL`` values of the loan
        once its rows are sorted by descending ``M°``.

    Raises
    ------
    ValueError
        If the table does not have exactly ten columns.
    """
    labels = [
        "TGT", "PID", "CID", "M°", "BAL",
        "DRW", "RCV", "RCV_TOT", "PYT", "PYT_TOT"
    ]
    if balance is None:
        balance = get_credit_card_balance_payment_balance()
    # set_axis returns a relabelled frame instead of renaming the source
    # frame's columns in place.
    balance = balance.set_axis(labels, axis=1)
    # Descending sort so that, inside each loan, "first" is the row with the
    # highest M° and "last" the row with the lowest.
    balance = balance.sort_values(by=["PID", "CID", "M°"], ascending=False)
    aggregated = (
        balance[labels[1:]]  # TGT is not aggregated
        .groupby(by=["PID", "CID"])
        .agg({
            "M°": "count",
            "BAL": ["sum", "first", "last"],
            # The drawings column is labelled "DRW" above; asking for "DRAW"
            # here would fail because no such column exists.
            "DRW": "sum",
            "PYT": "sum",
            "PYT_TOT": "sum",
        })
    )
    # Flatten the (column, function) pairs; "Σ DRAW" is kept as the output
    # label because downstream cells read it under that name.
    aggregated.columns = ["# M°", "Σ BAL", "BAL_i", "BAL_f", "Σ DRAW", "Σ PYT", "Σ PYT_TOT"]
    return aggregated


In [18]:
loans_v2 = aggregate_loans_v2()
# Change in balance between the first and last aggregated rows of each loan.
loans_v2["BAL_diff"] = loans_v2.BAL_f - loans_v2.BAL_i
# Total drawings minus total payments (PYT_TOT variant) over the same rows,
# to be compared with BAL_diff.
loans_v2["Σ_D-Σ_P"] = loans_v2["Σ DRAW"] - loans_v2["Σ PYT_TOT"]
display(loans_v2.head(30))

load C:/Users/franc/Projects/pepper_credit_scoring_tool\dataset\pqt\credit_card_balance.pqt


Unnamed: 0_level_0,Unnamed: 1_level_0,# M°,Σ BAL,BAL_i,BAL_f,Σ DRAW,Σ PYT,Σ PYT_TOT,BAL_diff,Σ_D-Σ_P
PID,CID,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1000018,394447,5,374731.425,38879.145,136695.42,147394.98,27708.75,27708.75,97816.275,119686.23
1000030,361282,8,447928.515,0.0,103027.275,138059.505,43320.42,21263.58,103027.275,116795.925
1000031,131335,16,838311.03,0.0,135786.69,463353.84,354519.09,354519.09,135786.69,108834.75
1000035,436351,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1000077,181153,11,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1000083,309691,13,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1000087,399664,32,1250499.33,0.0,0.0,136911.195,197287.425,197287.425,0.0,-60376.23
1000089,161517,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1000094,359175,88,2583953.145,0.0,0.0,138601.035,260075.205,256591.035,0.0,-117990.0
1000096,306118,96,3705437.925,177072.57,0.0,416156.265,745889.715,731426.175,-177072.57,-315269.91


# Prêts vides

31 620 comptes de carte de crédit (pour 31 531 clients) ne comportent aucun mouvement d'aucune sorte.

Cela ne les empêche pas de pouvoir être actifs avec un plafond de crédit fixé et parfois variable.

In [3]:
from home_credit.check import get_loan_index, get_null_loans_boolean_index

# Boolean mask over `loans` flagging the "null" loans, then the matching loan ids.
# NOTE(review): the exact criterion lives in home_credit.check — presumably
# every aggregated amount is zero; confirm.
is_null_loan = get_null_loans_boolean_index(loans)
null_loans_index = get_loan_index(is_null_loan)
print(f"Null loans index ({len(null_loans_index)}): {null_loans_index}")

Null loans index (31620): [1000035, 1000077, 1000083, 1000089, 1000132, 1000186, 1000187, 1000241, 1000339, 1000393, 1000515, 1000566, 1000635, 1000651, 1000663, 1000696, 1000704, 1000727, 1000758, 1000761, 1000865, 1000907, 1000911, 1000913, 1000922, 1000927, 1000972, 1000989, 1000997, 1000999, 1001017, 1001068, 1001220, 1001260, 1001320, 1001385, 1001391, 1001434, 1001517, 1001530, 1001543, 1001545, 1001548, 1001581, 1001582, 1001606, 1001631, 1001633, 1001664, 1001668, 1001713, 1001824, 1001919, 1002225, 1002252, 1002269, 1002335, 1002336, 1002659, 1002660, 1002662, 1002664, 1002722, 1002873, 1002887, 1002902, 1002969, 1002974, 1003015, 1003038, 1003049, 1003059, 1003063, 1003146, 1003182, 1003184, 1003255, 1003298, 1003301, 1003307, 1003317, 1003328, 1003389, 1003414, 1003472, 1003552, 1003601, 1003621, 1003626, 1003634, 1003702, 1003714, 1003716, 1003791, 1003821, 1003861, 1003896, 1003915, 1004005, 1004006, 1004024, 1004061, 1004145, 1004287, 1004289, 1004294, 1004308, 1004327, 1

In [4]:
# Keep only the aggregated rows flagged as null loans.
null_loans = loans[is_null_loan]
null_loans_count = len(null_loans_index)
# Index level 1 of the aggregate is CID, so this counts distinct customers.
null_loan_customers_count = len(null_loans.index.get_level_values(1).unique())
print(f"Null loans count: {null_loans_count}")
print(f"Null loan customers count: {null_loan_customers_count}")
display(null_loans)

Null loans count: 31620
Null loan customers count: 31531


Unnamed: 0_level_0,Unnamed: 1_level_0,# M°,Σ BAL,Σ DRAW,Σ PYT,Σ PYT_TOT
PID,CID,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1000035,436351,5,0.0,0.0,0.0,0.0
1000077,181153,11,0.0,0.0,0.0,0.0
1000083,309691,13,0.0,0.0,0.0,0.0
1000089,161517,5,0.0,0.0,0.0,0.0
1000132,173111,18,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...
2843249,283041,27,0.0,0.0,0.0,0.0
2843320,401272,27,0.0,0.0,0.0,0.0
2843329,114701,35,0.0,0.0,0.0,0.0
2843332,298901,46,0.0,0.0,0.0,0.0


# Prêts à balance nulle mais non vides

2 167 comptes de carte de crédit (pour 2 160 clients) ont un solde nul sur toute la période d'observation bien que des mouvements aient lieu.

...

In [5]:
from home_credit.check import get_loan_index, get_not_balanced_loans_boolean_index

# Boolean mask over `loans` flagging the "not balanced" loans, then their loan ids.
# NOTE(review): the criterion is defined in home_credit.check — presumably a
# zero balance sum together with non-zero movements; confirm.
is_not_balanced_loan = get_not_balanced_loans_boolean_index(loans)
not_balanced_loan_index = get_loan_index(is_not_balanced_loan)
print(f"Not balanced loans index ({len(not_balanced_loan_index)}): {not_balanced_loan_index}")

Not balanced loans index (2167): [1000123, 1001409, 1001865, 1001932, 1002299, 1002655, 1002951, 1003088, 1003308, 1003828, 1004567, 1006499, 1006978, 1008248, 1008925, 1008970, 1009110, 1009354, 1009636, 1009882, 1010411, 1011082, 1011904, 1011976, 1012148, 1012677, 1013571, 1016908, 1017196, 1024414, 1024747, 1024790, 1025342, 1025494, 1025633, 1025652, 1025917, 1026065, 1026503, 1028020, 1028080, 1028208, 1028627, 1029054, 1029487, 1029592, 1030346, 1031122, 1031503, 1032399, 1034068, 1034557, 1034871, 1035106, 1035119, 1036511, 1037535, 1038900, 1039575, 1039885, 1040086, 1041556, 1041919, 1042033, 1043505, 1044403, 1044434, 1046081, 1050250, 1051258, 1051537, 1051817, 1051920, 1051986, 1052236, 1052638, 1053117, 1053330, 1053749, 1054852, 1055365, 1055705, 1055791, 1056245, 1057209, 1057287, 1057479, 1058280, 1059613, 1059815, 1061192, 1061353, 1061528, 1061571, 1061718, 1063069, 1064615, 1065290, 1067643, 1071320, 1071455, 1075332, 1076161, 1076314, 1076437, 1076571, 1076669, 107

In [6]:
# Keep only the aggregated rows flagged as not balanced.
not_balanced_loans = loans[is_not_balanced_loan]
not_balanced_loans_count = len(not_balanced_loan_index)
# Index level 1 of the aggregate is CID, so this counts distinct customers.
not_balanced_loan_customers_count = len(not_balanced_loans.index.get_level_values(1).unique())
print(f"Not balanced loans count: {not_balanced_loans_count}")
print(f"Not balanced loan customers count: {not_balanced_loan_customers_count}")
display(not_balanced_loans)

Not balanced loans count: 2167
Not balanced loan customers count: 2160


Unnamed: 0_level_0,Unnamed: 1_level_0,# M°,Σ BAL,Σ DRAW,Σ PYT,Σ PYT_TOT
PID,CID,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1000123,387909,8,0.0,222740.28,234736.830,224571.330
1001409,151624,8,0.0,225000.00,225000.000,225000.000
1001865,423837,21,0.0,7065.00,7339.725,7339.725
1001932,278723,7,0.0,675000.00,675000.000,675000.000
1002299,261931,9,0.0,103950.00,33295.500,31968.000
...,...,...,...,...,...,...
2841446,230687,5,0.0,247500.00,247500.000,247500.000
2841512,399846,3,0.0,166500.00,166995.000,166995.000
2843012,424873,13,0.0,130.50,391.500,391.500
2843336,174805,52,0.0,22482.00,45166.500,45166.500


# Prêts cohérents d'un point de vue comptable

À l'autre extrémité du spectre, recherchons des cas cohérents, car l'exploration précédente laisse l'impression que ces données ne le sont pas. Les incohérences observées sont probablement de réelles aberrations, qu'il faudra en partie supprimer dans le cadre du nettoyage des données.

On repart de l'agrégation et on tente un raisonnement comptable de vérification de cohérence des comptes.

**TODO** C'est à améliorer de la manière suivante : la fenêtre de suivi démarre avec un solde initial qui n'est pas nécessairement nul, puisque le prêt peut avoir été contracté antérieurement au suivi. Il faut donc spécifiquement identifier (FIRST) ce solde initial au moment de l'agrégation.

# Annexe visualisation

In [1]:
from home_credit.check import load_credit_card_balance, aggregate_loans
from home_credit.check import get_credit_card_balance_payment_balance

# Three views used throughout this appendix: the raw table, the
# balance/payment view and the per-loan aggregates.
# NOTE(review): the load log printed by this cell suggests the parquet file is
# re-read by each helper — confirm whether a shared load is possible.
data = load_credit_card_balance()
balance = get_credit_card_balance_payment_balance()
loans = aggregate_loans()

load C:/Users/franc/Projects/pepper_credit_scoring_tool\dataset\pqt\credit_card_balance.pqt
load C:/Users/franc/Projects/pepper_credit_scoring_tool\dataset\pqt\application_train.pqt
load C:/Users/franc/Projects/pepper_credit_scoring_tool\dataset\pqt\application_test.pqt
load C:/Users/franc/Projects/pepper_credit_scoring_tool\dataset\pqt\credit_card_balance.pqt
load C:/Users/franc/Projects/pepper_credit_scoring_tool\dataset\pqt\credit_card_balance.pqt


In [2]:
# Show the per-loan aggregates that the class filters below operate on.
display(loans)

Unnamed: 0_level_0,Unnamed: 1_level_0,# M°,Σ BAL,Σ DRAW,Σ RCV,Σ RCV_TOT,Σ PYT,Σ PYT_TOT
PID,CID,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1000018,394447,5,374731.425,147394.980,361490.985,368012.925,27708.750,27708.750
1000030,361282,8,447928.515,138059.505,443795.625,447483.015,43320.420,21263.580
1000031,131335,16,838311.030,463353.840,822446.055,833599.530,354519.090,354519.090
1000035,436351,5,0.000,0.000,0.000,0.000,0.000,0.000
1000077,181153,11,0.000,0.000,0.000,0.000,0.000,0.000
...,...,...,...,...,...,...,...,...
2843476,197090,95,3604092.165,90000.000,3416069.475,3641557.320,329404.905,328513.500
2843477,168439,85,141361.515,58500.000,132619.005,139800.735,81197.550,67410.000
2843478,424526,90,460026.450,90000.000,440196.615,458796.780,122021.550,109980.000
2843493,337804,15,887098.905,108257.400,850963.500,875772.270,73137.060,61986.735


## Prêts vides

### Extraction des prêts de la classe

In [7]:
from home_credit.check import get_loan_index, get_null_loans_boolean_index

# Select the "null loans" class: boolean mask, matching loan ids, aggregated rows.
# `index` is reused by the following cells to pick example loans.
b_index = get_null_loans_boolean_index(loans)
index = get_loan_index(b_index)
subset = loans[b_index]

In [8]:
# Show the monthly rows of the first two loans of the class.
for pid in index[:2]:
    display(balance[balance.PID == pid])

Unnamed: 0,TGT,PID,CID,M°,BAL,DRAW,RCV,RCV_TOT,PYT,PYT_TOT
335638,0,1000035,436351,4,0.0,0.0,0.0,0.0,,0.0
522016,0,1000035,436351,3,0.0,0.0,0.0,0.0,,0.0
686129,0,1000035,436351,6,0.0,0.0,0.0,0.0,,0.0
708488,0,1000035,436351,5,0.0,0.0,0.0,0.0,,0.0
2146369,0,1000035,436351,2,0.0,0.0,0.0,0.0,,0.0


Unnamed: 0,TGT,PID,CID,M°,BAL,DRAW,RCV,RCV_TOT,PYT,PYT_TOT
213256,0,1000077,181153,3,0.0,0.0,0.0,0.0,,0.0
252608,0,1000077,181153,10,0.0,0.0,0.0,0.0,,0.0
1431772,0,1000077,181153,11,0.0,0.0,0.0,0.0,,0.0
1956484,0,1000077,181153,7,0.0,0.0,0.0,0.0,,0.0
2002682,0,1000077,181153,12,0.0,0.0,0.0,0.0,,0.0
2107346,0,1000077,181153,5,0.0,0.0,0.0,0.0,,0.0
2740284,0,1000077,181153,9,0.0,0.0,0.0,0.0,,0.0
3443515,0,1000077,181153,6,0.0,0.0,0.0,0.0,,0.0
3558682,0,1000077,181153,2,0.0,0.0,0.0,0.0,,0.0
3597506,0,1000077,181153,8,0.0,0.0,0.0,0.0,,0.0


### Analyse comptable interne

In [9]:
from home_credit.check import report_loan_analysis

# Per-loan accounting report for the first two loans of the class; the helper
# prints a TGT/PID/CID header followed by a table with derived columns.
for pid in index[:2]:
    report_loan_analysis(balance, pid)

[1m[36m
TGT: 0 | PID: 1000035 | CID: 436351[0m[0m


Unnamed: 0,M°,BAL,DRAW,RCV,RCV_TOT,PYT,PYT_TOT,BAL_diff,BAL-RCV,IF,REF_pre,rate,D-P
686129,6,0.0,0.0,0.0,0.0,,0.0,,0.0,0.0,,,0.0
708488,5,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,,0.0
335638,4,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,,0.0
522016,3,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,,0.0
2146369,2,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,,0.0


[1m[36m
TGT: 0 | PID: 1000077 | CID: 181153[0m[0m


Unnamed: 0,M°,BAL,DRAW,RCV,RCV_TOT,PYT,PYT_TOT,BAL_diff,BAL-RCV,IF,REF_pre,rate,D-P
2002682,12,0.0,0.0,0.0,0.0,,0.0,,0.0,0.0,,,0.0
1431772,11,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,,0.0
252608,10,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,,0.0
2740284,9,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,,0.0
3597506,8,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,,0.0
1956484,7,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,,0.0
3443515,6,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,,0.0
2107346,5,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,,0.0
3801904,4,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,,0.0
213256,3,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,,0.0


### Informations étendues et filtrées

In [10]:
from home_credit.check import view_filtered_raw_loan

# Raw-table view of the first five loans of the class, one column per month.
# NOTE(review): the displayed rows vary from loan to loan, so the helper
# presumably hides uninformative variables — confirm its filtering rule.
for pid in index[:5]:
    view_filtered_raw_loan(data, pid)

[1m[36m
TARGET: 0 | SK_ID_PREV: 1000035 | SK_ID_CURR: 436351[0m[0m


MONTHS_BALANCE,6,5,4,3,2
RAW_CREDIT_CARD_BALANCE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
AMT_CREDIT_LIMIT_ACTUAL,225000,225000,225000,225000,225000
NAME_CONTRACT_STATUS,Active,Active,Active,Active,Active


[1m[36m
TARGET: 0 | SK_ID_PREV: 1000077 | SK_ID_CURR: 181153[0m[0m


MONTHS_BALANCE,12,11,10,9,8,7,6,5,4,3,2
RAW_CREDIT_CARD_BALANCE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
AMT_CREDIT_LIMIT_ACTUAL,45000,45000,45000,45000,45000,135000,135000,135000,135000,135000,135000
NAME_CONTRACT_STATUS,Active,Active,Active,Active,Active,Active,Active,Active,Active,Active,Active


[1m[36m
TARGET: 0 | SK_ID_PREV: 1000083 | SK_ID_CURR: 309691[0m[0m


MONTHS_BALANCE,13,12,11,10,9,8,7,6,5,4,3,2,1
RAW_CREDIT_CARD_BALANCE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
AMT_CREDIT_LIMIT_ACTUAL,45000,45000,45000,225000,225000,225000,225000,225000,225000,225000,225000,225000,225000
NAME_CONTRACT_STATUS,Active,Active,Active,Active,Active,Active,Active,Active,Active,Active,Active,Active,Active


[1m[36m
TARGET: 0 | SK_ID_PREV: 1000089 | SK_ID_CURR: 161517[0m[0m


MONTHS_BALANCE,5,4,3,2,1
RAW_CREDIT_CARD_BALANCE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
AMT_CREDIT_LIMIT_ACTUAL,135000,135000,135000,135000,135000
NAME_CONTRACT_STATUS,Completed,Completed,Completed,Completed,Completed


[1m[36m
TARGET: 0 | SK_ID_PREV: 1000132 | SK_ID_CURR: 173111[0m[0m


MONTHS_BALANCE,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2
RAW_CREDIT_CARD_BALANCE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
AMT_CREDIT_LIMIT_ACTUAL,405000,405000,405000,405000,405000,405000,405000,405000,405000,405000,0,0,0,0,0,0,0,0
NAME_CONTRACT_STATUS,Active,Active,Active,Active,Active,Active,Active,Active,Active,Active,Active,Active,Active,Active,Active,Active,Completed,Completed


In [37]:
# All raw rows of customer 231106 (the holder of loan 1003088 in the output below).
display(data[data.SK_ID_CURR == 231106])

RAW_CREDIT_CARD_BALANCE,TARGET,SK_ID_PREV,SK_ID_CURR,MONTHS_BALANCE,AMT_BALANCE,AMT_CREDIT_LIMIT_ACTUAL,AMT_DRAWINGS_ATM_CURRENT,AMT_DRAWINGS_CURRENT,AMT_DRAWINGS_OTHER_CURRENT,AMT_DRAWINGS_POS_CURRENT,...,AMT_RECIVABLE,AMT_TOTAL_RECEIVABLE,CNT_DRAWINGS_ATM_CURRENT,CNT_DRAWINGS_CURRENT,CNT_DRAWINGS_OTHER_CURRENT,CNT_DRAWINGS_POS_CURRENT,CNT_INSTALMENT_MATURE_CUM,NAME_CONTRACT_STATUS,SK_DPD,SK_DPD_DEF
122522,0,1003088,231106,28,0.0,0,0.0,0.0,0.0,0.0,...,0.00,0.00,0.0,0,0.0,0.0,3.0,Active,0,0
220050,0,1003088,231106,34,0.0,0,0.0,0.0,0.0,0.0,...,0.00,0.00,0.0,0,0.0,0.0,3.0,Active,0,0
340922,0,1003088,231106,95,0.0,90000,0.0,0.0,0.0,0.0,...,0.00,0.00,0.0,0,0.0,0.0,3.0,Active,0,0
357446,0,1003088,231106,15,0.0,0,0.0,0.0,0.0,0.0,...,0.00,0.00,0.0,0,0.0,0.0,3.0,Active,0,0
371706,0,1003088,231106,40,0.0,0,0.0,0.0,0.0,0.0,...,0.00,0.00,0.0,0,0.0,0.0,3.0,Active,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3753655,0,1003088,231106,29,0.0,0,0.0,0.0,0.0,0.0,...,0.00,0.00,0.0,0,0.0,0.0,3.0,Active,0,0
3788758,0,1003088,231106,8,0.0,0,0.0,0.0,0.0,0.0,...,0.00,0.00,0.0,0,0.0,0.0,3.0,Active,0,0
3806299,0,1003088,231106,74,0.0,90000,0.0,0.0,0.0,0.0,...,0.00,0.00,0.0,0,0.0,0.0,3.0,Active,0,0
3809803,0,1003088,231106,47,0.0,0,0.0,0.0,0.0,0.0,...,-27.99,-27.99,0.0,0,0.0,0.0,3.0,Active,0,0


## Prêts à balance nulle mais non vides

2 167 cas partiellement nuls : ils sont intéressants, car ils rendent plus apparente la mécanique interne de calcul.

Il y a notamment des cas non aberrants, où le montant retiré est systématiquement remboursé le même mois, ce qui maintient le solde nul.

Sur le cas `1001865`, on observe la persistance d'un prélèvement mensuel, de la moitié d'un montant initial ouvrant une période de prélèvements fixes. Il est étrange que cette somme persiste bien que le prêt soit soldé. **Hypothèse** : ce sont probablement des frais, par exemple fonction du niveau de crédit autorisé.

`1001932` fait partie des cas où il n'y a ni frais ni intérêts, une opération blanche : il y en a d'autres du même type, qui constituent une sous-classe : `1003308`, ...

`1002299` montre que les soldes de certains comptes ne sont pas calculés : ils sont nuls alors qu'il y a des mouvements non équilibrés sur le compte. Autres exemples : `1002655`, ...

`1003088` et d'autres semblent montrer que la limite de crédit fixée entraîne des frais mensuels fixes, d'autant plus élevés que le plafond de crédit est élevé : c'est une hypothèse importante à vérifier.

In [7]:
from home_credit.check import get_loan_index, get_not_balanced_loans_boolean_index

# Select the "not balanced" class: boolean mask, matching loan ids, aggregated rows.
# This rebinds `b_index`, `index` and `subset` from the previous section.
b_index = get_not_balanced_loans_boolean_index(loans)
index = get_loan_index(b_index)
subset = loans[b_index]

In [8]:
# Show the monthly rows of the first two loans of the class.
for pid in index[:2]:
    display(balance[balance.PID == pid])

Unnamed: 0,TGT,PID,CID,M°,BAL,DRAW,RCV,RCV_TOT,PYT,PYT_TOT
64530,0,1000123,387909,9,0.0,0.0,0.0,0.0,,0.0
3444999,0,1000123,387909,8,0.0,43630.47,0.0,0.0,44415.0,42165.0
2560300,0,1000123,387909,7,0.0,875.655,0.0,0.0,1745.19,1745.19
544297,0,1000123,387909,6,0.0,178234.155,0.0,0.0,178290.0,170374.5
2054438,0,1000123,387909,5,0.0,0.0,0.0,0.0,6314.58,6314.58
2370104,0,1000123,387909,4,0.0,0.0,0.0,0.0,3972.06,3972.06
1344205,0,1000123,387909,3,0.0,0.0,0.0,0.0,0.0,0.0
1461911,0,1000123,387909,2,0.0,0.0,0.0,0.0,0.0,0.0


Unnamed: 0,TGT,PID,CID,M°,BAL,DRAW,RCV,RCV_TOT,PYT,PYT_TOT
708398,0,1001409,151624,9,0.0,0.0,0.0,0.0,225000.0,225000.0
853551,0,1001409,151624,8,0.0,225000.0,0.0,0.0,0.0,0.0
1095994,0,1001409,151624,7,0.0,0.0,0.0,0.0,0.0,0.0
579004,0,1001409,151624,6,0.0,0.0,0.0,0.0,0.0,0.0
351210,0,1001409,151624,5,0.0,0.0,0.0,0.0,0.0,0.0
683954,0,1001409,151624,4,0.0,0.0,0.0,0.0,0.0,0.0
1527255,0,1001409,151624,3,0.0,0.0,0.0,0.0,0.0,0.0
1630959,0,1001409,151624,2,0.0,0.0,0.0,0.0,0.0,0.0


### Analyse comptable interne

In [9]:
from home_credit.check import report_loan_analysis

# Per-loan accounting report for the first two loans of the class; the helper
# prints a TGT/PID/CID header followed by a table with derived columns.
for pid in index[:2]:
    report_loan_analysis(balance, pid)

[1m[36m
TGT: 0 | PID: 1000123 | CID: 387909[0m[0m


Unnamed: 0,M°,BAL,DRAW,RCV,RCV_TOT,PYT,PYT_TOT,BAL_diff,BAL-RCV,IF,REF_pre,rate,D-P
64530,9,0.0,0.0,0.0,0.0,,0.0,,0.0,0.0,,,0.0
3444999,8,0.0,43630.47,0.0,0.0,44415.0,42165.0,0.0,0.0,0.0,0.0,,1465.47
2560300,7,0.0,875.655,0.0,0.0,1745.19,1745.19,0.0,0.0,0.0,0.0,,-869.535
544297,6,0.0,178234.155,0.0,0.0,178290.0,170374.5,0.0,0.0,0.0,0.0,,7859.655
2054438,5,0.0,0.0,0.0,0.0,6314.58,6314.58,0.0,0.0,0.0,0.0,,-6314.58
2370104,4,0.0,0.0,0.0,0.0,3972.06,3972.06,0.0,0.0,0.0,0.0,,-3972.06
1344205,3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0
1461911,2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0


[1m[36m
TGT: 0 | PID: 1001409 | CID: 151624[0m[0m


Unnamed: 0,M°,BAL,DRAW,RCV,RCV_TOT,PYT,PYT_TOT,BAL_diff,BAL-RCV,IF,REF_pre,rate,D-P
708398,9,0.0,0.0,0.0,0.0,225000.0,225000.0,,0.0,0.0,,,-225000.0
853551,8,0.0,225000.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,225000.0
1095994,7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0
579004,6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0
351210,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0
683954,4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0
1527255,3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0
1630959,2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0


### Informations étendues et filtrées

In [12]:
from home_credit.check import view_filtered_raw_loan

# Raw-table view of the first five loans of the class, one column per month.
# NOTE(review): the displayed rows vary from loan to loan, so the helper
# presumably hides uninformative variables — confirm its filtering rule.
for pid in index[:5]:
    view_filtered_raw_loan(data, pid)

[1m[36m
TARGET: 0 | SK_ID_PREV: 1000123 | SK_ID_CURR: 387909[0m[0m


MONTHS_BALANCE,9,8,7,6,5,4,3,2
RAW_CREDIT_CARD_BALANCE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
AMT_CREDIT_LIMIT_ACTUAL,45000,180000,180000,180000,180000,180000,180000,180000
AMT_DRAWINGS_CURRENT,0.0,43630.47,875.655,178234.155,0.0,0.0,0.0,0.0
AMT_DRAWINGS_POS_CURRENT,,43630.47,0.0,178234.155,0.0,0.0,0.0,0.0
AMT_INST_MIN_REGULARITY,0.0,0.0,2250.0,0.0,7915.5,0.0,0.0,0.0
AMT_PAYMENT_CURRENT,,44415.0,1745.19,178290.0,6314.58,3972.06,0.0,0.0
AMT_PAYMENT_TOTAL_CURRENT,0.0,42165.0,1745.19,170374.5,6314.58,3972.06,0.0,0.0
CNT_DRAWINGS_CURRENT,0,21,0,26,0,0,0,0
CNT_DRAWINGS_POS_CURRENT,,21.0,0.0,26.0,0.0,0.0,0.0,0.0
CNT_INSTALMENT_MATURE_CUM,0.0,0.0,1.0,1.0,2.0,2.0,2.0,2.0
NAME_CONTRACT_STATUS,Active,Active,Active,Active,Active,Active,Completed,Completed


[1m[36m
TARGET: 0 | SK_ID_PREV: 1001409 | SK_ID_CURR: 151624[0m[0m


MONTHS_BALANCE,9,8,7,6,5,4,3,2
RAW_CREDIT_CARD_BALANCE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
AMT_DRAWINGS_ATM_CURRENT,,225000.0,0.0,0.0,0.0,0.0,0.0,0.0
AMT_DRAWINGS_CURRENT,0.0,225000.0,0.0,0.0,0.0,0.0,0.0,0.0
AMT_PAYMENT_CURRENT,225000.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AMT_PAYMENT_TOTAL_CURRENT,225000.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CNT_DRAWINGS_ATM_CURRENT,,5.0,0.0,0.0,0.0,0.0,0.0,0.0
CNT_DRAWINGS_CURRENT,0,5,0,0,0,0,0,0
NAME_CONTRACT_STATUS,Signed,Active,Active,Active,Active,Active,Active,Active


[1m[36m
TARGET: 0 | SK_ID_PREV: 1001865 | SK_ID_CURR: 423837[0m[0m


MONTHS_BALANCE,22,21,20,19,18,17,16,15,14,13,...,11,10,9,8,7,6,5,4,3,2
RAW_CREDIT_CARD_BALANCE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
AMT_CREDIT_LIMIT_ACTUAL,450000,450000,450000,450000,450000,450000,450000,450000,0,0,...,0,0,0,0,0,0,0,0,0,0
AMT_DRAWINGS_CURRENT,0.0,0.0,0.0,0.0,7046.685,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,18.315,0.0
AMT_DRAWINGS_POS_CURRENT,,,,,7046.685,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AMT_PAYMENT_CURRENT,,,,,7083.315,18.315,18.315,18.315,18.315,18.315,...,18.315,18.315,18.315,18.315,18.315,18.315,18.315,18.315,0.0,0.0
AMT_PAYMENT_TOTAL_CURRENT,0.0,0.0,0.0,0.0,7083.315,18.315,18.315,18.315,18.315,18.315,...,18.315,18.315,18.315,18.315,18.315,18.315,18.315,18.315,0.0,0.0
CNT_DRAWINGS_CURRENT,0,0,0,0,5,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
CNT_DRAWINGS_POS_CURRENT,,,,,5.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
NAME_CONTRACT_STATUS,Active,Active,Active,Active,Active,Active,Active,Active,Active,Active,...,Active,Active,Active,Active,Active,Active,Active,Active,Completed,Completed


[1m[36m
TARGET: -1 | SK_ID_PREV: 1001932 | SK_ID_CURR: 278723[0m[0m


MONTHS_BALANCE,8,7,6,5,4,3,2
RAW_CREDIT_CARD_BALANCE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
AMT_DRAWINGS_ATM_CURRENT,675000.0,0.0,0.0,0.0,0.0,0.0,0.0
AMT_DRAWINGS_CURRENT,675000.0,0.0,0.0,0.0,0.0,0.0,0.0
AMT_PAYMENT_CURRENT,675000.0,0.0,0.0,0.0,0.0,0.0,0.0
AMT_PAYMENT_TOTAL_CURRENT,675000.0,0.0,0.0,0.0,0.0,0.0,0.0
CNT_DRAWINGS_ATM_CURRENT,20.0,0.0,0.0,0.0,0.0,0.0,0.0
CNT_DRAWINGS_CURRENT,20,0,0,0,0,0,0
NAME_CONTRACT_STATUS,Active,Active,Active,Active,Active,Active,Active


[1m[36m
TARGET: 0 | SK_ID_PREV: 1002299 | SK_ID_CURR: 261931[0m[0m


MONTHS_BALANCE,10,9,8,7,6,5,4,3,2
RAW_CREDIT_CARD_BALANCE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
AMT_DRAWINGS_ATM_CURRENT,92250.0,11250.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AMT_DRAWINGS_CURRENT,92250.0,11700.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AMT_DRAWINGS_POS_CURRENT,0.0,450.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AMT_INST_MIN_REGULARITY,0.0,1062.0,265.5,0.0,0.0,0.0,0.0,0.0,0.0
AMT_PAYMENT_CURRENT,27450.0,963.0,697.5,697.5,697.5,697.5,697.5,697.5,697.5
AMT_PAYMENT_TOTAL_CURRENT,26388.0,697.5,697.5,697.5,697.5,697.5,697.5,697.5,697.5
CNT_DRAWINGS_ATM_CURRENT,6.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CNT_DRAWINGS_CURRENT,6,2,0,0,0,0,0,0,0
CNT_DRAWINGS_POS_CURRENT,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CNT_INSTALMENT_MATURE_CUM,0.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0


## Cas général

Les données ne se tiennent pas, en tout cas d'après ce que j'en ai compris, et c'est un vrai problème.

Il est temps que je fasse une pause avant d'y revenir, cela me prend la tête.

**Il est probable que d'autres informations entrent en ligne de compte, comme AMT_INST_MIN_REGULARITY ou AMT_CREDIT_LIMIT_ACTUAL**. Pour percer le mystère, le mieux serait de travailler avec Google Sheets sur quelques cas sélectionnés.

In [19]:
from home_credit.check import report_loan_analysis, view_filtered_raw_loan

# Single loan inspected as an example of the general case: accounting report
# first, then the raw-table view of the same loan.
pid = 1000087
report_loan_analysis(balance, pid)
view_filtered_raw_loan(data, pid)

[1m[36m
TGT: 0 | PID: 1000087 | CID: 399664[0m[0m


Unnamed: 0,M°,BAL,DRAW,RCV,RCV_TOT,PYT,PYT_TOT,BAL_diff,BAL-RCV,IF,REF_pre,rate,D-P
1917975,32,0.0,0.0,0.0,0.0,,0.0,,0.0,0.0,,,0.0
803995,31,93084.93,88100.145,88100.145,89048.43,0.0,0.0,93084.93,4036.5,948.285,0.0,inf,88100.145
426414,30,119830.68,30811.05,113736.195,118728.18,5625.0,5625.0,26745.75,1102.5,4991.985,89048.43,0.056059,25186.05
2026285,29,133506.315,15750.0,127911.195,133143.93,6750.0,6750.0,13675.635,362.385,5232.735,118728.18,0.044073,9000.0
1639628,28,131004.315,0.0,127461.195,132171.705,7200.0,7200.0,-2502.0,-1167.39,4710.51,133143.93,0.035379,-7200.0
1495391,27,136005.75,0.0,132554.835,137042.64,7200.0,7200.0,5001.435,-1036.89,4487.805,132171.705,0.033954,-7200.0
2592501,26,130044.015,2250.0,125381.97,129719.16,7200.0,7200.0,-5961.735,324.855,4337.19,137042.64,0.031648,-4950.0
2623709,25,128229.525,0.0,124820.595,129250.395,6975.0,6975.0,-1814.49,-1020.87,4429.8,129719.16,0.034149,-6975.0
3369792,24,125580.915,0.0,122322.06,126595.755,7200.0,7200.0,-2648.61,-1014.84,4273.695,129250.395,0.033065,-7200.0
2507922,23,123646.32,0.0,120322.26,124643.655,6975.0,6975.0,-1934.595,-997.335,4321.395,126595.755,0.034135,-6975.0


[1m[36m
TARGET: 0 | SK_ID_PREV: 1000087 | SK_ID_CURR: 399664[0m[0m


MONTHS_BALANCE,32,31,30,29,28,27,26,25,24,23,...,10,9,8,7,6,5,4,3,2,1
RAW_CREDIT_CARD_BALANCE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
AMT_BALANCE,0.0,93084.93,119830.68,133506.315,131004.315,136005.75,130044.015,128229.525,125580.915,123646.32,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AMT_CREDIT_LIMIT_ACTUAL,135000,135000,135000,135000,135000,135000,135000,135000,135000,135000,...,0,0,0,0,0,0,0,0,0,0
AMT_DRAWINGS_ATM_CURRENT,,72000.0,18000.0,15750.0,0.0,0.0,2250.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AMT_DRAWINGS_CURRENT,0.0,88100.145,30811.05,15750.0,0.0,0.0,2250.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AMT_DRAWINGS_POS_CURRENT,,16100.145,12811.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AMT_INST_MIN_REGULARITY,,,5449.365,6319.665,7096.86,6987.735,6876.585,6938.55,6836.085,6713.595,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AMT_PAYMENT_CURRENT,,0.0,5625.0,6750.0,7200.0,7200.0,7200.0,6975.0,7200.0,6975.0,...,163.935,163.935,163.935,163.935,163.935,163.935,163.935,163.935,163.935,163.935
AMT_PAYMENT_TOTAL_CURRENT,0.0,0.0,5625.0,6750.0,7200.0,7200.0,7200.0,6975.0,7200.0,6975.0,...,163.935,163.935,163.935,163.935,163.935,163.935,163.935,163.935,163.935,163.935
AMT_RECEIVABLE_PRINCIPAL,0.0,88100.145,113736.195,127911.195,127461.195,132554.835,125381.97,124820.595,122322.06,120322.26,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AMT_RECIVABLE,0.0,89048.43,118728.18,133143.93,132171.705,137042.64,129719.16,129250.395,126595.755,124643.655,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### Export Gsheet de quelques cas

In [4]:
from pepper.gsheet_io import df_to_gsheet
from gspread_pandas import Spread
from home_credit.check import get_raw_loan, get_key_cols, get_all_cols_map

# Target Google spreadsheet (by document id) and the loan to export.
gdoc_id = "1aYbWOUhEb-_tksrxrcJa3Y85Gcsa1RIzxp6Tx-7Eizg"
loan_id = 1000031 # alternatives: 1000035, 1003088, 1002655, 1003308, 1001932, 1001865, 1002299, 1000087
spread = Spread(gdoc_id)
# Raw rows of the selected loan, built with the full column map.
table = get_raw_loan(data, loan_id, get_all_cols_map())
# Push the table to the spreadsheet from cell A1, headers included.
# NOTE(review): the third argument is presumably the worksheet name, and
# as_code / as_fr_FR presumably select column formatting (the slice 5:-2 is
# positional) — confirm against pepper.gsheet_io.df_to_gsheet.
df_to_gsheet(
    table,
    spread,
    str(loan_id),
    as_code=get_key_cols(shorten=True),
    as_fr_FR=list(table.columns[5:-2]),
    start="A1",
    headers=True
)