### **#application_metadata.csv tayyorlash**

In [3]:
# Kerakli kutubxonalarni import qilish
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import pyarrow.parquet as pq
import re
from sklearn.preprocessing import RobustScaler
# Vizualizatsiya chiroyli bo'lishi uchun
%matplotlib inline
sns.set(style="whitegrid")


Matplotlib is building the font cache; this may take a moment.


In [4]:
# CSV faylni yuklash
csv_data = pd.read_csv('data/application_metadata.csv')  # fayl nomini moslashtiring
csv_data.head()

# Duplicate va missing values tekshirish
print("Duplicate qatorlar:", csv_data.duplicated().sum())
print("Missing values:\n", csv_data.isnull().sum())


Duplicate qatorlar: 0
Missing values:
 customer_ref                  0
application_id                0
application_hour              0
application_day_of_week       0
account_open_year             0
preferred_contact             0
referral_code                 0
account_status_code           0
random_noise_1                0
num_login_sessions            0
num_customer_service_calls    0
has_mobile_app                0
paperless_billing             0
default                       0
dtype: int64


In [5]:
df_csv = csv_data.drop(columns=['application_id', 'random_noise_1'])
#2ta ustun keraksiz chunki id raqam dan qandaydir xulosa qilish mumkin emas va random_noise_1 bu nomidan ma'lum tasodifiy ma'lumot bizga aniq natija uchun kerak emas
print(df_csv.head())

   customer_ref  application_hour  application_day_of_week  account_open_year  \
0         10000                 5                        6               2013   
1         10001                 4                        2               2015   
2         10002                10                        3               2020   
3         10003                 7                        5               2010   
4         10004                 1                        2               2020   

  preferred_contact referral_code account_status_code  num_login_sessions  \
0              Mail       REF0000               ACT-2                  13   
1             Phone       REF0000               ACT-3                   6   
2             Phone       REF0000               ACT-3                   1   
3             Email       REF0000                 A01                   4   
4              Mail       REF0000               ACT-3                   6   

   num_customer_service_calls  has_mobile_app  pap

In [6]:
categorical_cols = ['preferred_contact', 'referral_code', 'account_status_code']
le_dict = {}

for col in categorical_cols:
    le = LabelEncoder()
    df_csv[col] = le.fit_transform(df_csv[col])
    le_dict[col] = le  
#bu matinli kategoriyalik ma'lumotlarni songa 0 1 2 larga aylantiradi chunki ml maellari sonlar bilan ishlaydi modelimiz kuchli aniq bo'ladi
df_csv  # dastlabki 5 qatorni ko‘rsatadi



Unnamed: 0,customer_ref,application_hour,application_day_of_week,account_open_year,preferred_contact,referral_code,account_status_code,num_login_sessions,num_customer_service_calls,has_mobile_app,paperless_billing,default
0,10000,5,6,2013,1,0,2,13,2,1,1,0
1,10001,4,2,2015,2,0,3,6,1,1,1,1
2,10002,10,3,2020,2,0,3,1,2,1,0,0
3,10003,7,5,2010,0,0,0,4,1,1,1,0
4,10004,1,2,2020,1,0,3,6,2,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...
89994,99994,13,5,2016,0,0,0,10,3,0,1,0
89995,99995,13,5,2014,1,7589,4,12,1,1,0,0
89996,99996,7,5,2010,1,0,0,4,2,1,1,1
89997,99997,8,3,2021,0,0,1,5,2,0,0,0


In [7]:
numeric_cols = ['application_hour', 'application_day_of_week', 
                'account_open_year', 'num_login_sessions', 'num_customer_service_calls']

scaler = StandardScaler()
df_csv[numeric_cols] = scaler.fit_transform(df_csv[numeric_cols])
#sonli ustunlarni standartlashtirish 
df_csv

Unnamed: 0,customer_ref,application_hour,application_day_of_week,account_open_year,preferred_contact,referral_code,account_status_code,num_login_sessions,num_customer_service_calls,has_mobile_app,paperless_billing,default
0,10000,-0.938331,1.498690,-0.869907,1,0,2,1.094725,0.003861,1,1,0
1,10001,-1.083235,-0.495413,-0.372464,2,0,3,-0.736088,-0.703855,1,1,1
2,10002,-0.213811,0.003113,0.871142,2,0,3,-2.043812,0.003861,1,0,0
3,10003,-0.648523,1.000164,-1.616070,0,0,0,-1.259177,-0.703855,1,1,0
4,10004,-1.517947,-0.495413,0.871142,1,0,3,-0.736088,0.003861,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...
89994,99994,0.220900,1.000164,-0.123743,0,0,0,0.310091,0.711577,0,1,0
89995,99995,0.220900,1.000164,-0.621185,1,7589,4,0.833180,-0.703855,1,0,0
89996,99996,-0.648523,1.000164,-1.616070,1,0,0,-1.259177,0.003861,1,1,1
89997,99997,-0.503619,0.003113,1.119863,0,0,1,-0.997633,0.003861,0,0,0


In [8]:
# CSV sifatida saqlash
df_csv.to_csv('data_new/application_metadata.csv', index=False)


### **#credit_history.parquet**

In [9]:
table = pq.read_table('data/credit_history.parquet', use_pandas_metadata=True)
df_parquet = table.to_pandas()
#parquet fileni o'qish
print(df_parquet.head())


   customer_number  credit_score  num_credit_accounts  oldest_credit_line_age  \
0            10000           696                   14                    22.8   
1            10001           659                   13                     3.5   
2            10002           662                    3                     0.0   
3            10003           676                    8                     9.0   
4            10004           678                    7                     8.0   

   oldest_account_age_months  total_credit_limit  num_delinquencies_2yrs  \
0                      273.6            169100.0                     0.0   
1                       42.0             78200.0                     0.0   
2                        0.0             41400.0                     0.0   
3                      108.0             60000.0                     0.0   
4                       96.0             49700.0                     0.0   

   num_inquiries_6mo  recent_inquiry_count  num_public_r

In [10]:
# Num_delinquencies ustunidagi "no data" qiymatlarni aniqlash
df_parquet['num_delinquencies_2yrs'].value_counts(dropna=False)
df_parquet['num_delinquencies_2yrs'] = df_parquet['num_delinquencies_2yrs'].replace(-0.02, pd.NA)
df_parquet.isna().sum()



customer_number                0
credit_score                   0
num_credit_accounts            0
oldest_credit_line_age         0
oldest_account_age_months      0
total_credit_limit             0
num_delinquencies_2yrs       832
num_inquiries_6mo              0
recent_inquiry_count           0
num_public_records             0
num_collections                0
account_diversity_index        0
dtype: int64

In [11]:
df_parquet['num_delinquencies_2yrs'] = df_parquet['num_delinquencies_2yrs'].fillna(df_parquet['num_delinquencies_2yrs'].mode()[0])

df_parquet.isna().sum()
# missing valuelarni eng ko'p takrorlangan qiymatlar bilan to'ldirdi

customer_number              0
credit_score                 0
num_credit_accounts          0
oldest_credit_line_age       0
oldest_account_age_months    0
total_credit_limit           0
num_delinquencies_2yrs       0
num_inquiries_6mo            0
recent_inquiry_count         0
num_public_records           0
num_collections              0
account_diversity_index      0
dtype: int64

In [12]:
df_csv.rename(columns={'customer_ref': 'customer_id'}, inplace=True)
df_parquet.rename(columns={'customer_number': 'customer_id'}, inplace=True)
#o'xshash ustunlarni birxil qilib olish
df_parquet

Unnamed: 0,customer_id,credit_score,num_credit_accounts,oldest_credit_line_age,oldest_account_age_months,total_credit_limit,num_delinquencies_2yrs,num_inquiries_6mo,recent_inquiry_count,num_public_records,num_collections,account_diversity_index
0,10000,696,14,22.8,273.6,169100.0,0.0,2,2,1,0,0.499
1,10001,659,13,3.5,42.0,78200.0,0.0,6,6,0,0,0.298
2,10002,662,3,0.0,0.0,41400.0,0.0,2,2,0,0,0.174
3,10003,676,8,9.0,108.0,60000.0,0.0,1,1,0,0,0.263
4,10004,678,7,8.0,96.0,49700.0,0.0,1,1,0,0,0.298
...,...,...,...,...,...,...,...,...,...,...,...,...
89994,99994,817,10,8.2,98.4,135600.0,0.0,1,1,0,0,0.285
89995,99995,745,9,8.8,105.6,44600.0,0.0,1,1,0,0,0.353
89996,99996,607,11,1.0,12.0,18300.0,0.0,2,2,1,0,0.238
89997,99997,678,10,3.0,36.0,54300.0,0.0,3,3,0,0,0.227


In [13]:
# CSV sifatida saqlash
df_parquet.to_csv('data_new/credit_history.csv', index=False)


### **# demographics.csv**

In [14]:
df_demo = pd.read_csv('data/demographics.csv')
df_demo.head()

Unnamed: 0,cust_id,age,annual_income,employment_length,employment_type,education,marital_status,num_dependents
0,10000,41,$61800,2.2,Full-time,Graduate,Married,2
1,10001,38,28600,7.0,FULL_TIME,High School,Married,0
2,10002,18,"$20,700",0.8,FULL_TIME,Bachelor,Single,0
3,10003,27,31400,4.8,Full Time,Bachelor,Single,0
4,10004,26,$24600,5.2,Fulltime,High School,Single,0


In [15]:
# ummumiy tekshiruv
print("Duplicate qatorlar:", df_demo.duplicated().sum())
print("Missing values:\n", df_demo.isna().sum())
print(df_demo.info())
print(df_demo.describe())


Duplicate qatorlar: 0
Missing values:
 cust_id                 0
age                     0
annual_income           0
employment_length    2253
employment_type         0
education               0
marital_status          0
num_dependents          0
dtype: int64
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 89999 entries, 0 to 89998
Data columns (total 8 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   cust_id            89999 non-null  int64  
 1   age                89999 non-null  int64  
 2   annual_income      89999 non-null  object 
 3   employment_length  87746 non-null  float64
 4   employment_type    89999 non-null  object 
 5   education          89999 non-null  object 
 6   marital_status     89999 non-null  object 
 7   num_dependents     89999 non-null  int64  
dtypes: float64(1), int64(3), object(4)
memory usage: 5.5+ MB
None
            cust_id           age  employment_length  num_dependents
count  89999.000

In [16]:
# barcha qiymatlarni bir xil ko'rinishga keltirish uchun
df_demo['annual_income'] = df_demo['annual_income'].astype(str)  # avval stringga o'tkazamiz
df_demo['annual_income'] = df_demo['annual_income'].str.replace('$', '', regex=False)
df_demo['annual_income'] = df_demo['annual_income'].str.replace(',', '', regex=False)
df_demo['annual_income'] = df_demo['annual_income'].astype(float)

# dollor belgisini olib tashlash uchun
df_demo.rename(columns={'annual_income': 'annual_income_$'}, inplace=True)

# tekshirish
print(df_demo[['annual_income_$']].head())


   annual_income_$
0          61800.0
1          28600.0
2          20700.0
3          31400.0
4          24600.0


In [17]:
median_employment_length = df_demo['employment_length'].median()
df_demo['employment_length'].fillna(median_employment_length, inplace=True)
# bo'sh qiymatlarni to'ldirish mediana qiymat bilan
print(df_demo[ 'employment_length'].head())
print("Missing values:\n", df_demo.isna().sum())

0    2.2
1    7.0
2    0.8
3    4.8
4    5.2
Name: employment_length, dtype: float64
Missing values:
 cust_id              0
age                  0
annual_income_$      0
employment_length    0
employment_type      0
education            0
marital_status       0
num_dependents       0
dtype: int64


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df_demo['employment_length'].fillna(median_employment_length, inplace=True)


In [18]:
df_demo.rename(columns={'cust_id': 'customer_id'}, inplace=True)
# o'xshash ustunlarni birxillashtirish

In [19]:
# df_demo da employment_type ustunidagi noyob qiymatlarni chiqarish
print(df_demo['employment_type'].unique())

# yoki sorted qilib ko'rish
print(sorted(df_demo['employment_type'].dropna().unique()))


['Full-time' 'FULL_TIME' 'Full Time' 'Fulltime' 'Part Time'
 'Self Employed' 'FT' 'Self Emp' 'Contractor' 'Self-employed'
 'SELF_EMPLOYED' 'Contract' 'PART_TIME' 'CONTRACT' 'PT' 'Part-time']
['CONTRACT', 'Contract', 'Contractor', 'FT', 'FULL_TIME', 'Full Time', 'Full-time', 'Fulltime', 'PART_TIME', 'PT', 'Part Time', 'Part-time', 'SELF_EMPLOYED', 'Self Emp', 'Self Employed', 'Self-employed']


In [20]:
# employment_type ustunini standartlashtirish
df_demo['employment_type'] = df_demo['employment_type'].replace({
    'Full-time': 'Full Time',
    'FULL_TIME': 'Full Time',
    'Full Time': 'Full Time',
    'Fulltime': 'Full Time',
    'FT': 'Full Time',
    'Part Time': 'Part Time',
    'PART_TIME': 'Part Time',
    'Part-time': 'Part Time',
    'PT': 'Part Time',
    'Self Employed': 'Self Employed',
    'Self Emp': 'Self Employed',
    'Self-employed': 'Self Employed',
    'SELF_EMPLOYED': 'Self Employed',
    'Contractor': 'Contractor',
    'Contract': 'Contractor',
    'CONTRACT': 'Contractor'
})

# Natijani tekshirish
print(sorted(df_demo['employment_type'].unique()))


['Contractor', 'Full Time', 'Part Time', 'Self Employed']


In [21]:
# 1. Noyob qiymatlarni ko'rish
for col in ['employment_type', 'education', 'marital_status']:
    print(f"Unique values in {col}:")
    print(df_demo[col].unique())
    print("-" * 40)

label_cols = ['employment_type', 'education', 'marital_status']

# Har ustun uchun encoderlarni saqlash (keyinchalik kerak bo‘lsa)
encoders = {}

for col in label_cols:
    le = LabelEncoder()
    df_demo[col] = le.fit_transform(df_demo[col])
    encoders[col] = le

    print(f"{col} mapping:")
    print({cls: int(code) for cls, code in zip(le.classes_, le.transform(le.classes_))})
    print("-" * 40)

print(df_demo.head())

Unique values in employment_type:
['Full Time' 'Part Time' 'Self Employed' 'Contractor']
----------------------------------------
Unique values in education:
['Graduate' 'High School' 'Bachelor' 'Some College' 'Advanced']
----------------------------------------
Unique values in marital_status:
['Married' 'Single' 'Divorced']
----------------------------------------
employment_type mapping:
{'Contractor': 0, 'Full Time': 1, 'Part Time': 2, 'Self Employed': 3}
----------------------------------------
education mapping:
{'Advanced': 0, 'Bachelor': 1, 'Graduate': 2, 'High School': 3, 'Some College': 4}
----------------------------------------
marital_status mapping:
{'Divorced': 0, 'Married': 1, 'Single': 2}
----------------------------------------
   customer_id  age  annual_income_$  employment_length  employment_type  \
0        10000   41          61800.0                2.2                1   
1        10001   38          28600.0                7.0                1   
2        10002  

In [22]:
df_demo

Unnamed: 0,customer_id,age,annual_income_$,employment_length,employment_type,education,marital_status,num_dependents
0,10000,41,61800.0,2.2,1,2,1,2
1,10001,38,28600.0,7.0,1,3,1,0
2,10002,18,20700.0,0.8,1,1,2,0
3,10003,27,31400.0,4.8,1,1,2,0
4,10004,26,24600.0,5.2,1,3,2,0
...,...,...,...,...,...,...,...,...
89994,99994,52,72300.0,3.7,1,1,2,1
89995,99995,61,20000.0,4.2,1,3,0,1
89996,99996,19,20000.0,1.5,1,3,1,3
89997,99997,21,20000.0,3.5,1,3,1,1


In [23]:
df_demo.to_csv("data_new/demographics_cleaned.csv", index=False)
# tozalangan datani saqlash

### **# financial_ratios.jsonl**

In [31]:
df_fin = pd.read_json("data/financial_ratios.jsonl", lines=True)

print("Fayl muvaffaqiyatli o‘qildi!")
print(df_fin.head())
print(df_fin.info())

Fayl muvaffaqiyatli o‘qildi!
   cust_num monthly_income existing_monthly_debt monthly_payment  \
0     10000       5,150.00                738.64         $592.13   
1     10001       2,383.33                392.21        1,013.86   
2     10002       1,725.00                204.07         $317.81   
3     10003       2,616.67               $288.71          234.52   
4     10004       2,050.00               $248.77          334.81   

   debt_to_income_ratio  debt_service_ratio  payment_to_income_ratio  \
0                 0.258            0.258402                    0.115   
1                 0.590            0.589959                    0.425   
2                 0.303            0.302539                    0.184   
3                 0.200            0.199961                    0.090   
4                 0.285            0.284673                    0.163   

   credit_utilization revolving_balance credit_usage_amount available_credit  \
0               0.841       $142,213.10         $

In [32]:
df_fin

Unnamed: 0,cust_num,monthly_income,existing_monthly_debt,monthly_payment,debt_to_income_ratio,debt_service_ratio,payment_to_income_ratio,credit_utilization,revolving_balance,credit_usage_amount,available_credit,total_monthly_debt_payment,annual_debt_payment,loan_to_annual_income,total_debt_amount,monthly_free_cash_flow
0,10000,5150.00,738.64,$592.13,0.258,0.258402,0.115,0.841,"$142,213.10","$142,213.10","$26,886.90",1330.77,15969.24,0.286408,159913.10,3819.23
1,10001,2383.33,392.21,1013.86,0.590,0.589959,0.425,0.971,"$75,932.20",75932.20,"$2,267.80",1406.07,16872.84,3.986014,189932.20,$977.26
2,10002,1725.00,204.07,$317.81,0.303,0.302539,0.184,0.539,22314.6,22314.60,"$19,085.40",521.88,6262.56,0.449275,31614.6,1203.12
3,10003,2616.67,$288.71,234.52,0.200,0.199961,0.090,0.147,8820.00,8820.0,51180.00,523.23,6278.76,0.277070,"$17,520.00",2093.4366666666665
4,10004,2050.00,$248.77,334.81,0.285,0.284673,0.163,0.488,24253.6,24253.6,25446.40,583.58,7002.96,0.292683,"$31,453.60",1466.42
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
89994,99994,6025.00,2120.97,519.87,0.438,0.438314,0.086,0.359,48680.40,48680.40,"$86,919.60",2640.84,31690.08,0.156293,"$59,980.40","$3,384.16"
89995,99995,"$1,666.67",138.97,$545.44,0.411,0.410646,0.327,0.355,15833.00,15833.0,"$28,767.00",$684.41,8212.92,0.825000,32333.0,982.26
89996,99996,1666.67,$129.90,616.96,0.448,0.448116,0.370,0.669,12242.70,12242.7,"$6,057.30",$746.86,8962.32,0.890000,30042.7,919.8066666666667
89997,99997,"$1,666.67",162.11,$351.00,0.308,0.307866,0.211,0.562,"$30,516.60",30516.60,23783.4,513.11,6157.32,0.585000,42216.6,"$1,153.56"


In [33]:
dup_count = df_fin.duplicated().sum()
print("Duplicate qatorlar soni:", dup_count)
# dublicatelarni aniqlash 
print("Missing values:\n", df_fin.isna().sum())

Duplicate qatorlar soni: 0
Missing values:
 cust_num                         0
monthly_income                   0
existing_monthly_debt            0
monthly_payment                  0
debt_to_income_ratio             0
debt_service_ratio               0
payment_to_income_ratio          0
credit_utilization               0
revolving_balance             1377
credit_usage_amount              0
available_credit                 0
total_monthly_debt_payment       0
annual_debt_payment              0
loan_to_annual_income            0
total_debt_amount                0
monthly_free_cash_flow           0
dtype: int64


In [35]:
dollar_cols = [
    "monthly_income",
    "existing_monthly_debt",
    "monthly_payment",
    "revolving_balance",
    "credit_usage_amount",
    "available_credit",
    "total_monthly_debt_payment",
    "total_debt_amount",
    "monthly_free_cash_flow"
]

def clean_money(x):
    if pd.isna(x):
        return None

    x = str(x).strip()

    # faqat raqam + ajratuvchi + minus belgisi qoldiramiz
    x = re.sub(r"[^0-9,.\-]", "", x)

    # butunlay bo‘sh bo‘lib qolgan bo‘lsa → None
    if x == "" or x == "-" or x == "--":
        return None

    # ikkidan ortiq ajratuvchi bo‘lsa → barcha vergul va nuqtalarni olib tashlab son qilamiz
    if x.count(",") + x.count(".") > 1:
        # faqat so'nggi ajratuvchi decimal bo'ladi
        chars = list(x)
        # barcha vergul va nuqtalarni yig‘amiz
        seps = [i for i, c in enumerate(chars) if c in ",."]
        
        # oxirgisi decimal nuqta bo‘ladi
        last = seps[-1]
        new = ""
        for i, c in enumerate(chars):
            if i == last:
                new += "."
            elif c in ",.":
                continue
            else:
                new += c
        x = new
    else:
        # 1.234 → 1234 (agar format dargumon bo‘lsa)
        if "." in x and "," in x:
            x = x.replace(",", "")
        # faqat vergul decimal bo‘lsa
        elif "," in x and "." not in x:
            x = x.replace(",", ".")

    try:
        return float(x)
    except:
        return None


# ustunlarni tozalaymiz
for col in dollar_cols:
    df_fin[col] = df_fin[col].apply(clean_money)

# ustun nomiga _usd qo‘shamiz
df_fin.rename(columns={col: col + "_usd" for col in dollar_cols}, inplace=True)

In [36]:
df_fin

Unnamed: 0,cust_num,monthly_income_usd,existing_monthly_debt_usd,monthly_payment_usd,debt_to_income_ratio,debt_service_ratio,payment_to_income_ratio,credit_utilization,revolving_balance_usd,credit_usage_amount_usd,available_credit_usd,total_monthly_debt_payment_usd,annual_debt_payment,loan_to_annual_income,total_debt_amount_usd,monthly_free_cash_flow_usd
0,10000,5150.00,738.64,592.13,0.258,0.258402,0.115,0.841,142213.1,142213.1,26886.9,1330.77,15969.24,0.286408,159913.1,3819.230000
1,10001,2383.33,392.21,1013.86,0.590,0.589959,0.425,0.971,75932.2,75932.2,2267.8,1406.07,16872.84,3.986014,189932.2,977.260000
2,10002,1725.00,204.07,317.81,0.303,0.302539,0.184,0.539,22314.6,22314.6,19085.4,521.88,6262.56,0.449275,31614.6,1203.120000
3,10003,2616.67,288.71,234.52,0.200,0.199961,0.090,0.147,8820.0,8820.0,51180.0,523.23,6278.76,0.277070,17520.0,2093.436667
4,10004,2050.00,248.77,334.81,0.285,0.284673,0.163,0.488,24253.6,24253.6,25446.4,583.58,7002.96,0.292683,31453.6,1466.420000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
89994,99994,6025.00,2120.97,519.87,0.438,0.438314,0.086,0.359,48680.4,48680.4,86919.6,2640.84,31690.08,0.156293,59980.4,3384.160000
89995,99995,1666.67,138.97,545.44,0.411,0.410646,0.327,0.355,15833.0,15833.0,28767.0,684.41,8212.92,0.825000,32333.0,982.260000
89996,99996,1666.67,129.90,616.96,0.448,0.448116,0.370,0.669,12242.7,12242.7,6057.3,746.86,8962.32,0.890000,30042.7,919.806667
89997,99997,1666.67,162.11,351.00,0.308,0.307866,0.211,0.562,30516.6,30516.6,23783.4,513.11,6157.32,0.585000,42216.6,1153.560000


In [37]:
df_fin["revolving_balance_usd"] = df_fin["revolving_balance_usd"].fillna(
    df_fin["revolving_balance_usd"].median()
)
#missing valuelarni to'ldiradi