# **Analisis Data Pinjaman Konsumen**
Latihan ini bertujuan untuk mengeksplorasi dan menganalisis data pinjaman nasabah dari sebuah lembaga keuangan digital.\
Dataset: `loan_customers.csv`\
Silakan isi kode Python untuk menjawab pertanyaan pada tiap bagian.

In [93]:
# Load library dan data
import pandas as pd

df = pd.read_csv("loan_customers.csv")
df.head()

Unnamed: 0,loan_id,customer_id,age,gender,region,loan_amount,loan_purpose,duration_month,interest_rate,approved,application_date
0,LN1000,CUST1000,56,Male,Central Java,19529899,Kesehatan,12,9.27,True,2025-05-16
1,LN1001,CUST1001,69,Male,West Java,18781862,Pendidikan,24,13.23,True,2025-05-17
2,LN1002,CUST1002,46,Male,Central Java,21644716,Usaha,36,8.12,False,2025-05-10
3,LN1003,CUST1003,32,Female,Central Java,1449395,Konsumtif,36,5.99,True,2025-03-31
4,LN1004,CUST1004,60,Female,East Java,22921117,Usaha,36,6.68,False,2025-05-23


## Bagian 1: Data Preparation

In [94]:
# 1. Tampilkan info dasar
print(df.info())
print(df.columns)
print(df.shape)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 11 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   loan_id           150 non-null    object 
 1   customer_id       150 non-null    object 
 2   age               150 non-null    int64  
 3   gender            150 non-null    object 
 4   region            150 non-null    object 
 5   loan_amount       150 non-null    int64  
 6   loan_purpose      150 non-null    object 
 7   duration_month    150 non-null    int64  
 8   interest_rate     150 non-null    float64
 9   approved          150 non-null    bool   
 10  application_date  150 non-null    object 
dtypes: bool(1), float64(1), int64(3), object(6)
memory usage: 12.0+ KB
None
Index(['loan_id', 'customer_id', 'age', 'gender', 'region', 'loan_amount',
       'loan_purpose', 'duration_month', 'interest_rate', 'approved',
       'application_date'],
      dtype='object')
(150, 11)


In [95]:
# 2. Ubah kolom application_date ke datetime
df['application_date'] = pd.to_datetime(df['application_date']) # Mengonversi kolom 'application_date' dalam DataFrame df menjadi tipe data datetime
df['application_date'].head() # Menampilkan 5 baris pertama dari kolom 'application_date' setelah konversi

0   2025-05-16
1   2025-05-17
2   2025-05-10
3   2025-03-31
4   2025-05-23
Name: application_date, dtype: datetime64[ns]

In [96]:
# 3. Tambahkan kolom monthly_payment
# Formula: (loan_amount * (1 + interest_rate / 100)) / duration_month
df['monthly_payment'] = (df['loan_amount'] * (1 + (df['interest_rate'] / 100))) / df['duration_month']
df.head()

Unnamed: 0,loan_id,customer_id,age,gender,region,loan_amount,loan_purpose,duration_month,interest_rate,approved,application_date,monthly_payment
0,LN1000,CUST1000,56,Male,Central Java,19529899,Kesehatan,12,9.27,True,2025-05-16,1778360.0
1,LN1001,CUST1001,69,Male,West Java,18781862,Pendidikan,24,13.23,True,2025-05-17,886112.6
2,LN1002,CUST1002,46,Male,Central Java,21644716,Usaha,36,8.12,False,2025-05-10,650063.0
3,LN1003,CUST1003,32,Female,Central Java,1449395,Konsumtif,36,5.99,True,2025-03-31,42672.6
4,LN1004,CUST1004,60,Female,East Java,22921117,Usaha,36,6.68,False,2025-05-23,679229.1


In [97]:
# 4. Tangani missing values jika ada
missing_values = df.isnull().sum()
missing_values

loan_id             0
customer_id         0
age                 0
gender              0
region              0
loan_amount         0
loan_purpose        0
duration_month      0
interest_rate       0
approved            0
application_date    0
monthly_payment     0
dtype: int64

In [98]:
# 5. Hapus data dengan usia < 18 atau > 65
# Memfilter DataFrame df menggunakan metode query untuk menyimpan hanya baris dengan usia antara 18 dan 65 tahun
df = df.query('age >= 18 and age <= 65')
# Menampilkan 5 baris pertama dari DataFrame setelah pemfilteran
df.head()

Unnamed: 0,loan_id,customer_id,age,gender,region,loan_amount,loan_purpose,duration_month,interest_rate,approved,application_date,monthly_payment
0,LN1000,CUST1000,56,Male,Central Java,19529899,Kesehatan,12,9.27,True,2025-05-16,1778360.0
2,LN1002,CUST1002,46,Male,Central Java,21644716,Usaha,36,8.12,False,2025-05-10,650063.0
3,LN1003,CUST1003,32,Female,Central Java,1449395,Konsumtif,36,5.99,True,2025-03-31,42672.6
4,LN1004,CUST1004,60,Female,East Java,22921117,Usaha,36,6.68,False,2025-05-23,679229.1
5,LN1005,CUST1005,25,Female,East Java,7754350,Konsumtif,36,6.66,True,2025-05-26,229744.2


## Bagian 2: Analisis dan Agregasi

In [99]:
# 6. Rata-rata dan median loan_amount per loan_purpose
# Mengelompokkan data berdasarkan 'loan_purpose' dan menghitung rata-rata dan median dari 'loan_amount'
df.groupby('loan_purpose')['loan_amount'].agg(['mean', 'median']) 

Unnamed: 0_level_0,mean,median
loan_purpose,Unnamed: 1_level_1,Unnamed: 2_level_1
Kesehatan,11113420.0,11074901.0
Konsumtif,12193810.0,11503421.5
Pendidikan,11487970.0,12098716.0
Usaha,13512570.0,14279024.0


In [111]:
# 7. Wilayah dengan tingkat approval tertinggi
approval_rate = df[df['approved'] == True].groupby('region').size() / df.groupby('region').size()
approval_rate

region
Bali            0.863636
Central Java    0.724138
East Java       0.709677
Jakarta         0.787879
West Java       0.814815
dtype: float64

In [112]:
# 8. Ringkasan pinjaman dan suku bunga berdasarkan region & gender
# Kelompokkan data berdasarkan region dan gender
grouped = df.groupby(['region', 'gender'])
# Hitung total pinjaman dan rata-rata suku bunga
summary = grouped[['loan_amount', 'interest_rate']].agg({
    'loan_amount': 'sum',
    'interest_rate': 'mean'
}).reset_index()
# Ganti nama kolom biar lebih jelas
summary.columns = ['Region', 'Gender', 'Total Pinjaman', 'Rata-rata Suku Bunga']
# Tampilkan hasil ringkasan
summary

Unnamed: 0,Region,Gender,Total Pinjaman,Rata-rata Suku Bunga
0,Bali,Female,111784221,11.074615
1,Bali,Male,104803303,10.024444
2,Central Java,Female,195106177,10.856111
3,Central Java,Male,164595654,11.503636
4,East Java,Female,127153320,9.84375
5,East Java,Male,249880910,10.563043
6,Jakarta,Female,228124935,11.036667
7,Jakarta,Male,190152749,9.728
8,West Java,Female,172873123,12.769231
9,West Java,Male,169552595,9.920714


In [102]:
# 9. 5 nasabah dengan monthly_payment tertinggi
top_5_customers = df.nlargest(5, 'monthly_payment')[['customer_id', 'monthly_payment']]
display(top_5_customers)

Unnamed: 0,customer_id,monthly_payment
63,CUST1063,4564254.0
95,CUST1095,4541171.0
101,CUST1101,4492769.0
41,CUST1041,4174783.0
143,CUST1143,4066458.0


In [117]:
# 10. Perbandingan pinjaman konsumtif vs usaha (interest_rate & duration)
# Kelompokkan berdasarkan tujuan pinjaman (konsumtif vs usaha)
comparison = df.groupby('loan_purpose')[['interest_rate', 'duration_month']].mean().reset_index()
# Ganti nama kolom biar lebih mudah dibaca
comparison.columns = ['Jenis Pinjaman', 'Rata-rata Suku Bunga', 'Rata-rata Durasi (bulan)']
# Tampilkan hasil
comparison

Unnamed: 0,Jenis Pinjaman,Rata-rata Suku Bunga,Rata-rata Durasi (bulan)
0,Kesehatan,11.021714,18.342857
1,Konsumtif,10.662045,18.954545
2,Pendidikan,10.36,20.322581
3,Usaha,10.976875,20.25


## Bagian 3: Indexing dan Sorting

In [104]:
# 11. Jadikan loan_id sebagai index
# TODO

In [105]:
# 12. Sort berdasarkan application_date dan loan_amount (descending)
# TODO

In [106]:
# 13. Buat MultiIndex region dan loan_purpose, tampilkan subset tertentu
# TODO

## Bagian 4: Insight dan Rekomendasi

In [107]:
# 14. Analisis jumlah pinjaman berdasarkan gender
# TODO

In [108]:
# 15. Apakah durasi pinjaman mempengaruhi approval?
# TODO

In [109]:
# 16. Rekomendasi strategi pinjaman berdasarkan analisis
# TODO

### 🎯 Silakan gunakan markdown dan komentar untuk menjelaskan jawaban Anda.