In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Fixed seed so that "Restart Kernel -> Run All" reproduces the same synthetic sample.
# Seeding the global NumPy generator also makes any later draw from np.random repeatable.
SEED = 42
np.random.seed(SEED)

# Number of synthetic records to generate.
n = 2000
# Days overdue per record. randint's upper bound is exclusive, so values lie in 1..364.
# NOTE(review): if day 365 is meant to be possible, the bound should be 366 - confirm.
days_late = np.random.randint(1, 365, size=n)
# Amount owed per record, drawn from a normal distribution (mean 100, std 20).
amount_due = np.random.normal(loc=100, scale=20, size=n)

In [None]:
# Collect the per-record columns (sequential ids starting at 1, overdue days,
# amount owed) and wrap them in a DataFrame.
data = dict(
    user_id=np.arange(1, n + 1),
    days_late=days_late,
    amount=amount_due,
)
df = pd.DataFrame(data=data)

In [None]:
# One boolean mask per overdue bucket; every bucket includes its upper edge
# (<=30, 31-60, 61-90, 91-120, >120 days).
conditions = [
    df['days_late'].le(30),
    df['days_late'].gt(30) & df['days_late'].le(60),
    df['days_late'].gt(60) & df['days_late'].le(90),
    df['days_late'].gt(90) & df['days_late'].le(120),
    df['days_late'].gt(120),
]
# Repayment probability for each bucket, in the same order as the masks above.
return_probabilities = [0.8, 0.6, 0.4, 0.3, 0.1]

In [None]:
# Repayment probability of every row, picked from the first overdue bucket it matches.
row_probabilities = np.select(conditions, return_probabilities)
# One Bernoulli draw per row (1 = debt returned, 0 = not returned). A single vectorized
# binomial call replaces a Python-level loop issuing one np.random.choice per row.
df['debt_returned'] = np.random.binomial(1, row_probabilities)

In [None]:
# Summary of the repayment flag: absolute counts first, then the same breakdown as shares.
print("Статистика возврата долга:")
for as_share in (False, True):
    print(df['debt_returned'].value_counts(normalize=as_share))

In [None]:
# Human-readable bucket labels, positionally aligned with `conditions`.
group_labels = ['1-30', '31-60', '61-90', '91-120', '>120']
# Give np.select an explicit string default: its implicit default is the integer 0,
# which recent NumPy releases refuse to combine with string choices (TypeError).
# The conditions cover every numeric value, so the default is not expected to appear.
df['days_group'] = np.select(conditions, group_labels, default='unknown')

# Group by bucket and normalise: share of each repayment outcome inside every bucket.
outcome_shares = (
    df.groupby('days_group')['debt_returned']
    .value_counts(normalize=True)
    .unstack(fill_value=0)
)
# Reindex instead of overwriting the column names blindly: rows follow the natural
# bucket order (not an incidental string sort) and both outcome columns exist even
# when one outcome never occurs in the sample.
grouped = (
    outcome_shares
    .reindex(index=group_labels, columns=[0, 1], fill_value=0)
    .rename(columns={0: 'No', 1: 'Yes'})
    .rename_axis(columns=None)
)

# The leading "\n" separates this report from the previous output.
print("\nСгруппированные данные:")
print(grouped)

In [None]:
# Create the figure and axes explicitly and hand the axes to pandas. DataFrame.plot
# opens its own figure when no `ax` is passed, so a preceding plt.figure(figsize=...)
# would stay empty and its size would not apply to the chart.
fig, ax = plt.subplots(figsize=(10, 6))
# Stacked bars: one bar per overdue bucket, split into the two outcome shares.
# rot=0 keeps the bucket labels horizontal.
grouped.plot(kind='bar', stacked=True, color=['red', 'blue'], width=0.7, rot=0, ax=ax)
ax.set_title('Доля возврата долга по группам')
ax.set_xlabel('Группы по дням просрочки')
ax.set_ylabel('Доля')
# Shares sum to 1 within a bucket, so the y-axis is fixed to the [0, 1] range.
ax.set_ylim(0, 1)
ax.legend(title='Возврат долга:')
fig.tight_layout()
plt.show()