In [1]:
import pandas as pd
import numpy as np
from datetime import date

# 1. Parameters
N_COMPANIES = 100
START_DATE = date(2021, 1, 1)
END_DATE = date(2025, 10, 1)  # first day of the last month, so October 2025 is included

# 2. Build the company_id list and the list of monthly periods
# Seven-character, zero-padded IDs starting at 1000001.
company_ids = [str(1000001 + offset).zfill(7) for offset in range(N_COMPANIES)]
# Month-start timestamps from January 2021 through October 2025 (58 months),
# rendered as 'YYYYMM' strings.
date_range = pd.date_range(START_DATE, END_DATE, freq='MS')
dates = date_range.strftime('%Y%m').tolist()

# Cartesian product of companies and months (5800 rows): every company is
# repeated once per month, and the month list is cycled once per company.
n_months = len(dates)
df_dummy = pd.DataFrame({
    'company_id': [cid for cid in company_ids for _ in range(n_months)],
    'date': dates * N_COMPANIES,
})
N_ROWS = df_dummy.shape[0]

# 3. Give each company its own base balance so the data varies by company
np.random.seed(42)  # fixed seed for reproducibility
base_balances = np.random.randint(1_000_000, 10_000_000, size=N_COMPANIES)
base_balance_map = {cid: balance for cid, balance in zip(company_ids, base_balances)}
df_dummy['base_balance'] = df_dummy['company_id'].map(base_balance_map)

# 4. Current-month end balance: base balance scaled by a factor in [0.7, 1.3)
monthly_fluctuation = np.random.uniform(low=0.7, high=1.3, size=N_ROWS)
scaled_base = df_dummy['base_balance'] * monthly_fluctuation
df_dummy['End of month account balance'] = scaled_base.astype(int)

# 5. Remaining current-month balances, all derived from the end-of-month balance.
# The random draws below must stay in this order to keep the seeded output stable.
end_balance = df_dummy['End of month account balance']

# Monthly average: within roughly +/- 5% of the end-of-month balance
fluct_avg = np.random.uniform(low=0.95, high=1.05, size=N_ROWS)
df_dummy['Monthly average account balance'] = (end_balance * fluct_avg).astype(int)

# Monthly minimum: 50% to 90% of the end-of-month balance
fluct_min = np.random.uniform(low=0.5, high=0.9, size=N_ROWS)
df_dummy['Monthly minimum balance'] = (end_balance * fluct_min).astype(int)

# Monthly maximum: 110% to 300% of the end-of-month balance
fluct_max = np.random.uniform(low=1.1, high=3.0, size=N_ROWS)
df_dummy['Maximum monthly balance'] = (end_balance * fluct_max).astype(int)

# 6. 前月の残高データを生成 (当月のデータから +/- 10%程度の変動)
def generate_last_month_data(current_data):
    """Return synthetic "last month" values derived from current-month values.

    Each element is multiplied by its own factor drawn uniformly from
    [0.9, 1.1) using NumPy's global random state, and the product is
    truncated to an integer via ``astype(int)``.

    Args:
        current_data: One-dimensional pandas Series (or NumPy array) of
            current-month balances.

    Returns:
        An object of the same length as ``current_data`` holding the
        perturbed values as integers.
    """
    # Size the noise from the input itself instead of the module-level N_ROWS,
    # so the helper works for any input length and has no hidden global dependency.
    fluct = np.random.uniform(0.9, 1.1, len(current_data))
    return (current_data * fluct).astype(int)

# Map each "last month" column to the current-month column it is derived from.
# Insertion order matters: the helper consumes the global random stream, so the
# columns must be generated in exactly this sequence.
# NOTE(review): each column is perturbed independently from the same row's
# current-month value; it is not taken from the previous month's row.
last_month_sources = {
    "Account balance at the end of last month": 'End of month account balance',
    "Last month's average account balance": 'Monthly average account balance',
    "Last month's minimum balance": 'Monthly minimum balance',
    "Last month's maximum balance": 'Maximum monthly balance',
}
for target_col, source_col in last_month_sources.items():
    df_dummy[target_col] = generate_last_month_data(df_dummy[source_col])


# 7. Select the final columns: identifiers, then current-month balances,
# then the matching last-month balances (drops the helper base_balance column).
columns = [
    "company_id",
    "date",
    *last_month_sources.values(),
    *last_month_sources.keys(),
]
df_dummy = df_dummy.loc[:, columns]

# 8. Write the result to a CSV file (without the index column)
output_file = "ai_score_dummy_data_full.csv"
df_dummy.to_csv(output_file, index=False)

# 9. Sanity check: the full table is too long to print, so show only the first 10 rows
preview = df_dummy.head(10)
print(preview.to_markdown(index=False))

total_rows = len(df_dummy)
print(f"総行数: {total_rows}")
print(f"ダミーデータが '{output_file}' として作成されました。")

|   company_id |   date |   End of month account balance |   Monthly average account balance |   Monthly minimum balance |   Maximum monthly balance |   Account balance at the end of last month |   Last month's average account balance |   Last month's minimum balance |   Last month's maximum balance |
|-------------:|-------:|-------------------------------:|----------------------------------:|--------------------------:|--------------------------:|-------------------------------------------:|---------------------------------------:|-------------------------------:|-------------------------------:|
|      1000001 | 202101 |                        8630284 |                           8599159 |                   5216582 |                  13851346 |                                    9356416 |                                8016925 |                        5400719 |                       12954605 |
|      1000001 | 202102 |                        7395753 |                           720776