In [316]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
import matplotlib.pyplot as plt

In [317]:
# Load the yearly births table (the displayed preview has columns 2008-2022);
# the first CSV column is used as the row index.
births_datas = pd.read_csv("birth.csv", index_col=0)
births_datas.head(5)

Unnamed: 0_level_0,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022
city,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
千代田区,350,387,380,378,469,466,619,599,627,675,595,650,673,616,585
中央区,1222,1332,1447,1432,1495,1694,1838,1983,2032,2079,2108,2088,2144,2010,1940
港区,2277,2403,2510,2402,2638,2727,3051,3118,3251,3123,3042,2919,2817,2595,2436
新宿区,1997,2232,2192,2135,2299,2469,2515,2612,2790,2788,2679,2516,2591,2465,2328
文京区,1531,1534,1635,1715,1694,1859,1917,2034,2167,2074,2119,2044,2109,1884,1845


In [318]:
# Predict each municipality's births for the target year with a polynomial
# regression fitted per row of births_datas.
TARGET_YEAR = 2023
POLY_DEGREE = 7

# Train on observed years only. Another cell appends the predicted "2023"
# column to births_datas; without this guard, re-running this cell would
# feed the earlier prediction back in as training data.
births_history = births_datas.drop(columns=[str(TARGET_YEAR)], errors="ignore")

# The column labels are year strings; convert them to numbers explicitly
# rather than handing string labels to scikit-learn.
years = births_history.columns.astype(float).to_numpy().reshape(-1, 1)

# The polynomial terms (a0 + a1*x + a2*x^2 + ...) depend only on the years,
# so the design matrix is built once instead of once per municipality.
poly = PolynomialFeatures(degree=POLY_DEGREE)
X_poly = poly.fit_transform(years)

# Use transform (not fit_transform) so the already-fitted expansion is
# reused for the prediction input.
next_year_data_poly = poly.transform(np.array([[float(TARGET_YEAR)]]))

# NOTE(review): a degree-7 fit on raw year values (~2e3, so x^7 is ~1e23)
# is numerically ill-conditioned and has 8 coefficients for the handful of
# yearly observations available -- consider centring the years and/or
# lowering the degree. Left unchanged here to keep the modelling choice.
predicted_births = []
for ward in births_history.index:
    # One model per municipality: births as an (n_years, 1) target column.
    births = births_history.loc[ward].to_numpy().reshape(-1, 1)

    model = LinearRegression()
    model.fit(X_poly, births)

    # int() truncates the prediction toward zero (it does not round).
    predicted_births.append(int(model.predict(next_year_data_poly)[0][0]))

In [327]:
# Add the predictions as a new "2023" column. The values are assigned by
# position, so predicted_births must be in the same order as the rows.
births_datas["2023"] = list(predicted_births)
births_datas

Unnamed: 0_level_0,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023
city,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
千代田区,350,387,380,378,469,466,619,599,627,675,595,650,673,616,585,502
中央区,1222,1332,1447,1432,1495,1694,1838,1983,2032,2079,2108,2088,2144,2010,1940,1735
港区,2277,2403,2510,2402,2638,2727,3051,3118,3251,3123,3042,2919,2817,2595,2436,1935
新宿区,1997,2232,2192,2135,2299,2469,2515,2612,2790,2788,2679,2516,2591,2465,2328,2043
文京区,1531,1534,1635,1715,1694,1859,1917,2034,2167,2074,2119,2044,2109,1884,1845,1602
台東区,1208,1200,1339,1312,1382,1471,1610,1619,1590,1661,1669,1634,1612,1570,1463,1332
墨田区,1949,2038,2074,1971,2094,2191,2298,2562,2486,2599,2516,2434,2370,2295,2152,1828
江東区,4096,4442,4454,4359,4340,4752,4790,5100,4980,4793,4643,4334,4325,4206,3876,3433
品川区,3011,3167,3100,3242,3380,3610,3550,3830,3889,3800,3823,3810,3845,3632,3460,3184
目黒区,2031,2091,2216,2219,2317,2400,2590,2577,2576,2593,2500,2456,2353,2159,2179,1885


In [320]:
# Read the rate records and keep only the part of each name that precedes
# the first "の".
rate_data = pd.read_json("rate.json")
rate_data["name"] = (
    rate_data["name"].map(lambda label: label.partition("の")[0]).tolist()
)
rate_data.head()

Unnamed: 0,name,rate,capacity,entry_num
0,千代田区,"{'2022': 90.2, '2021': 86.9, '2020': 82.1, '20...","{'2022': '2,010人', '2021': '2,069人', '2020': '...","{'2022': '377件', '2021': '435件', '2020': '515件..."
1,中央区,"{'2022': 74.3, '2021': 68.2, '2020': 59.5, '20...","{'2022': '6,119人', '2021': '5,733人', '2020': '...","{'2022': '1,503件', '2021': '1,881件', '2020': '..."
2,港区,"{'2022': 67.5, '2021': 69.5, '2020': 61.1, '20...","{'2022': '6,773人', '2021': '6,755人', '2020': '...","{'2022': '1,842件', '2021': '1,998件', '2020': '..."
3,新宿区,"{'2022': 76.0, '2021': 72.0, '2020': 69.0, '20...","{'2022': '7,952人', '2021': '7,896人', '2020': '...","{'2022': '1,732件', '2021': '1,879件', '2020': '..."
4,台東区,"{'2022': 69.4, '2021': 65.4, '2020': 62.2, '20...","{'2022': '4,328人', '2021': '4,046人', '2020': '...","{'2022': '1,227件', '2021': '1,366件', '2020': '..."


In [321]:
# Distinct city names, in order of first appearance in rate_data.
city_list = rate_data["name"].unique().tolist()

In [322]:
# Keep only the 2018-2022 birth columns, ordered newest first.
recent_years = [str(year) for year in range(2022, 2017, -1)]
births_datas2 = births_datas[recent_years]

births_datas2.head()

Unnamed: 0_level_0,2022,2021,2020,2019,2018
city,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
千代田区,585,616,673,650,595
中央区,1940,2010,2144,2088,2108
港区,2436,2595,2817,2919,3042
新宿区,2328,2465,2591,2516,2679
文京区,1845,1884,2109,2044,2119


In [323]:
# Build the long-format training table: one row per (city, year) holding
# that year's rate and number of births.
years = ["2018", "2019", "2020", "2021", "2022"]

# Collect one frame per year and concatenate once at the end, instead of
# growing the result with pd.concat inside the loop.
yearly_frames = []
for year in years:
    buf = pd.DataFrame(
        {
            "city": rate_data["name"],
            "year": int(year),
            # Each "rate" entry is a dict keyed by year string.
            "rate": rate_data["rate"].str.get(year),
        }
    )
    # Left join keeps every city from rate_data; a city that is missing from
    # the births table ends up with NaN births.
    buf = pd.merge(buf, births_datas2[year], on="city", how="left")
    yearly_frames.append(buf.rename(columns={year: "births"}))

datas = pd.concat(yearly_frames, axis=0, ignore_index=True)

datas.head()

Unnamed: 0,city,year,rate,births
0,千代田区,2018,62.0,595
1,中央区,2018,61.6,2108
2,港区,2018,39.9,3042
3,新宿区,2018,70.0,2679
4,台東区,2018,55.4,1669


In [324]:
# Feature rows for 2023: one row per city with its predicted births.
predX = (
    pd.DataFrame({"city": city_list, "year": 2023})
    .merge(births_datas["2023"], on="city", how="left")
    .rename(columns={"2023": "births"})
)

predX.head()

Unnamed: 0,city,year,births
0,千代田区,2023,502
1,中央区,2023,1735
2,港区,2023,1935
3,新宿区,2023,2043
4,台東区,2023,1332


In [325]:
# NOTE(review): predicted_rate is never filled in this cell; kept only in
# case another cell reads it.
predicted_rate = []

# Fit a multiple linear regression of rate on city (one-hot), year and births,
# then predict the 2023 rate for every city in predX.
FEATURE_COLUMNS = ["city", "year", "births"]

# One-hot encode the city column; year and births stay numeric.
trainX = pd.get_dummies(datas[FEATURE_COLUMNS])

y = datas["rate"]

# Build the model
model = LinearRegression()
model.fit(trainX, y)

# Predict 2023.
# Select the feature columns explicitly: this cell adds a "rate" column to
# predX below, so encoding the whole frame would break on a re-run.
predX_dumm = pd.get_dummies(predX[FEATURE_COLUMNS])

# A city that never appeared in the training data has no fitted coefficient;
# fail loudly instead of predicting from a silently dropped column.
unseen_columns = predX_dumm.columns.difference(trainX.columns)
if len(unseen_columns) > 0:
    raise ValueError(
        f"prediction features not present in training data: {list(unseen_columns)}"
    )

# Align the prediction columns (set and order) with the training matrix.
predX_dumm = predX_dumm.reindex(columns=trainX.columns, fill_value=0)
rate_2023 = model.predict(predX_dumm)

predX["rate"] = rate_2023

print(predX)


     city  year  births  rate
0    千代田区  2023     502  84.6
1     中央区  2023    1735  71.7
2      港区  2023    1935  60.8
3     新宿区  2023    2043  78.2
4     台東区  2023    1332  70.4
5     文京区  2023    1602  83.0
6     江東区  2023    3433  87.6
7     墨田区  2023    1828  81.9
8     品川区  2023    3184  80.9
9     大田区  2023    4606  79.3
10    目黒区  2023    1885  80.2
11   世田谷区  2023    5705  72.9
12    渋谷区  2023    1575  75.3
13    中野区  2023    2085  79.5
14    杉並区  2023    3572  83.3
15    豊島区  2023    1799  95.5
16     北区  2023    2228  87.9
17    荒川区  2023    1525  90.8
18    板橋区  2023    2983  85.1
19    練馬区  2023    4817  83.9
20    足立区  2023    3734  80.6
21    葛飾区  2023    2730  88.9
22   江戸川区  2023    4006  75.6
23   八王子市  2023    2675 101.1
24    立川市  2023     994  88.6
25   武蔵野市  2023    1044  79.3
26    三鷹市  2023    1122  85.8
27    青梅市  2023     487 103.5
28    府中市  2023    1432  82.0
29    昭島市  2023     764  54.1
30    調布市  2023    1434  84.6
31   小金井市  2023     903  76.4
32    町田市 

In [326]:
pd.options.display.float_format = '{:.1f}'.format

# Summary of rates per city: the 2018-2022 values from rate_data plus the
# predicted 2023 value.
years = ["2018", "2019", "2020", "2021", "2022"]

# Look the rates up by city name rather than by row position. city_list is
# de-duplicated, so if rate_data ever contained a repeated name, a positional
# copy would attach rates to the wrong city from that row onward.
rate_by_city = (
    rate_data.drop_duplicates(subset="name")
    .set_index("name")["rate"]
    .reindex(city_list)
)

new_rate = pd.DataFrame({"city": city_list})
for year in years:
    # Each "rate" entry is a dict keyed by year string.
    new_rate[year] = rate_by_city.str.get(year).to_numpy()

# rate_2023 is assigned by position; it must follow the order of city_list.
new_rate["2023"] = rate_2023

new_rate



Unnamed: 0,city,2018,2019,2020,2021,2022,2023
0,千代田区,62.0,58.8,82.1,86.9,90.2,84.6
1,中央区,61.6,57.7,59.5,68.2,74.3,71.7
2,港区,39.9,42.9,61.1,69.5,67.5,60.8
3,新宿区,70.0,71.0,69.0,72.0,76.0,78.2
4,台東区,55.4,60.6,62.2,65.4,69.4,70.4
5,文京区,65.5,80.0,76.0,78.9,79.6,83.0
6,江東区,68.7,80.0,83.5,88.9,94.4,87.6
7,墨田区,69.8,73.2,76.2,76.5,82.0,81.9
8,品川区,79.8,77.7,70.5,73.9,71.2,80.9
9,大田区,70.5,73.9,75.2,78.8,77.3,79.3


In [140]:
# Write a frame to data.csv.
# NOTE(review): `data` is not defined in any cell shown in this notebook, and
# this cell's execution count (140) is older than every other cell's, so it
# would raise NameError on a fresh "Restart & Run All". Presumably one of the
# frames built in the cells shown here (e.g. `datas`) was intended -- confirm
# which.
# NOTE(review): index=None is passed where pandas documents a bool; presumably
# index=False (no index column) is intended -- confirm.
data.to_csv("data.csv",index=None)