In [78]:
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split, cross_val_score
import numpy as np

# Load the yearly source table from the Excel workbook.
file_path = r'C:\Users\user\Desktop\우울증예측1.xlsx'
data = pd.read_excel(file_path)

# Keep only the rows whose year lies in 2000..2024 (both ends inclusive).
# The copy lets the derived column below be added without touching `data`.
in_training_window = data['연도'].between(2000, 2024)
train_data = data.loc[in_training_window].copy()

# Derived target column: population times the rate, where the rate is
# converted to a fraction by dividing by 100.
depression_fraction = train_data['우울증환자수비율'].div(100)
train_data['독거노인 우울증 수'] = train_data['독거노인인구'].mul(depression_fraction)

# Features are the two raw columns; the target is the derived column.
feature_columns = ['독거노인인구', '우울증환자수비율']
X = train_data[feature_columns]
y = train_data['독거노인 우울증 수']

# Two-stage split with a fixed seed: 40 % of the rows are held out first,
# then that hold-out is halved into validation and test parts
# (roughly 60 / 20 / 20 overall).
X_train, X_temp, y_train, y_temp = train_test_split(
    X, y, test_size=0.4, random_state=42
)
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, random_state=42
)

# Fit an ordinary linear regression on the training part only.
# NOTE(review): in the code visible here the fitted model is only scored,
# never used to produce the future projection - confirm that is intended.
model = LinearRegression()
model.fit(X_train, y_train)

# R^2 on the training and validation parts; train_score is reported later.
train_score, val_score = (
    model.score(features, target)
    for features, target in ((X_train, y_train), (X_val, y_val))
)

print(f"교차 검증 세트 결정계수 (R^2): {val_score:.4f}")

# Forecast horizon: one row per year, 2025 through 2070
# (np.arange excludes the stop value 2071).
years_to_predict = np.arange(2025, 2071)

# Value columns start as NaN and are filled in below.
future_data = pd.DataFrame({
    '연도': years_to_predict,
    '독거노인인구': np.nan,
    '우울증환자수비율': np.nan,
    '독거노인 우울증 수': np.nan,
})

# Baseline for the projection: the values stored on the 2024 row.
last_known_year = 2024
last_known_population = data.loc[data['연도'] == last_known_year, '독거노인인구'].values[0]
last_known_depression_rate = data.loc[data['연도'] == last_known_year, '우울증환자수비율'].values[0]
last_known_depression_number = train_data.loc[train_data['연도'] == last_known_year, '독거노인 우울증 수'].values[0]

# Growth assumptions applied per year elapsed since the baseline year.
population_growth_rate = 0.007  # compounded: population * 1.007 ** elapsed
depression_rate_growth = 0.1    # additive: rate + 0.1 * elapsed (rate is divided by 100 below)

# NOTE(review): 2025 is handled differently from every later year - its
# count is fixed at 1.10 x the 2024 count instead of being derived from the
# growth assumptions above, and its population / rate cells stay NaN.
# Behavior is preserved here; confirm that this special case is intended.
is_first_year = future_data['연도'] == 2025
future_data.loc[is_first_year, '독거노인 우울증 수'] = last_known_depression_number * 1.10

# Every year after 2025 is computed in one vectorized pass instead of
# re-scanning the frame with a boolean mask several times per year.
after_first_year = future_data['연도'] > 2025
elapsed_years = future_data.loc[after_first_year, '연도'] - last_known_year
projected_population = last_known_population * (1 + population_growth_rate) ** elapsed_years
projected_rate = last_known_depression_rate + depression_rate_growth * elapsed_years

future_data.loc[after_first_year, '독거노인인구'] = projected_population
future_data.loc[after_first_year, '우울증환자수비율'] = projected_rate
future_data.loc[after_first_year, '독거노인 우울증 수'] = projected_population * (projected_rate / 100)

# Round every projected count (2025 included) to a whole number.
future_data['독거노인 우울증 수'] = future_data['독거노인 우울증 수'].round()

# Keep only the year and the projected count, show that table, then write
# the same table to an Excel workbook without the index column.
result_columns = ['연도', '독거노인 우울증 수']
future_data_result = future_data[result_columns]
print(future_data_result)

output_path = r'C:\Users\user\Desktop\독거노인 우울증 수 예측.xlsx'
future_data_result.to_excel(output_path, index=False)

# 2-fold cross-validation on the training part only, scored with R^2.
cv_scores = cross_val_score(
    model,
    X_train,
    y_train,
    scoring='r2',
    cv=2,
)

# Report the mean and spread of the fold scores.
print(
    f"교차검증 결정계수 (R^2) 평균: {cv_scores.mean():.4f}",
    f"교차검증 결정계수 (R^2) 표준편차: {cv_scores.std():.4f}",
    sep='\n',
)

# R^2 on the held-out test part, reported next to the training score that
# was computed right after fitting.
test_score = model.score(X_test, y_test)

print(
    f"훈련 세트 결정계수 (R^2): {train_score:.4f}",
    f"테스트 세트 결정계수 (R^2): {test_score:.4f}",
    sep='\n',
)


교차 검증 세트 결정계수 (R^2): 0.9619
      연도  독거노인 우울증 수
0   2025     55577.0
1   2026     55690.0
2   2027     58323.0
3   2028     60990.0
4   2029     63692.0
5   2030     66428.0
6   2031     69200.0
7   2032     72007.0
8   2033     74850.0
9   2034     77730.0
10  2035     80646.0
11  2036     83599.0
12  2037     86589.0
13  2038     89617.0
14  2039     92684.0
15  2040     95789.0
16  2041     98933.0
17  2042    102116.0
18  2043    105339.0
19  2044    108602.0
20  2045    111905.0
21  2046    115250.0
22  2047    118635.0
23  2048    122063.0
24  2049    125533.0
25  2050    129045.0
26  2051    132600.0
27  2052    136199.0
28  2053    139842.0
29  2054    143529.0
30  2055    147260.0
31  2056    151037.0
32  2057    154860.0
33  2058    158729.0
34  2059    162644.0
35  2060    166606.0
36  2061    170616.0
37  2062    174674.0
38  2063    178780.0
39  2064    182935.0
40  2065    187140.0
41  2066    191394.0
42  2067    195699.0
43  2068    200055.0
44  2069    204462.0
45  20