In [None]:
import webbrowser
from pathlib import Path

import pandas as pd
import plotly.express as px
import statsmodels.api as sm

In [None]:
# Hard-coded absolute Windows path to the source workbook; adjust it when the
# notebook is run from another machine. Only the sheet 'Sheet1' is read.
file_path = "C:\\Users\\38670\\Documents\\Documents\\2020-2023 青年失业率及季度 GDP_转置.xlsx"
data = pd.read_excel(
    io=file_path,
    sheet_name='Sheet1',
)

In [None]:
# Name the four sheet columns: period label, GDP, youth unemployment rate and
# data source.
data.columns = ['时间点', 'GDP', '青年失业率', '数据来源']
# Drop the source column and the row labelled 0, coerce both measures to
# numbers (cells that cannot be parsed become NaN), then discard every row
# that still contains a missing value.
data = (
    data
    .drop(index=0, columns=['数据来源'])
    .assign(**{
        'GDP': lambda frame: pd.to_numeric(frame['GDP'], errors='coerce'),
        '青年失业率': lambda frame: pd.to_numeric(frame['青年失业率'], errors='coerce'),
    })
    .dropna()
)

In [None]:
# Put the rows in ascending time order and compute the GDP growth rate.
# The order is obtained by reversing the rows, so re-running this cell on its
# own flips the order again.
data = (
    data.iloc[::-1]
    .reset_index(drop=True)
    .assign(**{'GDP增长率(%)': lambda frame: frame['GDP'].pct_change() * 100})
)
# The first row has no previous period, so its growth rate is NaN and the row
# is removed here.
data.dropna(inplace=True)

In [None]:
def convert_quarter_label(label):
    """Convert a Chinese quarter label into a compact ``<year>S<n>`` code.

    Parameters
    ----------
    label : str
        Text whose first four characters are the year and whose last four
        characters name the quarter ('第一季度' through '第四季度'), for example
        '2020年第一季度'. Leading and trailing whitespace is ignored.

    Returns
    -------
    str
        The four-character year followed by 'S1' to 'S4'. If the trailing
        four characters are not a known quarter name, only the year part is
        returned.
    """
    quarter_map = {
        '第一季度': 'S1',
        '第二季度': 'S2',
        '第三季度': 'S3',
        '第四季度': 'S4',
    }
    # Spreadsheet cells often carry stray spaces (including full-width ones);
    # without stripping, the fixed-width suffix lookup below would miss and
    # silently drop the quarter code.
    cleaned = label.strip()
    year = cleaned[:4]
    quarter = cleaned[-4:]
    return year + quarter_map.get(quarter, '')

In [None]:
# Derive two helper columns from the period label: the compact quarter code
# used as the point label, and the four-character year used for colouring.
data = data.assign(**{
    '时间缩写': data['时间点'].map(convert_quarter_label),
    '年份': data['时间点'].str[:4],
})

In [None]:
# One marker colour per year; years missing from this mapping end up as NaN.
color_map = dict([
    ('2020', 'red'),
    ('2021', 'green'),
    ('2022', 'blue'),
    ('2023', 'orange'),
])
data = data.assign(**{'颜色': data['年份'].map(color_map)})

In [None]:
# Fit the overall regression line: youth unemployment rate (y) regressed on
# the GDP growth rate (x) with an intercept, using ordinary least squares.
X = data['GDP增长率(%)']
y = data['青年失业率']
X_const = sm.add_constant(X)
model = sm.OLS(y, X_const).fit()
slope = model.params['GDP增长率(%)']
intercept = model.params['const']
r2 = model.rsquared
# Fold the slope's sign into the operator so a negative slope is shown as
# "y = a - bx" rather than "y = a + -bx"; the text is unchanged for
# non-negative slopes. "<br>" is the line break used in Plotly annotations.
regression_text = (
    f"回归方程：y = {intercept:.2f} {'-' if slope < 0 else '+'} {abs(slope):.2f}x"
    f"<br>R² = {r2:.3f}"
)

In [None]:
# Scatter plot of the youth unemployment rate against the GDP growth rate.
# Each point is labelled with its compact quarter code, and Plotly Express
# adds an OLS trendline drawn in black.
fig = px.scatter(
    data_frame=data,
    x='GDP增长率(%)',
    y='青年失业率',
    text='时间缩写',
    title='2020-2023年青年失业率与GDP增长率关系分析',
    labels={
        'GDP增长率(%)': 'GDP增长率 (%)',
        '青年失业率': '青年失业率 (%)',
    },
    # Hover shows the full period label and both values with two decimals.
    hover_data={
        '时间点': True,
        'GDP增长率(%)': ':.2f',
        '青年失业率': ':.2f',
    },
    trendline="ols",
    trendline_color_override="black",
)

In [None]:
# Colour every point by its year. Because the scatter was created with
# `text=`, Plotly Express gives the point trace the mode 'markers+text', so an
# exact comparison with 'markers' never matches and the colours would be
# silently skipped. Test for the 'markers' component instead; the trendline
# trace (mode 'lines') is left untouched.
fig.for_each_trace(
    lambda trace: trace.update(marker=dict(color=data['颜色']))
    if 'markers' in (trace.mode or '')
    else None
)

In [None]:
# Add the footnote and the regression formula, and tidy up the layout.
# Point labels go above their markers.
fig.update_traces(textposition='top center')
fig.update_layout(
    showlegend=False,
    title_font_size=20,
    xaxis_title='GDP增长率 (%)',
    yaxis_title='青年失业率 (%)',
    # Extra bottom margin leaves room for the footnote placed below the axes.
    margin={'b': 130},
    annotations=[
        # Footnote explaining the quarter codes and naming the data source,
        # anchored to the bottom-left of the figure in paper coordinates.
        {
            'text': "注：季度命名如“2020S1”代表2020年第一季度；数据来源：国家统计局",
            'xref': "paper",
            'yref': "paper",
            'x': 0,
            'y': -0.27,
            'showarrow': False,
            'font': {'size': 12},
        },
        # Fitted equation and R², centred just above the plotting area.
        {
            'text': regression_text,
            'xref': "paper",
            'yref': "paper",
            'x': 0.5,
            'y': 1.05,
            'showarrow': False,
            'font': {'size': 13},
            'align': "center",
        },
    ],
)

In [None]:
# Save the interactive figure as a standalone HTML file in the current working
# directory and open it in a new browser tab.
html_file_path = '散点图.html'
fig.write_html(html_file_path)
# webbrowser expects a URL; passing a bare relative filename is documented as
# unsupported and non-portable, so pass an absolute file:// URI instead.
webbrowser.open_new_tab(Path(html_file_path).resolve().as_uri())