# **Skills for Data Analysts**

In [4]:
import pandas as pd
from scipy.stats import pearsonr
import numpy as np

# Build the DataFrame from inline literals so the analysis does not depend
# on reading an external file.
# Every score list holds 36 values (one per student), wrapped 12 per row.
data = {
    "Student_ID": range(1, 37),
    "GPA": [
        3.8, 2.5, 3.9, 3.2, 2.8, 3.5, 2.9, 3.7, 2.4, 3.6, 3.1, 2.7,
        3.8, 3.0, 3.4, 2.6, 3.9, 3.3, 2.8, 3.5, 2.9, 3.7, 2.5, 3.6,
        3.2, 2.7, 3.8, 3.1, 3.4, 2.8, 3.9, 3.3, 2.9, 3.5, 3.0, 3.7,
    ],
    "Time_Management": [
        4.3, 2.3, 4.0, 3.3, 2.7, 3.7, 3.0, 4.0, 2.0, 3.7, 3.3, 2.3,
        4.0, 3.0, 3.7, 2.7, 4.3, 3.3, 2.7, 3.7, 3.0, 4.0, 2.3, 3.7,
        3.3, 2.7, 4.0, 3.0, 3.7, 2.7, 4.3, 3.3, 2.7, 3.7, 3.0, 4.0,
    ],
    "Self_Regulated_Learning": [
        4.5, 2.0, 4.5, 3.5, 2.5, 4.0, 2.5, 4.0, 2.0, 4.0, 3.0, 2.5,
        4.5, 3.0, 3.5, 2.0, 4.5, 3.5, 2.5, 4.0, 2.5, 4.0, 2.0, 4.0,
        3.5, 2.5, 4.5, 3.0, 3.5, 2.5, 4.5, 3.5, 2.5, 4.0, 3.0, 4.0,
    ],
    "Digital_Tool_Usage": [
        4.0, 3.5, 3.5, 4.0, 3.0, 3.0, 4.5, 4.0, 3.0, 3.5, 4.0, 3.5,
        3.5, 4.0, 3.5, 3.0, 4.0, 3.5, 4.0, 3.0, 4.5, 3.5, 3.0, 3.5,
        4.0, 3.5, 3.5, 4.0, 3.5, 3.0, 4.0, 3.5, 4.0, 3.0, 4.5, 3.5,
    ],
    "Group_Study": [
        3.7, 2.3, 3.3, 3.7, 3.0, 3.3, 2.7, 4.0, 2.0, 3.0, 3.3, 2.7,
        3.7, 3.0, 3.3, 2.3, 4.0, 3.7, 2.7, 3.3, 3.0, 4.0, 2.0, 3.3,
        3.7, 2.7, 3.7, 3.0, 3.3, 2.3, 4.0, 3.7, 2.7, 3.3, 3.0, 4.0,
    ],
    # NOTE(review): these values are element-for-element identical to
    # Self_Regulated_Learning above -- confirm this is intentional and not
    # a copy-paste slip in the dataset.
    "Review_Strategies": [
        4.5, 2.0, 4.5, 3.5, 2.5, 4.0, 2.5, 4.0, 2.0, 4.0, 3.0, 2.5,
        4.5, 3.0, 3.5, 2.0, 4.5, 3.5, 2.5, 4.0, 2.5, 4.0, 2.0, 4.0,
        3.5, 2.5, 4.5, 3.0, 3.5, 2.5, 4.5, 3.5, 2.5, 4.0, 3.0, 4.0,
    ],
}

df = pd.DataFrame(data)

# Preview: heading, the first five rows, then a 50-character divider that is
# surrounded by blank lines.
print("数据预览:", df.head(), sep="\n")
print("", "=" * 50, "", sep="\n")

# Correlation analysis: Pearson r of each remaining column against GPA.
def _significance_stars(p):
    """Return the star marker for p-value *p* ("" when no threshold is met)."""
    if p < 0.001:
        return "***"
    if p < 0.01:
        return "**"
    if p < 0.05:
        return "*"
    return ""


print("各维度与GPA的相关性分析:")
print("-" * 40)

# The slice skips the first two columns and correlates every later column
# with the GPA column.
for column in df.columns[2:]:
    corr, p_value = pearsonr(df[column], df['GPA'])
    significance = _significance_stars(p_value)
    # NOTE: the ".4f" format renders any p-value below 0.00005 as "0.0000";
    # the stars carry the significance level in that case.
    print(f"{column:25s}: r = {corr:.3f}, p = {p_value:.4f} {significance}")

print(f"\n{'=' * 50}")
print("显著性说明: *** p < 0.001, ** p < 0.01, * p < 0.05")

数据预览:
   Student_ID  GPA  Time_Management  Self_Regulated_Learning  \
0           1  3.8              4.3                      4.5   
1           2  2.5              2.3                      2.0   
2           3  3.9              4.0                      4.5   
3           4  3.2              3.3                      3.5   
4           5  2.8              2.7                      2.5   

   Digital_Tool_Usage  Group_Study  Review_Strategies  
0                 4.0          3.7                4.5  
1                 3.5          2.3                2.0  
2                 3.5          3.3                4.5  
3                 4.0          3.7                3.5  
4                 3.0          3.0                2.5  


各维度与GPA的相关性分析:
----------------------------------------
Time_Management          : r = 0.981, p = 0.0000 ***
Self_Regulated_Learning  : r = 0.986, p = 0.0000 ***
Digital_Tool_Usage       : r = 0.104, p = 0.5473 
Group_Study              : r = 0.868, p = 0.0000 ***
Review