In [1]:
#cell 1 - load obt 
import os

import pandas as pd
from dotenv import load_dotenv
from sqlalchemy import create_engine
from sqlalchemy.engine import URL

# Load variables from the local .env file into the process environment.
load_dotenv()

# Read the connection settings from environment variables.
# HOST and PORT fall back to a local PostgreSQL default; the rest have no default.
USER = os.getenv("DB_USER")
PASSWORD = os.getenv("DB_PASS")
HOST = os.getenv("DB_HOST","localhost")
PORT = os.getenv("DB_PORT","5432")
DB = os.getenv("DB_NAME")

# Sanity check (debugging aid). Only report whether the password was found;
# never print the credential itself, since notebook outputs get saved and shared.
if not PASSWORD:
    print("警告:未找到数据库密码,请检查.env文件")
else:
    print("配置加载成功(具体信息已隐藏)")

# Build the URL from components instead of an f-string:
#   * URL.create escapes special characters (e.g. '@', ':', '/') in the
#     username/password, which would otherwise corrupt the connection string;
#   * fields that are None are omitted rather than rendered as the text "None".
db_url = URL.create(
    drivername="postgresql+psycopg2",
    username=USER,
    password=PASSWORD,
    host=HOST,
    port=int(PORT),  # URL.create expects an integer port
    database=DB,
)
engine = create_engine(db_url)

# Pull the whole analysis table into memory as a DataFrame.
df = pd.read_sql("SELECT * FROM analysis.analysis_orders_obt", engine)

# Basic checks: table dimensions plus summary statistics of the key numeric columns.
df.shape, df[['delay_days','review_score','gmv']].describe()


配置加载成功(具体信息已隐藏)


((96470, 26),
          delay_days  review_score           gmv
 count  96470.000000  96470.000000  96470.000000
 mean     -10.957013      4.126392    159.853663
 std       10.019197      1.325737    218.820405
 min     -146.000000      0.000000      0.000000
 25%      -16.000000      4.000000     61.880000
 50%      -11.000000      5.000000    105.280000
 75%       -6.000000      5.000000    176.330000
 max      188.000000      5.000000  13664.080000)

In [2]:
#cell 2 - calculate statistical fields

# Correlate delivery delay with review score using both a linear (Pearson)
# and a rank-based (Spearman) coefficient, read off the 2x2 correlation matrix.
# NOTE(review): the describe() output in cell 1 shows review_score with a
# minimum of 0 — if 0 stands for "no review", it may distort these numbers; confirm.
delay_vs_review = df[["delay_days", "review_score"]]
pearson_matrix = delay_vs_review.corr(method="pearson")
spearman_matrix = delay_vs_review.corr(method="spearman")
corr_p = pearson_matrix.loc["delay_days", "review_score"]
corr_s = spearman_matrix.loc["delay_days", "review_score"]
print(f"Pearson corr(delay,review) = {corr_p:.4f}")
print(f"Spearman corr(delay,review) = {corr_s:.4f}")

# Review-score summary (row count, mean, median) for each delivery status.
score_by_status = df.groupby("delivery_status")["review_score"]
status_summary = score_by_status.agg(["count","mean","median"])
print(status_summary)



Pearson corr(delay,review) = -0.2611
Spearman corr(delay,review) = -0.1780
                 count      mean  median
delivery_status                         
Late_Severe       5164  1.882455     1.0
Late_Small        2662  3.726521     4.0
OnTime           88644  4.269121     5.0
