In [2]:

import pandas as pd
import numpy as np
from math import sqrt
from scipy.stats import norm

def z_test_mean(sample, mu0, alpha=0.05):
    """Run a two-sided one-sample z-test of H0: mu == mu0 on *sample*.

    The sample standard deviation (ddof=1) stands in for the population
    standard deviation, so the normal approximation is only appropriate
    for large samples.

    Args:
        sample: 1-D numeric container exposing ``shape``, ``mean()`` and
            ``std(ddof=...)`` (a pandas Series or NumPy array), with
            missing values already removed.
        mu0: Hypothesised population mean.
        alpha: Significance level; the confidence interval has level
            ``1 - alpha``.

    Returns:
        dict with keys ``n``, ``xbar``, ``s``, ``SE``, ``z``, ``p_two``,
        ``z_alpha2``, ``ci_low`` and ``ci_high``.

    Raises:
        ValueError: if there are fewer than two observations or the
            sample standard deviation is not a positive number.
    """
    n = int(sample.shape[0])
    if n < 2:
        # std(ddof=1) is undefined here; every statistic would be NaN and
        # the decision branch would silently report "fail to reject".
        raise ValueError(f"need at least 2 observations, got {n}")

    xbar = float(sample.mean())
    s = float(sample.std(ddof=1))
    if not s > 0:  # also catches NaN
        raise ValueError("sample standard deviation must be positive")

    SE = s / sqrt(n)
    z = (xbar - mu0) / SE
    # Survival function instead of 1 - cdf: the subtraction cancels to
    # exactly 0 once |z| exceeds roughly 8.3, while sf keeps the tail
    # precision down to the smallest representable doubles.
    p_two = 2 * norm.sf(abs(z))

    z_alpha2 = norm.ppf(1 - alpha / 2)
    return {
        "n": n,
        "xbar": xbar,
        "s": s,
        "SE": SE,
        "z": z,
        "p_two": p_two,
        "z_alpha2": z_alpha2,
        "ci_low": xbar - z_alpha2 * SE,
        "ci_high": xbar + z_alpha2 * SE,
    }


# The guard is true both for `python file.py` and inside a notebook kernel,
# so every name below is still defined at module level for later cells.
if __name__ == "__main__":
    PATH = "mymoviedb.csv"
    # NOTE(review): on_bad_lines="skip" drops malformed rows without any
    # report, so n may be smaller than the number of rows in the file.
    df = pd.read_csv(PATH, engine="python", on_bad_lines="skip")
    # Non-numeric popularity values become NaN and are then dropped.
    pop = pd.to_numeric(df["Popularity"], errors="coerce").dropna()

    mu0 = 1000.0   # hypothesised mean under H0
    alpha = 0.05   # significance level

    result = z_test_mean(pop, mu0, alpha)
    n = result["n"]
    xbar = result["xbar"]
    s = result["s"]
    SE = result["SE"]
    z_qs = result["z"]
    p_two = result["p_two"]
    z_alpha2 = result["z_alpha2"]
    ci_low = result["ci_low"]
    ci_high = result["ci_high"]

    print(f"H0: mu = {mu0}")
    print("H1: mu != mu0  (hai phía)")
    print(f"n  = {n}")
    print(f"x̄ (mean) = {xbar:,.6f}")
    print(f"s (std, ddof=1)= {s:,.6f}")
    print(f"SE = s/sqrt(n) = {SE:,.6f}")
    print(f"z_obs (z_qs)   = {z_qs:,.6f}")
    print(f"p-value (2-tail)= {p_two:.6g}")
    if p_two < alpha:
        print("KẾT LUẬN (p-value): p < α  → Bác bỏ H0.")
    else:
        print("KẾT LUẬN (p-value): p ≥ α → Chưa đủ cơ sở bác bỏ H0.")
    # Confidence level is derived from alpha so the label cannot drift from
    # the interval actually computed (renders as "95" for alpha = 0.05).
    print(f"CI {100 * (1 - alpha):g}% cho μ (z): ({ci_low:,.6f} ; {ci_high:,.6f})")



H0: mu = 1000.0
H1: mu != mu0  (hai phía)
n  = 9827
x̄ (mean) = 40.320570
s (std, ddof=1)= 108.874308
SE = s/sqrt(n) = 1.098285
z_obs (z_qs)   = -873.798425
p-value (2-tail)= 0
KẾT LUẬN (p-value): p < α  → Bác bỏ H0.
CI 95% cho μ (z): (38.167972 ; 42.473168)
