In [1]:
import numpy as np
import os

def analyze_npz_file(file_path):
    """
    Analyze a .npz file (an archive of named numpy arrays) and print a report.

    The report contains the on-disk file size, the list of keys and, for
    every stored array, its shape, dtype, element count, in-memory size and
    a small data sample. Min/max/mean/std are printed only for non-empty
    arrays with a boolean or numeric dtype; other dtypes (strings,
    datetimes, ...) are reported without statistics so that one such array
    does not abort the report for the remaining keys.

    Args:
        file_path: Path of the .npz file to inspect.

    Returns:
        None. Everything is written to stdout. A missing file or a load
        error is reported as a printed message instead of being raised.
    """
    print(f"\n{'='*60}")
    print(f"PH√ÇN T√çCH FILE: {file_path}")
    print(f"{'='*60}")

    if not os.path.exists(file_path):
        print(f"‚ùå File kh√¥ng t·ªìn t·∫°i: {file_path}")
        return

    # Report the size of the file on disk
    file_size = os.path.getsize(file_path)
    print(f"üìÅ K√≠ch th∆∞·ªõc file: {file_size:,} bytes ({file_size/1024/1024:.2f} MB)")

    try:
        # Read the .npz archive; the context manager closes it afterwards
        with np.load(file_path) as data:
            print(f"üîç S·ªë l∆∞·ª£ng arrays trong file: {len(data.files)}")
            print(f"üìù Danh s√°ch keys: {list(data.files)}")

            total_memory = 0

            for key in data.files:
                print(f"\nüî∏ Key: '{key}'")

                try:
                    array = data[key]
                except ValueError as e:
                    # np.load is called with its default allow_pickle=False,
                    # which refuses pickled object arrays. Report the entry
                    # and keep going with the remaining keys.
                    print(f"   - Could not load array: {e}")
                    continue

                array_size = array.nbytes
                total_memory += array_size

                print(f"   - Shape: {array.shape}")
                print(f"   - Dtype: {array.dtype}")
                print(f"   - Size: {array.size:,} elements")
                print(f"   - Memory: {array_size:,} bytes ({array_size/1024/1024:.2f} MB)")

                # Basic statistics. dtype.kind codes: b=bool, i/u=integer,
                # f=float, c=complex. Other kinds either cannot be reduced
                # or cannot be formatted with ":.6f".
                if array.size > 0:
                    if array.dtype.kind in "biufc":
                        print(f"   - Min: {array.min():.6f}")
                        print(f"   - Max: {array.max():.6f}")
                        print(f"   - Mean: {array.mean():.6f}")
                        print(f"   - Std: {array.std():.6f}")
                    else:
                        print(f"   - Stats: skipped (non-numeric dtype {array.dtype})")

                # Show a data sample (the whole array only when it is small)
                if array.size <= 100:
                    print(f"   - Sample data:\n{array}")
                elif array.ndim >= 2:
                    # Slices clamp to the array bounds, so no explicit
                    # fallback is needed when a dimension is shorter than 3.
                    print(f"   - Sample (first 3x3):\n{array[:3, :3]}")
                else:
                    print(f"   - Sample (first 10): {array.flat[:10]}")

            print(f"\nüíæ T·ªïng b·ªô nh·ªõ c√°c arrays: {total_memory:,} bytes ({total_memory/1024/1024:.2f} MB)")

    except Exception as e:
        # Top-level boundary of a reporting tool: print the failure instead
        # of raising so the caller's remaining analysis steps still run.
        print(f"‚ùå L·ªói khi ƒë·ªçc file: {e}")

def analyze_npy_file(file_path):
    """
    Analyze a .npy file (a single numpy array) and print a report.

    The report contains the on-disk file size, the array's shape, dtype,
    element count and in-memory size, basic statistics, a data sample and,
    for square 2-D arrays, a few matrix figures (non-zero count, density,
    symmetry, diagonal sum). Statistics and the matrix figures are printed
    only for boolean/numeric dtypes.

    Args:
        file_path: Path of the .npy file to inspect.

    Returns:
        None. Everything is written to stdout. A missing file or a load
        error is reported as a printed message instead of being raised.
    """
    print(f"\n{'='*60}")
    print(f"PH√ÇN T√çCH FILE: {file_path}")
    print(f"{'='*60}")

    if not os.path.exists(file_path):
        print(f"‚ùå File kh√¥ng t·ªìn t·∫°i: {file_path}")
        return

    # Report the size of the file on disk
    file_size = os.path.getsize(file_path)
    print(f"üìÅ K√≠ch th∆∞·ªõc file: {file_size:,} bytes ({file_size/1024/1024:.2f} MB)")

    try:
        # Read the .npy file
        array = np.load(file_path)

        print(f"üî∏ Array info:")
        print(f"   - Shape: {array.shape}")
        print(f"   - Dtype: {array.dtype}")
        print(f"   - Size: {array.size:,} elements")
        print(f"   - Memory: {array.nbytes:,} bytes ({array.nbytes/1024/1024:.2f} MB)")

        # dtype.kind codes: b=bool, i/u=integer, f=float, c=complex. Other
        # kinds cannot be reduced or formatted with ":.6f" below.
        is_numeric = array.dtype.kind in "biufc"

        # Basic statistics
        if array.size > 0:
            if is_numeric:
                print(f"   - Min: {array.min():.6f}")
                print(f"   - Max: {array.max():.6f}")
                print(f"   - Mean: {array.mean():.6f}")
                print(f"   - Std: {array.std():.6f}")
            else:
                print(f"   - Stats: skipped (non-numeric dtype {array.dtype})")

        # Show a data sample (the whole array only when it is small)
        if array.size <= 100:
            print(f"   - Full data:\n{array}")
        elif array.ndim >= 2:
            print(f"   - Sample (first 5x5):")
            # Slices clamp to the array bounds, so this never prints more
            # than 5 rows/columns even when one dimension is shorter than 5.
            print(array[:5, :5])
        else:
            print(f"   - Sample (first 20): {array.flat[:20]}")

        # Extra figures for square matrices (treated as adjacency matrices)
        if is_numeric and array.ndim == 2 and array.shape[0] == array.shape[1]:
            edge_count = np.count_nonzero(array)
            # A 0x0 matrix has no cells; report 0% instead of dividing by 0.
            density = edge_count / array.size * 100 if array.size else 0.0

            print(f"\nüîç PH√ÇN T√çCH ADJACENCY MATRIX:")
            print(f"   - L√† ma tr·∫≠n vu√¥ng: {array.shape[0]}x{array.shape[1]}")
            print(f"   - S·ªë edges (non-zero): {edge_count:,}")
            print(f"   - Density: {density:.2f}%")
            print(f"   - Symmetric: {np.allclose(array, array.T)}")
            print(f"   - Diagonal sum: {np.trace(array):.6f}")

    except Exception as e:
        # Top-level boundary of a reporting tool: print the failure instead
        # of raising so the caller's remaining analysis steps still run.
        print(f"‚ùå L·ªói khi ƒë·ªçc file: {e}")

def compare_files(npz_path="data/bike.npz", npy_path="data/bike_svd.npy"):
    """
    Compare a .npz archive with a .npy array and report shape relations.

    For every array in the archive that has at least two dimensions, its
    shape is printed, and a match is flagged when its first or second
    dimension equals the first dimension of the .npy array.

    Args:
        npz_path: Path of the .npz archive. Defaults to the bike dataset.
        npy_path: Path of the .npy array. Defaults to the bike SVD file.

    Returns:
        None. Everything is written to stdout. Missing files and load
        errors are reported as printed messages instead of being raised.
    """
    print(f"\n{'='*60}")
    print("SO S√ÅNH V√Ä PH√ÇN T√çCH M·ªêI LI√äN H·ªÜ")
    print(f"{'='*60}")

    if not (os.path.exists(npz_path) and os.path.exists(npy_path)):
        print("‚ùå M·ªôt ho·∫∑c c·∫£ 2 file kh√¥ng t·ªìn t·∫°i")
        return

    try:
        npy_data = np.load(npy_path)

        # The archive keeps a file handle open; the context manager makes
        # sure it is closed even if one of the arrays fails to load.
        with np.load(npz_path) as npz_data:
            print("üîç T√¨m m·ªëi li√™n h·ªá gi·ªØa 2 file:")

            node_count = npy_data.shape[0]

            # Compare dimensions
            for key in npz_data.files:
                array = npz_data[key]
                if array.ndim >= 2:
                    print(f"   - {key} shape: {array.shape}")
                    if node_count in array.shape[:2]:
                        print("     ‚úÖ C√≥ li√™n h·ªá v·ªÅ s·ªë nodes v·ªõi adjacency matrix")

        print(f"   - {os.path.basename(npy_path)} shape: {npy_data.shape}")
        print(f"   - Adjacency matrix cho {npy_data.shape[0]} nodes")

    except Exception as e:
        # Top-level boundary of a reporting tool: print the failure instead
        # of raising so the caller's remaining steps still run.
        print(f"‚ùå L·ªói khi so s√°nh: {e}")

# Run the full analysis of the bike dataset
print("üöÄ B·∫ÆT ƒê·∫¶U PH√ÇN T√çCH D·ªÆ LI·ªÜU BIKE")

# Report on the bike.npz archive
analyze_npz_file("data/bike.npz")

# Report on the bike_svd.npy array
analyze_npy_file("data/bike_svd.npy")

# Cross-check the two files against each other
compare_files()

# Closing banner: blank line, rule, completion message, rule
print("\n" + "=" * 60)
print("‚úÖ HO√ÄN TH√ÄNH PH√ÇN T√çCH")
print("=" * 60)

üöÄ B·∫ÆT ƒê·∫¶U PH√ÇN T√çCH D·ªÆ LI·ªÜU BIKE

PH√ÇN T√çCH FILE: data/bike.npz
üìÅ K√≠ch th∆∞·ªõc file: 35,174,604 bytes (33.55 MB)
üîç S·ªë l∆∞·ª£ng arrays trong file: 11
üìù Danh s√°ch keys: ['train_x', 'train_target', 'train_timestamp', 'val_x', 'val_target', 'val_timestamp', 'test_x', 'test_target', 'test_timestamp', 'mean', 'std']

üî∏ Key: 'train_x'
   - Shape: (3001, 250, 2, 12)
   - Dtype: float64
   - Size: 18,006,000 elements
   - Memory: 144,048,000 bytes (137.37 MB)
   - Min: -1.000000
   - Max: 1.000000
   - Mean: -0.937762

üî∏ Key: 'train_x'
   - Shape: (3001, 250, 2, 12)
   - Dtype: float64
   - Size: 18,006,000 elements
   - Memory: 144,048,000 bytes (137.37 MB)
   - Min: -1.000000
   - Max: 1.000000
   - Mean: -0.937762
   - Std: 0.092912
   - Sample (first 3x3):
[[[[-0.97916667 -1.         -1.         -1.         -1.
    -1.         -1.         -1.         -1.         -1.
    -1.         -1.        ]
   [-1.         -1.         -1.         -1.         -1.
    -