In [1]:
# check_data.py
from scipy.io import loadmat
import os

def check_yelp_data(path='data/YelpChi.mat'):
    """Report whether the YelpChi .mat file exists and list its contents.

    For every key in the file that does not start with a double underscore,
    prints the key, the Python type of its value and the value's shape
    (or 'N/A' when the value has no ``shape`` attribute). Nothing is returned;
    all results go to stdout.

    Args:
        path: Location of the YelpChi .mat file. The same path is used for
            both the existence check and the load, so the "Found" message
            always refers to the file that is actually read. Defaults to
            the relative path the script loads from.
    """
    if not os.path.exists(path):
        print("YelpChi.mat not found!")
        return

    print("Found YelpChi.mat, checking contents...")
    # Keep the try body to the one call that is expected to fail (corrupt or
    # unsupported file); this is a diagnostic script, so report and move on.
    try:
        data = loadmat(path)
    except Exception as e:
        print(f"Error loading YelpChi.mat: {e}")
        return

    print("Keys in YelpChi.mat:")
    for key, value in data.items():
        # Skip the double-underscore entries; only the stored variables are
        # of interest here.
        if key.startswith('__'):
            continue
        shape = getattr(value, 'shape', 'N/A')
        print(f"  {key}: {type(value)}, shape: {shape}")

def check_amazon_data(path='data/Amazon.mat'):
    """Report whether the Amazon .mat file exists and list its contents.

    For every key in the file that does not start with a double underscore,
    prints the key, the Python type of its value and the value's shape
    (or 'N/A' when the value has no ``shape`` attribute). Nothing is returned;
    all results go to stdout.

    Args:
        path: Location of the Amazon .mat file. The same path is used for
            both the existence check and the load, so the "Found" message
            always refers to the file that is actually read. Defaults to
            the relative path the script loads from.
    """
    if not os.path.exists(path):
        print("Amazon.mat not found!")
        return

    print("\nFound Amazon.mat, checking contents...")
    # Keep the try body to the one call that is expected to fail (corrupt or
    # unsupported file); this is a diagnostic script, so report and move on.
    try:
        data = loadmat(path)
    except Exception as e:
        print(f"Error loading Amazon.mat: {e}")
        return

    print("Keys in Amazon.mat:")
    for key, value in data.items():
        # Skip the double-underscore entries; only the stored variables are
        # of interest here.
        if key.startswith('__'):
            continue
        shape = getattr(value, 'shape', 'N/A')
        print(f"  {key}: {type(value)}, shape: {shape}")

if __name__ == "__main__":
    # Script entry point: inspect the raw .mat datasets first, then report
    # which of the derived Yelp adjacency-list pickles are present on disk.
    print("Checking data files...")
    check_yelp_data()
    check_amazon_data()

    print("\nChecking processed files...")
    # Paths are relative to the current working directory.
    processed_files = [
        'data/yelp_rur_adjlists.pickle',
        'data/yelp_rtr_adjlists.pickle',
        'data/yelp_rsr_adjlists.pickle',
        'data/yelp_homo_adjlists.pickle',
    ]

    for pickle_path in processed_files:
        if not os.path.exists(pickle_path):
            print(f"✗ {pickle_path} missing")
            continue
        print(f"✓ {pickle_path} exists")

Checking data files...
Found YelpChi.mat, checking contents...
Keys in YelpChi.mat:
  Network: <class 'scipy.sparse._csc.csc_matrix'>, shape: (23831, 23831)
  Label: <class 'numpy.ndarray'>, shape: (1, 23831)
  Attributes: <class 'scipy.sparse._csc.csc_matrix'>, shape: (23831, 32)

Found Amazon.mat, checking contents...
Keys in Amazon.mat:
  Network: <class 'scipy.sparse._csc.csc_matrix'>, shape: (10224, 10224)
  Label: <class 'numpy.ndarray'>, shape: (1, 10224)
  Attributes: <class 'scipy.sparse._csc.csc_matrix'>, shape: (10224, 25)

Checking processed files...
✗ data/yelp_rur_adjlists.pickle missing
✗ data/yelp_rtr_adjlists.pickle missing
✗ data/yelp_rsr_adjlists.pickle missing
✗ data/yelp_homo_adjlists.pickle missing
