In [5]:
import pandas as pd
import os

def _print_banner(title, leading_blank=True):
    """Print *title* framed by two 80-character ``=`` rules."""
    rule = "=" * 80
    print(f"\n{rule}" if leading_blank else rule)
    print(title)
    print(rule)


def inspect_data(
    data_path="C:/Users/sis/Desktop/mini1 - Copy/alternatedata/raw_data/data.csv",
):
    """Print a structural overview of a CSV file.

    The report is written to stdout and consists of: the dataset shape, every
    column name (numbered from 1), a count of columns per dtype, details for
    the first ten and last ten columns, a per-value sample count of the
    ``APP_NAME`` column when that column exists, and a short summary.

    Args:
        data_path: Path of the CSV file to load. Defaults to the location the
            original script was written against.

    Returns:
        None. If ``data_path`` is not an existing file, an error line is
        printed and the function returns without reading anything.
    """
    # isfile (not exists): a directory at this path could not be read as CSV.
    if not os.path.isfile(data_path):
        print(f"Error: File not found at {data_path}")
        return

    _print_banner("DATA INSPECTION", leading_blank=False)

    df = pd.read_csv(data_path)
    columns = df.columns
    n_cols = len(columns)
    dtype_counts = df.dtypes.value_counts()

    print(f"Dataset shape: {df.shape}")
    print(f"Total rows: {len(df)}")
    print(f"Total columns: {n_cols}")

    _print_banner("ALL COLUMN NAMES")
    for position, col in enumerate(columns, start=1):
        print(f"{position:3d}. {col}")

    _print_banner("COLUMN TYPES")
    print(dtype_counts)

    _print_banner("FIRST 10 COLUMNS DETAILED")
    for position, col in enumerate(columns[:10], start=1):
        series = df[col]
        print(f"\n{position}. {col}")
        print(f"   Type: {series.dtype}")
        print(f"   Unique values: {series.nunique()}")
        # drop_duplicates keeps first-occurrence order like unique(), and
        # tolist() yields plain Python scalars so the printed list stays
        # readable (no ``np.int64(...)`` wrappers).
        print(f"   Sample values: {series.drop_duplicates().head(5).tolist()}")

    _print_banner("LAST 10 COLUMNS DETAILED")
    # Clamp at 0 so the 1-based numbering stays correct when the file has
    # fewer than ten columns.
    tail_start = max(n_cols - 10, 0)
    for position, col in enumerate(columns[tail_start:], start=tail_start + 1):
        series = df[col]
        print(f"\n{position}. {col}")
        print(f"   Type: {series.dtype}")
        print(f"   Unique values: {series.nunique()}")
        # Any numeric dtype gets statistics (not only int64/float64); booleans
        # are excluded, as they were before.
        is_numeric = pd.api.types.is_numeric_dtype(series.dtype)
        if is_numeric and not pd.api.types.is_bool_dtype(series.dtype):
            print(f"   Min: {series.min()}")
            print(f"   Max: {series.max()}")
            print(f"   Mean: {series.mean():.2f}")

    _print_banner("BENCHMARK ANALYSIS")
    if 'APP_NAME' in columns:
        benchmarks = df['APP_NAME'].value_counts()
        print(f"Total benchmarks: {len(benchmarks)}")
        print("Benchmarks:")
        for bench, count in benchmarks.items():
            print(f"  {bench}: {count} samples")

    _print_banner("SUMMARY")
    # Stringify dtypes / intify counts so the dict prints without numpy reprs.
    type_summary = {str(dtype): int(count) for dtype, count in dtype_counts.items()}
    print(f"✅ Total columns: {n_cols}")
    print(f"✅ Total samples: {len(df)}")
    print(f"✅ Column types: {type_summary}")
    print("✅ Ready for analysis!")

# Script entry point: run the inspection only when this code is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    inspect_data() 

DATA INSPECTION
Dataset shape: (3072, 292)
Total rows: 3072
Total columns: 292

ALL COLUMN NAMES
  1. APP_NAME
  2. funsafe_math_optimizations
  3. fno_guess_branch_probability
  4. fno_ivopts
  5. fno_tree_loop_optimize
  6. fno_inline_functions
  7. funroll_all_loops
  8. o2
  9. code_size
 10. noBasicBlock
 11. nobasicBlockSingleSocc
 12. nobasicBlock2Socc
 13. noBasicBlockSoccMore
 14. noBasicBlockPred
 15. noBasicBlock2Pred
 16. noBasicBlockPredMore
 17. ft8
 18. ft9
 19. ft10
 20. ft11
 21. ft12
 22. ft13
 23. ft14
 24. ft15
 25. ft16
 26. ft17
 27. ft19
 28. ft20
 29. ft21
 30. ft22
 31. ft23
 32. ft24
 33. ft25
 34. ft26
 35. ft27
 36. ft28
 37. ft29
 38. ft30
 39. ft31
 40. ft33
 41. ft34
 42. ft35
 43. ft36
 44. ft37
 45. ft38
 46. ft39
 47. ft40
 48. ft41
 49. ft42
 50. ft43
 51. ft44
 52. ft45
 53. ft46
 54. ft47
 55. ft48
 56. ft49
 57. ft50
 58. ft51
 59. ft52
 60. ft53
 61. ft54
 62. ft55
 63. ILP32_1
 64. arithmetic_1
 65. InstrFootprint64_1
 66. DataFootprint64_1
 67. 