In [1]:
import pandas as pd
import numpy as np

In [2]:
def inspect_csv(csv_path, feature_cols, target_col):
    """Print a data-quality report for a CSV file intended for ML use.

    The report covers, in order: shape, column names, dtypes, missing
    values per column, duplicate row count, presence of the requested
    feature/target columns, whether those columns are numeric, and
    ``describe()`` statistics for them.

    Columns that are requested but absent from the file are reported and
    then skipped, so the remaining checks still run instead of failing
    with a ``KeyError``.

    Parameters
    ----------
    csv_path : str or path-like
        Location of the CSV file; passed straight to ``pd.read_csv``.
    feature_cols : iterable of str, or str
        Names of the feature columns. Any iterable is accepted; a single
        string is treated as one column name.
    target_col : str
        Name of the target column.

    Returns
    -------
    None
        The report is written to stdout only.

    Raises
    ------
    Whatever ``pd.read_csv`` raises when the file cannot be read or parsed.
    """
    print("=" * 60)
    print(f"Inspecting file: {csv_path}")
    print("=" * 60)

    # Load CSV
    df = pd.read_csv(csv_path)

    # -------------------------
    # Basic info
    # -------------------------
    print("\nShape (rows, cols):")
    print(df.shape)

    print("\nColumn names:")
    print(df.columns.tolist())

    # -------------------------
    # Data types
    # -------------------------
    print("\nData types:")
    print(df.dtypes)

    # -------------------------
    # Missing values
    # -------------------------
    print("\nMissing values per column:")
    print(df.isnull().sum())

    # -------------------------
    # Duplicate rows
    # -------------------------
    print("\nDuplicate rows count:")
    print(df.duplicated().sum())

    # -------------------------
    # Feature + target existence
    # -------------------------
    # Normalise to a list so tuples/Index objects work and so a bare string
    # is not split into single characters.
    if isinstance(feature_cols, str):
        feature_cols = [feature_cols]
    else:
        feature_cols = list(feature_cols)

    missing_features = [c for c in feature_cols if c not in df.columns]
    if missing_features:
        print("\n❌ Missing feature columns:", missing_features)
    else:
        print("\n✅ All feature columns present")

    if target_col not in df.columns:
        print(f"\n❌ Target column '{target_col}' missing")
    else:
        print(f"\n✅ Target column '{target_col}' present")

    # Only columns that actually exist can be indexed; selecting a missing
    # label would raise KeyError and abort the report.
    requested_cols = feature_cols + [target_col]
    present_cols = [c for c in requested_cols if c in df.columns]
    some_skipped = len(present_cols) != len(requested_cols)

    if not present_cols:
        # describe() cannot run on a frame with no columns, so stop here.
        print(
            "\n❌ None of the requested columns exist in the file; "
            "skipping numeric check and statistics"
        )
        print("\nCSV inspection completed ✅")
        return

    selected = df[present_cols]

    # -------------------------
    # Numeric check
    # -------------------------
    non_numeric = selected.select_dtypes(exclude=[np.number]).columns.tolist()

    if non_numeric:
        print("\n❌ Non-numeric columns found:", non_numeric)
    elif some_skipped:
        # Avoid claiming "all" when part of the request could not be checked.
        print("\n✅ All present features & target are numeric (missing columns skipped)")
    else:
        print("\n✅ All features & target are numeric")

    # -------------------------
    # Statistics
    # -------------------------
    print("\nBasic statistics:")
    print(selected.describe())

    print("\nCSV inspection completed ✅")


In [4]:
# Inspect the gain dataset: W, X, Y are the feature columns, "Gain" the target.
inspect_csv(
    r"D:\UST Project\UST_Analog_automation\data\processed\Opam\gain_ml_ready.csv",
    ["W", "X", "Y"],
    "Gain",
)


Inspecting file: D:\UST Project\UST_Analog_automation\data\processed\Opam\gain_ml_ready.csv

Shape (rows, cols):
(1000, 4)

Column names:
['W', 'X', 'Y', 'Gain']

Data types:
W       float64
X       float64
Y       float64
Gain    float64
dtype: object

Missing values per column:
W       0
X       0
Y       0
Gain    0
dtype: int64

Duplicate rows count:
0

✅ All feature columns present

✅ Target column 'Gain' present

✅ All features & target are numeric

Basic statistics:
                 W            X            Y         Gain
count  1000.000000  1000.000000  1000.000000  1000.000000
mean      0.000064     0.000004     0.000008    47.104156
std       0.000016     0.000001     0.000002     8.778978
min       0.000038     0.000002     0.000005    29.495715
25%       0.000050     0.000003     0.000006    38.729487
50%       0.000064     0.000004     0.000008    50.783553
75%       0.000078     0.000005     0.000010    55.060020
max       0.000090     0.000006     0.000011    56.315213


In [7]:
# Inspect the phase-margin dataset: W, X, Y are the feature columns.
# The trailing space in "Phase Margin " is deliberate: it matches the CSV
# header exactly as printed under "Column names" in this cell's output.
inspect_csv(
    r"D:\UST Project\UST_Analog_automation\data\processed\Opam\pm_ml_ready.csv",
    ["W", "X", "Y"],
    "Phase Margin ",
)


Inspecting file: D:\UST Project\UST_Analog_automation\data\processed\Opam\pm_ml_ready.csv

Shape (rows, cols):
(1000, 4)

Column names:
['W', 'X', 'Y', 'Phase Margin ']

Data types:
W                float64
X                float64
Y                float64
Phase Margin     float64
dtype: object

Missing values per column:
W                0
X                0
Y                0
Phase Margin     0
dtype: int64

Duplicate rows count:
0

✅ All feature columns present

✅ Target column 'Phase Margin ' present

✅ All features & target are numeric

Basic statistics:
                 W            X            Y  Phase Margin 
count  1000.000000  1000.000000  1000.000000    1000.000000
mean      0.000064     0.000004     0.000008      73.125908
std       0.000016     0.000001     0.000002      15.320596
min       0.000038     0.000002     0.000005      54.138733
25%       0.000050     0.000003     0.000006      60.711288
50%       0.000064     0.000004     0.000008      67.737617
75%       0.00

In [8]:
# Inspect the UGF dataset: W, X, Y are the feature columns, "UGF" the target.
inspect_csv(
    r"D:\UST Project\UST_Analog_automation\data\processed\Opam\ugf_ml_ready.csv",
    ["W", "X", "Y"],
    "UGF",
)


Inspecting file: D:\UST Project\UST_Analog_automation\data\processed\Opam\ugf_ml_ready.csv

Shape (rows, cols):
(1000, 4)

Column names:
['W', 'X', 'Y', 'UGF']

Data types:
W      float64
X      float64
Y      float64
UGF    float64
dtype: object

Missing values per column:
W      0
X      0
Y      0
UGF    0
dtype: int64

Duplicate rows count:
0

✅ All feature columns present

✅ Target column 'UGF' present

✅ All features & target are numeric

Basic statistics:
                 W            X            Y           UGF
count  1000.000000  1000.000000  1000.000000  1.000000e+03
mean      0.000064     0.000004     0.000008  1.305384e+08
std       0.000016     0.000001     0.000002  2.141255e+07
min       0.000038     0.000002     0.000005  5.385289e+07
25%       0.000050     0.000003     0.000006  1.203666e+08
50%       0.000064     0.000004     0.000008  1.361412e+08
75%       0.000078     0.000005     0.000010  1.461441e+08
max       0.000090     0.000006     0.000011  1.654400e+08

C