In [1]:
import pandas as pd 

In [3]:
# Load the raw Gain workbook into a DataFrame.
# NOTE(review): this is an absolute, machine-specific Windows path, so the cell
# only runs where that exact directory exists; consider a configurable data dir.
# The file has a double extension (".csv.xlsx") and is read as an Excel workbook.
path = r"E:\VIIT\TY\UST Analog Workflow Automation\Project\UST_Analog_automation\data\raw\Opam\Gain_dataset.csv.xlsx"
dff = pd.read_excel(path)

In [4]:
# Inspect the raw sheet's dimensions as (rows, columns).
dff.shape

(10, 2000)

In [7]:


# Step 1: drop the odd-positioned columns (counting from 1), i.e. keep the
# 2nd, 4th, 6th, ... column -> 0-based positions 1, 3, 5, ...
even_cols = list(dff.columns[1::2])
df_even = dff.loc[:, even_cols]


# Step 2: read W, X, Y out of each column label and emit one row per Gain value
def _sizing_value(label, key, terminator):
    """Return the float that sits between `key` and the next `terminator` in `label`."""
    return float(label.split(key)[1].split(terminator)[0])


rows = []
for col in df_even.columns:
    # Example column: Gain (w=1.0, x=2.0, y=3.0) Y
    name = col.lower()
    w = _sizing_value(name, "w=", ",")
    x = _sizing_value(name, "x=", ",")
    y = _sizing_value(name, "y=", ")")

    # Every non-null entry of the column becomes its own row.
    gains = df_even[col].dropna().values
    rows.extend({"W": w, "X": x, "Y": y, "Gain": g} for g in gains)

ml_df = pd.DataFrame(rows)

In [8]:
# Preview the first rows together with the table's (rows, columns) shape;
# the cell displays both as a single tuple.
ml_df.head(), ml_df.shape


(          W         X         Y       Gain
 0  0.000038  0.000002  0.000011  56.053547
 1  0.000038  0.000002  0.000011  56.053547
 2  0.000038  0.000002  0.000011  56.053547
 3  0.000038  0.000002  0.000011  56.053547
 4  0.000038  0.000002  0.000011  56.053547,
 (10000, 4))

In [9]:
# Show the first 10 rows of the reshaped table.
ml_df.head(10)

Unnamed: 0,W,X,Y,Gain
0,3.8e-05,2e-06,1.1e-05,56.053547
1,3.8e-05,2e-06,1.1e-05,56.053547
2,3.8e-05,2e-06,1.1e-05,56.053547
3,3.8e-05,2e-06,1.1e-05,56.053547
4,3.8e-05,2e-06,1.1e-05,56.053547
5,3.8e-05,2e-06,1.1e-05,56.053547
6,3.8e-05,2e-06,1.1e-05,56.053547
7,3.8e-05,2e-06,1.1e-05,56.053547
8,3.8e-05,2e-06,1.1e-05,56.053547
9,3.8e-05,2e-06,1.1e-05,56.053547


In [10]:
import pandas as pd
import re

# ----------------------------
# 1. Load raw dataset
# ----------------------------
# NOTE(review): absolute, machine-specific path; this cell only runs where that
# exact directory exists. Consider a configurable data directory.
input_path = r"E:\VIIT\TY\UST Analog Workflow Automation\Project\UST_Analog_automation\data\raw\Opam\Gain_dataset.csv.xlsx"
output_path = "gain_ml_ready.csv"

df = pd.read_excel(input_path)

# ----------------------------
# 2. Keep only even-indexed columns (1-based)
#    => remove odd index columns
# ----------------------------
even_cols = [col for i, col in enumerate(df.columns) if (i + 1) % 2 == 0]
df = df[even_cols]

# ----------------------------
# 3. Build ML-ready rows
#    One column = one sample
# ----------------------------
rows = []
skipped_cols = []  # columns that yield no sample; reported below instead of vanishing

# One float literal: optional sign, digits with optional fraction, optional
# signed exponent. Compared with a bare character class this also accepts
# exponents written with "+" (e.g. "1e+05") and cannot capture non-numeric
# runs such as "--" or "e" that would make float() raise.
float_pat = r"[+-]?(?:\d+\.?\d*|\.\d+)(?:[eE][+-]?\d+)?"
pattern = rf"w=({float_pat}),\s*x=({float_pat}),\s*y=({float_pat})"

for col in df.columns:
    # Extract W, X, Y from the column name. str() guards against non-string
    # headers; matching ignores case so "W=" and "w=" are treated alike.
    match = re.search(pattern, str(col), flags=re.IGNORECASE)
    if not match:
        skipped_cols.append(col)
        continue

    w = float(match.group(1))
    x = float(match.group(2))
    y = float(match.group(3))

    # Take ONE representative Gain value: the first non-null entry.
    # Assumes every row of a column holds the same value -- TODO confirm.
    values = df[col].dropna()
    if values.empty:
        # An all-NaN column has no value to take; skip it rather than crash.
        skipped_cols.append(col)
        continue
    gain = values.iloc[0]

    rows.append({
        "W": w,
        "X": x,
        "Y": y,
        "Gain": gain
    })

# ----------------------------
# 4. Create final DataFrame
# ----------------------------
# Explicit columns keep the CSV header stable even when no row was produced.
ml_df = pd.DataFrame(rows, columns=["W", "X", "Y", "Gain"])

# ----------------------------
# 5. Save ML-ready CSV
# ----------------------------
ml_df.to_csv(output_path, index=False)

if skipped_cols:
    # Only printed when something was dropped, so a clean run's output is unchanged.
    print(f"WARNING: skipped {len(skipped_cols)} column(s) with no W/X/Y label or no data, e.g.:", skipped_cols[:5])

print("✅ ML-ready dataset saved as:", output_path)
print("Shape:", ml_df.shape)


✅ ML-ready dataset saved as: gain_ml_ready.csv
Shape: (1000, 4)


In [11]:
# Show the last 10 rows of the ML-ready table.
ml_df.tail(10)

Unnamed: 0,W,X,Y,Gain
990,8.4e-05,5e-06,1.1e-05,34.674895
991,8.4e-05,5e-06,1e-05,34.688571
992,8.4e-05,5e-06,5e-06,36.368562
993,8.4e-05,5e-06,6e-06,36.343296
994,8.4e-05,5e-06,6e-06,36.314553
995,8.4e-05,5e-06,7e-06,36.284541
996,8.4e-05,5e-06,8e-06,36.254448
997,8.4e-05,5e-06,8e-06,36.224919
998,8.4e-05,5e-06,9e-06,36.1963
999,8.4e-05,5e-06,1e-05,36.168762


In [15]:
import pandas as pd
import re

# ----------------------------
# 1. Load raw dataset
# ----------------------------
# NOTE(review): absolute, machine-specific path; this cell only runs where that
# exact directory exists. Consider a configurable data directory.
input_path = r"E:\VIIT\TY\UST Analog Workflow Automation\Project\UST_Analog_automation\data\raw\Opam\UGF_dataset.csv"
output_path = "ugf_ml_ready.csv"

df = pd.read_csv(input_path)

# ----------------------------
# 2. Keep only even-indexed columns (1-based)
#    => remove odd index columns
# ----------------------------
even_cols = [col for i, col in enumerate(df.columns) if (i + 1) % 2 == 0]
df = df[even_cols]

# ----------------------------
# 3. Build ML-ready rows
#    One column = one sample
# ----------------------------
rows = []
skipped_cols = []  # columns that yield no sample; reported below instead of vanishing

# One float literal: optional sign, digits with optional fraction, optional
# signed exponent. Compared with a bare character class this also accepts
# exponents written with "+" (e.g. "1e+05") and cannot capture non-numeric
# runs such as "--" or "e" that would make float() raise.
float_pat = r"[+-]?(?:\d+\.?\d*|\.\d+)(?:[eE][+-]?\d+)?"
pattern = rf"w=({float_pat}),\s*x=({float_pat}),\s*y=({float_pat})"

for col in df.columns:
    # Extract W, X, Y from the column name. str() guards against non-string
    # headers; matching ignores case so "W=" and "w=" are treated alike.
    match = re.search(pattern, str(col), flags=re.IGNORECASE)
    if not match:
        skipped_cols.append(col)
        continue

    w = float(match.group(1))
    x = float(match.group(2))
    y = float(match.group(3))

    # Take ONE representative value: the first non-null entry.
    # Assumes every row of a column holds the same value -- TODO confirm.
    values = df[col].dropna()
    if values.empty:
        # An all-NaN column has no value to take; skip it rather than crash.
        skipped_cols.append(col)
        continue
    gain = values.iloc[0]

    # NOTE(review): these values come from UGF_dataset.csv but are stored under
    # the "Gain" key. The key is left unchanged so the schema of
    # ugf_ml_ready.csv stays the same for anything that already reads it;
    # confirm whether the column should be renamed.
    rows.append({
        "W": w,
        "X": x,
        "Y": y,
        "Gain": gain
    })

# ----------------------------
# 4. Create final DataFrame
# ----------------------------
# Explicit columns keep the CSV header stable even when no row was produced.
ml_df = pd.DataFrame(rows, columns=["W", "X", "Y", "Gain"])

# ----------------------------
# 5. Save ML-ready CSV
# ----------------------------
ml_df.to_csv(output_path, index=False)

if skipped_cols:
    # Only printed when something was dropped, so a clean run's output is unchanged.
    print(f"WARNING: skipped {len(skipped_cols)} column(s) with no W/X/Y label or no data, e.g.:", skipped_cols[:5])

print("✅ ML-ready dataset saved as:", output_path)
print("Shape:", ml_df.shape)


✅ ML-ready dataset saved as: ugf_ml_ready.csv
Shape: (1000, 4)
