In [1]:
import pandas as pd
import numpy as np

In [2]:
# Read the source dataset from the working directory into a DataFrame.
df = pd.read_csv(filepath_or_buffer="final.csv")

In [3]:
# Purpose: normalise column names, simulate two helper features, then perturb
# Quote_Price_SEK with several multiplicative noise terms and write the noisy
# value back into Quote_Price_SEK.
#
# NOTE(review): this cell overwrites Quote_Price_SEK, so running it twice
# without re-loading the CSV applies the noise on top of already-noised prices.
# Re-run the load cell first if you need to execute this cell again.

# Rename columns to standard format for consistency.
# Reassignment is used instead of inplace=True; the result is the same frame
# contents bound to the same name.
df = df.rename(columns={
    "Order Quantity": "Order_Quantity",
    "LME price": "LME_price",
    "Customer Category": "Customer_Category",
    "Lead Time (weeks)": "Lead_Time_weeks",
    "Quote Price (SEK)": "Quote_Price_SEK",
    "Quote Date": "Quote_Date",
    "Profile Name": "Profile_Name",
    "GD&T": "GD_T"
})

# Seed the global NumPy RNG BEFORE the first random draw. Previously the seed
# was set only after the two simulated features were generated, so those
# features (and the noise terms scaled by them) changed on every fresh run.
np.random.seed(42)

# Simulate the feature-engineered variables for noise injection.
# NOTE(review): the displayed df.head() output lists these two columns ahead of
# LME_MA_7 / LME_Lag_1, which suggests they may already exist in final.csv and
# are being overwritten here with random values — confirm this is intended.
df["Profile_Complexity"] = np.random.uniform(0.5, 2.0, size=len(df))
df["Manufacturing_Difficulty"] = np.random.randint(1, 4, size=len(df))  # 1: low, 2: med, 3: high

# 1. Controlled Random Noise: relative perturbation drawn from N(0, 0.05).
base_noise = np.random.normal(loc=0, scale=0.05, size=len(df))
df["Quote_Price_Noisy"] = df["Quote_Price_SEK"] * (1 + base_noise)

# 2. Conditional Noise for Large Orders: rows whose quantity is above the
#    median get an extra relative term drawn from N(-0.02, 0.03); others get 0.
order_noise = np.where(df["Order_Quantity"] > df["Order_Quantity"].median(),
                       np.random.normal(loc=-0.02, scale=0.03, size=len(df)),
                       0)
df["Quote_Price_Noisy"] += df["Quote_Price_SEK"] * order_noise

# 3. Profile Complexity Adjustment: N(0, 0.02) noise scaled by the complexity
#    normalised to its column maximum (so the scale factor is at most 1).
profile_factor = df["Profile_Complexity"] / df["Profile_Complexity"].max()
profile_noise = np.random.normal(loc=0, scale=0.02, size=len(df)) * profile_factor
df["Quote_Price_Noisy"] += df["Quote_Price_SEK"] * profile_noise

# 4. GD&T + Tolerances Variability: N(0.01, 0.015) noise scaled by
#    difficulty / 3 (difficulty is 1..3, so the scale is 1/3, 2/3 or 1).
tolerance_noise = (df["Manufacturing_Difficulty"] / 3.0) * np.random.normal(loc=0.01, scale=0.015, size=len(df))
df["Quote_Price_Noisy"] += df["Quote_Price_SEK"] * tolerance_noise

# 5. Final adjustments: floor at 0.01, round to 2 decimals, then replace the
#    original price column with the noisy one and drop the temporary column.
df["Quote_Price_Noisy"] = df["Quote_Price_Noisy"].clip(lower=0.01)
df["Quote_Price_Noisy"] = df["Quote_Price_Noisy"].round(2)
df["Quote_Price_SEK"] = df["Quote_Price_Noisy"]
df = df.drop(columns=["Quote_Price_Noisy"])




In [4]:
# Preview the top five rows of the processed DataFrame (head() defaults to n=5).
df.head()

Unnamed: 0,Alloy,Finish,Length,Weight,Tolerances,GD_T,Order_Quantity,LME_price,Customer_Category,Lead_Time_weeks,Quote_Price_SEK,Quote_Date,Profile_Name,Profile_Complexity,Manufacturing_Difficulty,LME_MA_7,LME_Lag_1
0,0.0,4.0,17.6,1.718,0.147,0.0,50862,2.75,2.0,11.0,3.86,2025-01-01,2.993724,1.595382,1,2.75,2.75
1,1.0,3.0,27.0,1.245,0.067,2.0,116471,2.96,1.0,9.0,2.66,2025-01-01,3.096523,1.317834,1,2.855,2.75
2,2.0,4.0,24.1,0.885,0.099,2.0,63929,3.97,2.0,7.0,3.69,2025-01-02,3.041944,1.285909,2,3.226667,2.96
3,2.0,4.0,26.1,1.042,0.152,1.0,120204,2.83,3.0,10.0,2.73,2025-01-02,3.146656,1.703251,1,3.1275,3.97
4,2.0,3.0,34.8,1.368,0.118,0.0,94419,3.59,1.0,2.0,2.98,2025-01-03,3.014675,1.382194,2,3.22,2.83


In [5]:
# Write the processed frame to disk; the row index is not written to the file.
output_file = "final1.csv"
df.to_csv(path_or_buf=output_file, index=False)