In [5]:
import pandas as pd
import numpy as np

# Read the raw CSV, then trim stray leading/trailing whitespace from every header name.
# NOTE(review): absolute, machine-specific path — consider a configurable data directory.
df = pd.read_csv(r"e:\sessions go mycode\Supply Chain Dashboard and Analysis.csv")
df.columns = [header.strip() for header in df.columns]



In [6]:
# Convert currency columns (if needed)
def parse_currency(x):
    """Convert a currency-formatted value to a float.

    The value is stringified, stripped of surrounding whitespace, and has
    thousands separators (","), the "?" placeholder and common currency
    symbols removed before conversion.

    Parameters
    ----------
    x : any
        Raw cell value (string, number, None, ...).

    Returns
    -------
    float
        The parsed number, or ``np.nan`` when the cleaned text is not a
        valid float literal.
    """
    # "?" presumably stands in for a currency symbol lost to a bad encoding — TODO confirm against the raw CSV.
    cleaned = str(x).strip()
    for token in ("?", ",", "$", "€", "£", "₹", "¥"):
        cleaned = cleaned.replace(token, "")
    try:
        return float(cleaned)
    except ValueError:
        # Only a failed conversion maps to NaN; a bare except would also hide
        # unrelated errors such as KeyboardInterrupt.
        return np.nan

# Candidate monetary columns; any name missing from df is skipped, the rest are parsed to float.
currency_cols = ["Inventory Value", "Cost to Supplier", "Shipping Costs", "Manufacturing Costs", "Transportation Costs", "Price"]
for col in currency_cols:
    if col not in df.columns:
        continue
    df[col] = df[col].map(parse_currency)

# Feature engineering: two per-row risk signals and their sum.
# Stockout risk = daily sales velocity / availability; infinite results (division by zero) become NaN.
df["Stockout Risk"] = (
    df["Sales Velocity (Daily)"]
    .div(df["Availability"])
    .replace([np.inf, -np.inf], np.nan)
)
# Holdout risk = daily sales velocity * reorder point.
df["Holdout Risk"] = df["Sales Velocity (Daily)"].mul(df["Reorder Point"])
# Risk score = sum of both signals, with a missing component counted as 0.
df["Risk Score"] = df["Stockout Risk"].fillna(0).add(df["Holdout Risk"].fillna(0))

# High-risk flag: True where the score is strictly above its 75th percentile.
df["High Risk"] = df["Risk Score"].gt(df["Risk Score"].quantile(0.75))

In [7]:
# Risk classification model: fit a random forest to predict the "High Risk" flag
# and print a per-class precision/recall/F1 report on a 20% hold-out split.
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

# Features and target
# NOTE(review): "High Risk" is defined as "Risk Score" above its 75th percentile, and
# "Risk Score" is also one of the features, so the label can be read straight off the
# inputs (target leakage). Near-perfect scores are therefore expected and say nothing
# about generalisation — reconsider the target or drop "Risk Score" from the features.
features = ["Sales Velocity (Daily)", "Availability", "Reorder Point", "Risk Score"]
X = df[features].fillna(0)  # missing feature values are treated as 0
y = df["High Risk"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Seed the forest as well as the split so the fitted model and the report are reproducible.
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Evaluation
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred))


              precision    recall  f1-score   support

       False       1.00      1.00      1.00        15
        True       1.00      1.00      1.00         5

    accuracy                           1.00        20
   macro avg       1.00      1.00      1.00        20
weighted avg       1.00      1.00      1.00        20



In [10]:
# Inspect the column labels currently on df.
# NOTE(review): the recorded output lists 'EOQ Deviation' (assigned in a later cell) and
# 'EOQ Deviates Significantly' (not assigned in any visible cell), so it appears to come
# from out-of-order execution — restart the kernel and run all cells before relying on it.
print(df.columns)


Index(['SKU', 'Sales Velocity (Daily)', 'Reorder Point', 'Availability',
       'Supplier ReOrder', 'Manufacuring Reorder', 'EOQ', 'Unnamed: 7',
       'Unnamed: 8', 'Unnamed: 9', 'Unnamed: 10', 'Unnamed: 11', 'Unnamed: 12',
       'Unnamed: 13', 'Unnamed: 14', 'Unnamed: 15', 'Unnamed: 16',
       'Unnamed: 17', 'Unnamed: 18', 'Unnamed: 19', 'Unnamed: 20',
       'Unnamed: 21', 'Unnamed: 22', 'Unnamed: 23', 'Unnamed: 24',
       'Unnamed: 25', 'Unnamed: 26', 'Unnamed: 27', 'Unnamed: 28',
       'Unnamed: 29', 'Unnamed: 30', 'Stockout Risk', 'Holdout Risk',
       'Risk Score', 'High Risk', 'EOQ Deviation',
       'EOQ Deviates Significantly'],
      dtype='object')


In [11]:
# EOQ deviation: "EOQ" minus "Manufacuring Reorder" (name spelled as in the data's header).
df["EOQ Deviation"] = df["EOQ"] - df["Manufacuring Reorder"]

# Keep only the rows where both the deviation and the "Supplier ReOrder" key are present.
df_clean = df[df[["EOQ Deviation", "Supplier ReOrder"]].notna().all(axis=1)]

# Mean and standard deviation of the deviation for each "Supplier ReOrder" value.
supplier_summary = (
    df_clean
    .groupby("Supplier ReOrder")["EOQ Deviation"]
    .agg(["mean", "std"])
    .reset_index()
)


In [12]:
# Summary statistics for df, displayed as the cell's output.
# NOTE(review): the recorded output shows count 0 for "EOQ" and several "Unnamed" columns,
# i.e. no non-null values there — presumably a CSV layout/parse issue; verify against the file.
df.describe()

Unnamed: 0,Sales Velocity (Daily),Reorder Point,Availability,EOQ,Unnamed: 16,Unnamed: 26,Unnamed: 27,Unnamed: 28,Unnamed: 29,Unnamed: 30,Stockout Risk,Holdout Risk,Risk Score
count,100.0,100.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,100.0,100.0
mean,5.16,110.633111,48.4,,,,,,,,0.288927,800.946556,801.235483
std,3.422209,89.267053,30.743317,,,,,,,,0.930261,931.198041,931.28717
min,0.0,1.066667,1.0,,,,,,,,0.0,0.0,0.0
25%,2.0,36.133333,22.75,,,,,,,,0.051012,80.35,80.393138
50%,4.0,87.044444,43.5,,,,,,,,0.107097,411.177778,411.470151
75%,8.0,168.033333,75.0,,,,,,,,0.231216,1203.961111,1204.200971
max,11.0,383.833333,100.0,,,,,,,,9.0,4222.166666,4222.280068


In [14]:
# Print the column labels as a plain Python list rather than as an Index repr.
print(df.columns.tolist())


['SKU', 'Sales Velocity (Daily)', 'Reorder Point', 'Availability', 'Supplier ReOrder', 'Manufacuring Reorder', 'EOQ', 'Unnamed: 7', 'Unnamed: 8', 'Unnamed: 9', 'Unnamed: 10', 'Unnamed: 11', 'Unnamed: 12', 'Unnamed: 13', 'Unnamed: 14', 'Unnamed: 15', 'Unnamed: 16', 'Unnamed: 17', 'Unnamed: 18', 'Unnamed: 19', 'Unnamed: 20', 'Unnamed: 21', 'Unnamed: 22', 'Unnamed: 23', 'Unnamed: 24', 'Unnamed: 25', 'Unnamed: 26', 'Unnamed: 27', 'Unnamed: 28', 'Unnamed: 29', 'Unnamed: 30', 'Stockout Risk', 'Holdout Risk', 'Risk Score', 'High Risk', 'EOQ Deviation', 'EOQ Deviates Significantly']
