In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import io # To read the string data as a file

# Your provided CSV data as a string
csv_data = """ID,PartID,PartName,PartCategory,Supplier,UnitCost,QuantityOnHand,ReorderPoint,SafetyStock,ProductionOrderNumber,ProductionStartDate,ProductionEndDate,CycleTime,MachineID,QualityCheckStatus,InventoryLocation,LastUpdatedDate
1,P001,Engine Block,Sub-Assembly,Precision Auto,250.00,50,20,10,PO1001,2025-03-01,2025-03-05,72,M101,Pass,Assembly Line 1,2025-04-03
2,P002,Door Panel,Component,Global Metals,75.50,120,40,15,N/A,,,,,Warehouse A,2025-04-03
3,P003,Tire,Raw Material,Quality Components,45.00,200,100,50,N/A,,,,,Warehouse B,2025-04-03
4,P004,Seat,Component,Rapid Supply,110.00,80,30,20,N/A,,,,,Factory Floor,2025-04-03
5,P005,Windshield,Finished Good,OEM Suppliers,320.00,25,10,5,PO1002,2025-03-10,2025-03-12,48,M102,Pass,Assembly Line 2,2025-04-03
6,P006,Dashboard,Sub-Assembly,AutoParts Inc.,190.00,40,15,10,PO1003,2025-03-05,2025-03-08,60,M103,Pass,Assembly Line 1,2025-04-03
7,P007,Bumper,Component,Prime Auto,65.00,150,50,25,N/A,,,,,Warehouse A,2025-04-03
8,P008,Chassis,Finished Good,Superior Steel,800.00,10,5,3,PO1004,2025-03-12,2025-03-18,96,M104,Pass,Factory Floor,2025-04-03
9,P009,Gearbox,Sub-Assembly,Advance Plastics,430.00,30,12,8,PO1005,2025-03-08,2025-03-11,54,M105,Pass,Assembly Line 2,2025-04-03
10,P010,Steering Wheel,Component,Elite Components,95.00,90,30,15,N/A,,,,,Warehouse B,2025-04-03
11,P011,Exhaust System,Sub-Assembly,Precision Auto,210.00,35,15,10,PO1006,2025-03-15,2025-03-18,66,M106,Pass,Assembly Line 3,2025-04-03
12,P012,Suspension,Component,Global Metals,150.00,60,25,10,N/A,,,,,Factory Floor,2025-04-03
13,P013,Brake Pad,Component,Quality Components,25.00,300,100,50,N/A,,,,,Warehouse A,2025-04-03
14,P014,Radiator,Component,Rapid Supply,135.00,45,20,10,N/A,,,,,Warehouse B,2025-04-03
15,P015,Headlight,Component,OEM Suppliers,80.00,100,40,20,N/A,,,,,Factory Floor,2025-04-03
16,P016,Engine Block,Sub-Assembly,AutoParts Inc.,260.00,55,20,10,PO1007,2025-03-02,2025-03-06,70,M107,Pass,Assembly Line 1,2025-04-03
17,P017,Door Panel,Component,Prime Auto,70.00,130,45,20,N/A,,,,,Warehouse A,2025-04-03
18,P018,Tire,Raw Material,Superior Steel,50.00,220,110,55,N/A,,,,,Warehouse B,2025-04-03
19,P019,Seat,Component,Advance Plastics,115.00,85,35,20,N/A,,,,,Factory Floor,2025-04-03
20,P020,Windshield,Finished Good,Elite Components,310.00,28,12,5,PO1008,2025-03-11,2025-03-14,50,M108,Pass,Assembly Line 2,2025-04-03
"""

# 1. Load Data using pandas
df = pd.read_csv(io.StringIO(csv_data))

# Ensure relevant columns are numeric
numeric_cols = ['UnitCost', 'QuantityOnHand', 'ReorderPoint', 'SafetyStock', 'CycleTime']
for col in numeric_cols:
    # errors='coerce' will turn non-numeric values into NaN (Not a Number)
    df[col] = pd.to_numeric(df[col], errors='coerce')

# Handle potential missing values created by coercion or already present (if any)
# For simplicity, we'll fill potential NaNs in features with 0 or median/mean if appropriate.
# In this specific dataset, QuantityOnHand, ReorderPoint, SafetyStock seem complete.
# Let's assume 0 is okay if they were missing for this example.
df['QuantityOnHand'].fillna(0, inplace=True)
df['ReorderPoint'].fillna(0, inplace=True)
df['SafetyStock'].fillna(0, inplace=True)


# 2. Feature Engineering
# Create the target variable: 1 if supply needed, 0 otherwise
df['NeedsSupply'] = (df['QuantityOnHand'] <= df['ReorderPoint']).astype(int)

# Select features for the model
# Using the most direct indicators for this rule-based target
features = ['QuantityOnHand', 'ReorderPoint', 'SafetyStock']
X = df[features]
y = df['NeedsSupply']

# 3. Model Training
# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)

# Initialize Random Forest Classifier
# n_estimators=100 is a common default, random_state for reproducibility
rf_model = RandomForestClassifier(n_estimators=100, random_state=42, class_weight='balanced')

# Train the model
print("Training Random Forest model...")
rf_model.fit(X_train, y_train)
print("Model training complete.")

# 4. Prediction
# Predict supply need for the entire dataset
print("\nPredicting supply needs for all parts...")
df['PredictedNeedsSupply'] = rf_model.predict(X)

# --- Model Evaluation (Optional but Recommended) ---
print("\nEvaluating model performance on test data...")
y_pred_test = rf_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred_test)
report = classification_report(y_test, y_pred_test)

print(f"Model Accuracy on Test Set: {accuracy:.4f}")
print("Classification Report on Test Set:\n", report)
# --- End Evaluation ---


# 5. Identify Parts and Suppliers for Action
print("\n--- Identifying Parts Requiring Supply (based on prediction) ---")

# Filter DataFrame for parts where the model predicted 'NeedsSupply' (1)
parts_to_order = df[df['PredictedNeedsSupply'] == 1]

if parts_to_order.empty:
    print("\nNo parts currently predicted to require supply based on the model.")
else:
    print(f"\nFound {len(parts_to_order)} part(s) predicted to require supply:")
    for index, row in parts_to_order.iterrows():
        part_id = row['PartID']
        part_name = row['PartName']
        supplier = row['Supplier']
        qty_on_hand = row['QuantityOnHand']
        reorder_pt = row['ReorderPoint']

        print(f"\n  * ACTION: Order Part {part_id} ('{part_name}')")
        print(f"    - Current Quantity: {qty_on_hand}")
        print(f"    - Reorder Point: {reorder_pt}")
        print(f"    - Get from Supplier: '{supplier}'")
        # In a real system, you would trigger a Purchase Order (PO) process here.
        print(f"    - SIMULATING: Placing order with {supplier} for {part_id}...")

print("\n--- Process Complete ---")

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['QuantityOnHand'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['ReorderPoint'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values alway

Training Random Forest model...
Model training complete.

Predicting supply needs for all parts...

Evaluating model performance on test data...
Model Accuracy on Test Set: 1.0000
Classification Report on Test Set:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00         6

    accuracy                           1.00         6
   macro avg       1.00      1.00      1.00         6
weighted avg       1.00      1.00      1.00         6


--- Identifying Parts Requiring Supply (based on prediction) ---

No parts currently predicted to require supply based on the model.

--- Process Complete ---


In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import io # To read the string data as a file

# Your provided CSV data as a string
# --- MODIFIED FOR EXAMPLE ---
# We will manually set QuantityOnHand below ReorderPoint for P008 and P009
# Original P008: QuantityOnHand=10, ReorderPoint=5 (Not below) -> Changed to QuantityOnHand=4
# Original P009: QuantityOnHand=30, ReorderPoint=12 (Not below) -> Changed to QuantityOnHand=10
csv_data = """ID,PartID,PartName,PartCategory,Supplier,UnitCost,QuantityOnHand,ReorderPoint,SafetyStock,ProductionOrderNumber,ProductionStartDate,ProductionEndDate,CycleTime,MachineID,QualityCheckStatus,InventoryLocation,LastUpdatedDate
1,P001,Engine Block,Sub-Assembly,Precision Auto,250.00,50,20,10,PO1001,2025-03-01,2025-03-05,72,M101,Pass,Assembly Line 1,2025-04-03
2,P002,Door Panel,Component,Global Metals,75.50,120,40,15,N/A,,,,,Warehouse A,2025-04-03
3,P003,Tire,Raw Material,Quality Components,45.00,200,100,50,N/A,,,,,Warehouse B,2025-04-03
4,P004,Seat,Component,Rapid Supply,110.00,80,30,20,N/A,,,,,Factory Floor,2025-04-03
5,P005,Windshield,Finished Good,OEM Suppliers,320.00,25,10,5,PO1002,2025-03-10,2025-03-12,48,M102,Pass,Assembly Line 2,2025-04-03
6,P006,Dashboard,Sub-Assembly,AutoParts Inc.,190.00,40,15,10,PO1003,2025-03-05,2025-03-08,60,M103,Pass,Assembly Line 1,2025-04-03
7,P007,Bumper,Component,Prime Auto,65.00,150,50,25,N/A,,,,,Warehouse A,2025-04-03
8,P008,Chassis,Finished Good,Superior Steel,800.00,4,5,3,PO1004,2025-03-12,2025-03-18,96,M104,Pass,Factory Floor,2025-04-03
9,P009,Gearbox,Sub-Assembly,Advance Plastics,430.00,10,12,8,PO1005,2025-03-08,2025-03-11,54,M105,Pass,Assembly Line 2,2025-04-03
10,P010,Steering Wheel,Component,Elite Components,95.00,90,30,15,N/A,,,,,Warehouse B,2025-04-03
11,P011,Exhaust System,Sub-Assembly,Precision Auto,210.00,35,15,10,PO1006,2025-03-15,2025-03-18,66,M106,Pass,Assembly Line 3,2025-04-03
12,P012,Suspension,Component,Global Metals,150.00,60,25,10,N/A,,,,,Factory Floor,2025-04-03
13,P013,Brake Pad,Component,Quality Components,25.00,300,100,50,N/A,,,,,Warehouse A,2025-04-03
14,P014,Radiator,Component,Rapid Supply,135.00,45,20,10,N/A,,,,,Warehouse B,2025-04-03
15,P015,Headlight,Component,OEM Suppliers,80.00,100,40,20,N/A,,,,,Factory Floor,2025-04-03
16,P016,Engine Block,Sub-Assembly,AutoParts Inc.,260.00,55,20,10,PO1007,2025-03-02,2025-03-06,70,M107,Pass,Assembly Line 1,2025-04-03
17,P017,Door Panel,Component,Prime Auto,70.00,130,45,20,N/A,,,,,Warehouse A,2025-04-03
18,P018,Tire,Raw Material,Superior Steel,50.00,220,110,55,N/A,,,,,Warehouse B,2025-04-03
19,P019,Seat,Component,Advance Plastics,115.00,85,35,20,N/A,,,,,Factory Floor,2025-04-03
20,P020,Windshield,Finished Good,Elite Components,310.00,28,12,5,PO1008,2025-03-11,2025-03-14,50,M108,Pass,Assembly Line 2,2025-04-03
"""

# 1. Load Data using pandas
df = pd.read_csv(io.StringIO(csv_data))

# Ensure relevant columns are numeric
numeric_cols = ['UnitCost', 'QuantityOnHand', 'ReorderPoint', 'SafetyStock', 'CycleTime']
for col in numeric_cols:
    df[col] = pd.to_numeric(df[col], errors='coerce')

# Handle potential missing values (using 0 fill for simplicity here)
df['QuantityOnHand'].fillna(0, inplace=True)
df['ReorderPoint'].fillna(0, inplace=True)
df['SafetyStock'].fillna(0, inplace=True)


# 2. Feature Engineering
# Create the target variable: 1 if supply needed, 0 otherwise
# The model will learn this rule.
df['NeedsSupply'] = (df['QuantityOnHand'] <= df['ReorderPoint']).astype(int)

# Select features for the model
features = ['QuantityOnHand', 'ReorderPoint', 'SafetyStock']
X = df[features]
y = df['NeedsSupply']

# 3. Model Training
# Split data into training and testing sets (optional for demo, good practice)
# stratify=y helps ensure proportion of classes is same in train/test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)

# Initialize Random Forest Classifier
rf_model = RandomForestClassifier(n_estimators=100, random_state=42, class_weight='balanced')

# Train the model
print("Training Random Forest model...")
rf_model.fit(X_train, y_train)
print("Model training complete.")

# --- Model Evaluation (Optional but Recommended) ---
print("\nEvaluating model performance on test data...")
y_pred_test = rf_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred_test)
# Note: With few positive examples, metrics like precision/recall might be 0 if
# the positive examples ended up only in the training set after the split.
# Predicting on the full dataset later ensures we see the intended result.
report = classification_report(y_test, y_pred_test, zero_division=0)

print(f"Model Accuracy on Test Set: {accuracy:.4f}")
print("Classification Report on Test Set:\n", report)
# --- End Evaluation ---


# 4. Prediction on Entire Dataset
print("\nPredicting supply needs for all parts using the trained model...")
# Use the trained model to predict for every part in the original dataframe
df['PredictedNeedsSupply'] = rf_model.predict(X)


# 5. Identify Parts and Get Supplier Information
print("\n--- Identifying Parts Requiring Supply (based on prediction) ---")

# Filter DataFrame for parts where the model predicted 'NeedsSupply' (1)
parts_to_order = df[df['PredictedNeedsSupply'] == 1].copy() # Use .copy() to avoid SettingWithCopyWarning

if parts_to_order.empty:
    print("\nNo parts currently predicted to require supply based on the model.")
    print("NOTE: This might happen if the modified parts ended up only in the test set during split,")
    print("or if the model didn't perfectly learn the rule from the limited examples.")
    print("Let's double-check the data manually based on the rule:")
    # Manual check based on the rule, bypassing model prediction for verification
    manual_check = df[df['QuantityOnHand'] <= df['ReorderPoint']].copy()
    if not manual_check.empty:
         print("\n--- MANUAL CHECK based on Rule (Qty <= Reorder Pt) ---")
         for index, row in manual_check.iterrows():
            print(f"\n  * Rule Triggered for Part {row['PartID']} ('{row['PartName']}')")
            print(f"    - Current Quantity: {row['QuantityOnHand']}")
            print(f"    - Reorder Point: {row['ReorderPoint']}")
            print(f"    - Supplier: '{row['Supplier']}'")

else:
    print(f"\nFound {len(parts_to_order)} part(s) predicted by the model to require supply:")
    for index, row in parts_to_order.iterrows():
        part_id = row['PartID']
        part_name = row['PartName']
        supplier = row['Supplier']
        qty_on_hand = row['QuantityOnHand']
        reorder_pt = row['ReorderPoint']

        # This is where you'd integrate with a purchasing system
        print(f"\n  * ACTION REQUIRED: Order Part {part_id} ('{part_name}')")
        print(f"    - Reason: Predicted Need (Qty: {qty_on_hand} <= Reorder Pt: {reorder_pt})")
        print(f"    - Contact Supplier: '{supplier}'")
        print(f"    - SIMULATING: Initiating order process with {supplier} for {part_id}...")

print("\n--- Process Complete ---")

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['QuantityOnHand'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['ReorderPoint'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values alway

Training Random Forest model...
Model training complete.

Evaluating model performance on test data...
Model Accuracy on Test Set: 0.8333
Classification Report on Test Set:
               precision    recall  f1-score   support

           0       0.83      1.00      0.91         5
           1       0.00      0.00      0.00         1

    accuracy                           0.83         6
   macro avg       0.42      0.50      0.45         6
weighted avg       0.69      0.83      0.76         6


Predicting supply needs for all parts using the trained model...

--- Identifying Parts Requiring Supply (based on prediction) ---

Found 1 part(s) predicted by the model to require supply:

  * ACTION REQUIRED: Order Part P009 ('Gearbox')
    - Reason: Predicted Need (Qty: 10 <= Reorder Pt: 12)
    - Contact Supplier: 'Advance Plastics'
    - SIMULATING: Initiating order process with Advance Plastics for P009...

--- Process Complete ---
