# In [26]:
import pandas as pd
import numpy as np
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error
import matplotlib.pyplot as plt
import pickle  # Import the pickle module

# Read and prepare the data
def _currency_to_float(series):
    """Return *series* as floats, stripping literal ``$`` and ``,`` characters.

    A column that is already numeric is cast to float directly, since the
    ``.str`` accessor is only available on string-like columns.
    """
    if pd.api.types.is_numeric_dtype(series):
        return series.astype(float)
    # regex=False forces literal matching: '$' is the end-of-string anchor when
    # interpreted as a regex, and the default of ``regex`` differs between
    # pandas versions.
    return (
        series.str.replace('$', '', regex=False)
        .str.replace(',', '', regex=False)
        .astype(float)
    )


def prepare_data(data):
    """Clean the currency columns and split the frame into features and target.

    The 'Price' and 'RM Cost/Lb' columns of *data* are converted to float
    in place, so the caller's DataFrame is modified. Both columns accept
    values with a dollar sign and thousands separators (e.g. ``'$1,234.50'``)
    as well as values that are already numeric.

    Args:
        data: DataFrame with the columns 'Price', 'RM Cost/Lb', 'Volume',
            'Alpha', 'Beta' and 'Gamma'.

    Returns:
        A tuple ``(X, y)`` where ``X`` holds the feature columns
        'RM Cost/Lb', 'Volume', 'Alpha', 'Beta', 'Gamma' and ``y`` is the
        'Price' column.

    Raises:
        KeyError: If a required column is missing.
        ValueError: If a value cannot be parsed as a float after cleaning.
    """
    data['Price'] = _currency_to_float(data['Price'])
    data['RM Cost/Lb'] = _currency_to_float(data['RM Cost/Lb'])

    # Select features and target
    X = data[['RM Cost/Lb', 'Volume', 'Alpha', 'Beta', 'Gamma']]
    y = data['Price']

    return X, y

# Create and evaluate the model
def train_evaluate_model(X, y, *, test_size=0.2, random_state=42):
    """Fit a gradient boosting regressor and report hold-out metrics.

    The data is split into train and test parts, a
    ``GradientBoostingRegressor`` (100 estimators, learning rate 0.1,
    max depth 3) is fitted on the training part, and the R² score and RMSE
    on the test part are printed.

    Args:
        X: Feature matrix.
        y: Target values.
        test_size: Share of the data held out for evaluation; forwarded to
            ``train_test_split``.
        random_state: Seed used for both the split and the model.

    Returns:
        A tuple ``(model, X_test, y_test, y_pred)``: the fitted model, the
        held-out features and targets, and the model's predictions for the
        held-out features. Feature importances are available from the
        returned model via ``model.feature_importances_``.
    """
    # Split the data
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    # Create and train the model
    model = GradientBoostingRegressor(
        n_estimators=100,
        learning_rate=0.1,
        max_depth=3,
        random_state=random_state,
    )
    model.fit(X_train, y_train)

    # Make predictions on the held-out part
    y_pred = model.predict(X_test)

    # Calculate metrics
    r2 = r2_score(y_test, y_pred)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))

    # Print results
    print("\nGradient Boosting Model Performance:")
    print(f"R² Score: {r2:.4f}")
    print(f"RMSE: ${rmse:.2f}")

    return model, X_test, y_test, y_pred

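# Plot actual vs predicted values
# TODO: no plotting code follows this heading; matplotlib is imported above but not used in this section.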


# Main execution
DEFAULT_DATA_PATH = '/Users/manassingh/mlManufacturing/machineLearningSourcing - Sheet1 (3).csv'
DEFAULT_MODEL_PATH = 'trained_model.pkl'


def main(data_path=DEFAULT_DATA_PATH, model_path=DEFAULT_MODEL_PATH):
    """Run the full pipeline: load the CSV, train the model, and pickle it.

    Args:
        data_path: Path of the CSV file to read. Defaults to the location
            that was previously hard-coded.
        model_path: Path the fitted model is pickled to.

    Returns:
        The fitted model returned by ``train_evaluate_model``.
    """
    # Read the data
    data = pd.read_csv(data_path)

    # Prepare the data
    X, y = prepare_data(data)

    # Train and evaluate the model; only the fitted model is needed here, so
    # the held-out split and predictions are deliberately ignored.
    model, _X_test, _y_test, _y_pred = train_evaluate_model(X, y)

    # Save the trained model using pickle.
    # NOTE: pickle files can execute arbitrary code when loaded, so only load
    # this file back from a trusted location.
    with open(model_path, 'wb') as f:
        pickle.dump(model, f)

    return model

# Run the pipeline only when this file is the entry point (not on import).
# The model returned by main() is bound to the module-level name `model`.
if __name__ == "__main__":
    model = main()



# Output:
# Gradient Boosting Model Performance:
# R² Score: 0.9641
# RMSE: $42999.38
