In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

# ---------------------------------------------------------------------------
# 1. Data loading
# ---------------------------------------------------------------------------
# The CSV is read as UTF-8 from the path below.
file_path = "//content/ML MODEL.csv"  # Ensure correct file name and location
df = pd.read_csv(file_path, encoding='utf-8')

# Headers are trimmed and lower-cased so the column lookups further down
# do not depend on stray whitespace or capitalisation in the file.
df.columns = df.columns.str.strip().str.lower()

# Show the normalised headers as a quick sanity check.
print("Dataset Columns:", df.columns)

# ---------------------------------------------------------------------------
# 2. Features and target
# ---------------------------------------------------------------------------
# Numeric inputs are fed to the model unchanged; the material name is the
# only categorical input and gets one-hot encoded.
numerical_cols = [
    'layer_height',
    'wall_thickness',
    'infill_density',
    'nozzle_temperature',
    'print_speed',
]
categorical_cols = ['material']

# Model inputs: every numeric column followed by the categorical one.
selected_features = [*numerical_cols, *categorical_cols]
X = df[selected_features]
y = df['tensile_strength']

# ---------------------------------------------------------------------------
# 3. Preprocessing
# ---------------------------------------------------------------------------
# handle_unknown='ignore' keeps prediction from failing on a material value
# that was not present when the encoder was fitted.
categorical_transformer = Pipeline(
    steps=[('onehot', OneHotEncoder(handle_unknown='ignore'))]
)

preprocessor = ColumnTransformer(
    transformers=[
        ('num', 'passthrough', numerical_cols),
        ('cat', categorical_transformer, categorical_cols),
    ]
)

# ---------------------------------------------------------------------------
# 4. Train / test split, model and fit
# ---------------------------------------------------------------------------
# 80% of the rows train the model, 20% are held out; the seed is fixed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Random forest with 100 trees and a fixed seed.
model = RandomForestRegressor(n_estimators=100, random_state=42)

# Preprocessing and the regressor are bundled so both are fitted together
# and applied identically at prediction time.
pipeline = Pipeline(
    steps=[
        ('preprocessor', preprocessor),
        ('model', model),
    ]
)

# Fit on the training rows, then predict the held-out rows.
y_pred = pipeline.fit(X_train, y_train).predict(X_test)

# ---------------------------------------------------------------------------
# 5. Evaluation on the held-out rows
# ---------------------------------------------------------------------------
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)  # RMSE is derived from the MSE computed above
r2 = r2_score(y_test, y_pred)

print(f'Mean Absolute Error (MAE): {mae:.2f}')
print(f'Mean Squared Error (MSE): {mse:.2f}')
print(f'Root Mean Squared Error (RMSE): {rmse:.2f}')
print(f'R-squared (R2): {r2:.2f}')

# Function to predict tensile strength
def predict_tensile_strength():
    """Prompt for print settings on stdin and print the predicted tensile strength.

    Reads five numeric settings and a material name with ``input()``, places
    them in a one-row DataFrame whose column names match the features the
    module-level ``pipeline`` was fitted on, and prints the prediction
    formatted to two decimals. Nothing is returned.

    Every numeric answer is parsed with ``float`` so that decimal entries
    such as ``"20.5"`` or ``"210.0"`` are accepted for all fields, not only
    for the two thickness/height prompts.

    Raises:
        ValueError: If a numeric prompt receives text ``float`` cannot parse.
    """
    print("\nEnter the parameters for prediction:")
    layer_height = float(input("Layer Height (mm): "))
    wall_thickness = float(input("Wall Thickness (mm): "))
    # Parsed as float rather than int: int("20.5") raises ValueError, and the
    # regressor treats these columns as continuous values anyway.
    infill_density = float(input("Infill Density (%): "))
    nozzle_temperature = float(input("Nozzle Temperature (°C): "))
    print_speed = float(input("Print Speed (mm/s): "))
    # The material text is passed through unchanged. The encoder in the
    # pipeline is configured with handle_unknown='ignore', so a value that was
    # not seen during fitting does not raise here.
    material = input("Material (e.g., PLA, ABS): ")

    # One-row frame; the keys must match the training feature names.
    input_data = pd.DataFrame({
        'layer_height': [layer_height],
        'wall_thickness': [wall_thickness],
        'infill_density': [infill_density],
        'nozzle_temperature': [nozzle_temperature],
        'print_speed': [print_speed],
        'material': [material],
    })

    prediction = pipeline.predict(input_data)
    print(f'Predicted Tensile Strength: {prediction[0]:.2f} MPa')

# Run the interactive prediction once.
# NOTE: this call executes at top level, so running the cell blocks on
# input() until all six prompts (five numeric settings plus the material)
# have been answered.
predict_tensile_strength()


Dataset Columns: Index(['layer_height', 'wall_thickness', 'infill_density', 'infill_pattern',
       'nozzle_temperature', 'bed_temperature', 'print_speed', 'material',
       'fan_speed', 'roughness', 'tensile_strength', 'elongation'],
      dtype='object')
Mean Absolute Error (MAE): 2.53
Mean Squared Error (MSE): 10.05
Root Mean Squared Error (RMSE): 3.17
R-squared (R2): 0.83

Enter the parameters for prediction:
