In [36]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from xgboost import XGBRegressor

In [37]:
# Read the CSV file into the DataFrame every later cell works from.
csv_path = 'dine_areca (1).csv'
data = pd.read_csv(csv_path)

In [38]:
# Columns that are cast to float; a '-' cell is swapped for the sentinel -1
# first so that the cast does not fail on it.
numeric_cols = ['Length', 'Width', 'Diameter', 'TriSide', 'Height']
without_dashes = data[numeric_cols].replace('-', -1)
data[numeric_cols] = without_dashes.astype(float)

In [39]:
# Overwrite with -1 every dimension that is not kept for the row's shape:
#   * Length / Width are kept unless the shape is 'Triangle' or 'Round'
#   * Diameter is kept only for 'Round'
#   * TriSide is kept only for 'Triangle'
# Boolean masks replace the four row-wise `apply(axis=1)` passes: the result
# is computed column-wise, stays float64 even if every row receives -1, and
# is still a Series when the frame has zero rows.
shape_col = data['Shape']
drops_length_width = shape_col.isin(['Triangle', 'Round'])
data['Length'] = data['Length'].mask(drops_length_width, -1.0)
data['Width'] = data['Width'].mask(drops_length_width, -1.0)
data['Diameter'] = data['Diameter'].where(shape_col == 'Round', -1.0)
data['TriSide'] = data['TriSide'].where(shape_col == 'Triangle', -1.0)


In [40]:
# Model inputs: two categorical columns followed by the five numeric ones.
feature_cols = ['Shape', 'Type', 'Length', 'Width', 'Diameter', 'TriSide', 'Height']
# Model outputs: all nine columns are predicted jointly.
target_cols = [
    'TopTemp', 'BotTemp', 'PreHeat', 'Cut', 'LUP_Curing',
    'Bot_Curing', 'LUP_sec', 'LUP_cm', 'RT',
]

features = data[feature_cols]
targets = data[target_cols]


In [41]:
# One-hot encode the categorical inputs as a dense array; categories not seen
# during fitting are ignored at transform time instead of raising.
categorical_cols = ['Shape', 'Type']
encoder = OneHotEncoder(handle_unknown='ignore', sparse_output=False)
encoded_features = encoder.fit_transform(features[categorical_cols])

In [42]:
# Standardise the numeric inputs; the fitted scaler is reused when new rows
# are preprocessed for prediction.
dimension_cols = ['Length', 'Width', 'Diameter', 'TriSide', 'Height']
scaler = StandardScaler()
scaled_features = scaler.fit_transform(features[dimension_cols])

In [43]:
# Final design matrix: one-hot columns first, scaled numeric columns after.
processed_features = np.hstack((encoded_features, scaled_features))

In [44]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
split_options = {'test_size': 0.2, 'random_state': 42}
X_train, X_test, y_train, y_test = train_test_split(
    processed_features,
    targets,
    **split_options,
)


In [45]:
# Gradient-boosted regressor fitted against all target columns at once.
xgb_params = {
    'n_estimators': 100,
    'learning_rate': 0.1,
    'max_depth': 5,
    'random_state': 42,
}
model = XGBRegressor(**xgb_params)
model.fit(X_train, y_train)

In [46]:
# Predict the target columns for the held-out rows; the result is compared
# against y_test by the metrics cell.
predictions = model.predict(X_test)

In [47]:
# Error metrics on the held-out rows, computed with scikit-learn's default
# handling of the multi-column targets.
mse = mean_squared_error(y_test, predictions)
mae = mean_absolute_error(y_test, predictions)
r2 = r2_score(y_test, predictions)
rmse = np.sqrt(mse)  # same units as the targets

print(
    f'Test MSE: {mse}',
    f'Test RMSE: {rmse}',
    f'Test MAE: {mae}',
    f'R-squared: {r2}',
    sep='\n',
)

Test MSE: 188.49210650116942
Test RMSE: 13.72924275046404
Test MAE: 7.776337165443433
R-squared: -1.6172558814994167


In [48]:
def predict_new_data(new_data):
    """Preprocess new rows like the training data and predict every target.

    Parameters
    ----------
    new_data : pandas.DataFrame
        Must contain 'Shape', 'Type' and every column listed in
        ``numeric_cols``. Numeric cells may hold the placeholder '-'.
        The frame is left untouched; preprocessing happens on a copy.

    Returns
    -------
    numpy.ndarray
        Whatever ``model.predict`` returns for the preprocessed rows.

    Raises
    ------
    KeyError
        If a required column is missing from ``new_data``.
    ValueError
        If a numeric cell holds text other than '-' that cannot be cast
        to float.
    """
    # Work on a copy so the caller's DataFrame is not rewritten as a side effect.
    frame = new_data.copy()

    # '-' placeholder -> -1, then cast to float. The element-wise map avoids
    # depending on DataFrame.replace's implicit object-dtype downcasting,
    # which recent pandas versions warn about.
    for col in numeric_cols:
        frame[col] = frame[col].map(
            lambda cell: -1 if isinstance(cell, str) and cell == '-' else cell
        ).astype(float)

    # Overwrite with -1 every dimension that is not kept for the row's shape
    # (same rules as applied to the training data), using boolean masks
    # instead of row-wise apply.
    shape_col = frame['Shape']
    drops_length_width = shape_col.isin(['Triangle', 'Round'])
    frame['Length'] = frame['Length'].mask(drops_length_width, -1.0)
    frame['Width'] = frame['Width'].mask(drops_length_width, -1.0)
    frame['Diameter'] = frame['Diameter'].where(shape_col == 'Round', -1.0)
    frame['TriSide'] = frame['TriSide'].where(shape_col == 'Triangle', -1.0)

    # Reuse the already-fitted encoder and scaler so the column layout
    # matches what the model was trained on.
    encoded_new_data = encoder.transform(frame[['Shape', 'Type']])
    scaled_new_data = scaler.transform(frame[['Length', 'Width', 'Diameter', 'TriSide', 'Height']])
    processed_new_data = np.concatenate([encoded_new_data, scaled_new_data], axis=1)

    return model.predict(processed_new_data)

In [51]:
# One example row; Diameter and TriSide carry the '-' placeholder.
sample_row = {
    'Shape': 'Rectangle',
    'Type': 'Plate',
    'Length': 15.24,
    'Width': 7.62,
    'Diameter': '-',
    'TriSide': '-',
    'Height': 1.905,
}
new_data = pd.DataFrame([sample_row])

predictions_new = predict_new_data(new_data)

# Round to two decimals so the printed array is easier to read.
predicted_values = np.round(predictions_new, 2)
print(predicted_values)

[[1.5528e+02 1.4993e+02 2.9980e+01 9.9300e+00 1.0030e+01 4.9960e+01
  1.1000e-01 6.0000e-02 1.9982e+02]]


  new_data[numeric_cols] = new_data[numeric_cols].replace('-', -1).astype(float)
