In [62]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from xgboost import XGBRegressor
import os

In [63]:
data = pd.read_csv('dine_areca (1).csv')

In [64]:
data.head()

Unnamed: 0,Shape,Type,Length,Width,Diameter,TriSide,Height,TopTemp,BotTemp,PreHeat,Cut,LUP_Curing,Bot _Curing,LUP _sec,LUP_cm,RT
0,Square,Bowl,12.7,12.7,-,-,3.81,155,155,40,5,15,70,0.125,0.0625,150.125
1,Triangle,Bowl,-,-,-,12.7,3.81,155,155,40,5,15,70,0.12,0.06,150.12
2,Triangle,Bowl,-,-,-,15.24,4.445,155,150,40,5,10,60,0.05,0.025,135.05
3,Round,Bowl,-,-,15.24,-,4.445,140,140,40,5,15,50,0.13,0.065,130.13
4,Round,Bowl,-,-,17.78,-,5.08,155,145,35,5,30,40,0.12,0.06,130.12


In [65]:
print(data.columns)

Index(['Shape', 'Type', 'Length ', 'Width ', 'Diameter ', 'TriSide', 'Height',
       'TopTemp', 'BotTemp', 'PreHeat', 'Cut ', 'LUP_Curing ', 'Bot _Curing',
       'LUP _sec', 'LUP_cm', 'RT'],
      dtype='object')


In [66]:
numeric_cols = ['Length ', 'Width ', 'Diameter ', 'TriSide', 'Height']
data[numeric_cols] = data[numeric_cols].replace('-', -1).astype(float)

In [67]:
data['Length '] = data.apply(lambda row: -1 if row['Shape'] in ['Triangle', 'Round'] else row['Length '], axis=1)
data['Width '] = data.apply(lambda row: -1 if row['Shape'] in ['Triangle', 'Round'] else row['Width '], axis=1)
data['Diameter '] = data.apply(lambda row: row['Diameter '] if row['Shape'] == 'Round' else -1, axis=1)
data['TriSide'] = data.apply(lambda row: row['TriSide'] if row['Shape'] == 'Triangle' else -1, axis=1)

In [68]:
features = data[['Shape', 'Type', 'Length ', 'Width ', 'Diameter ', 'TriSide', 'Height']]
targets = data[['TopTemp', 'BotTemp', 'PreHeat', 'Cut ', 'LUP_Curing ', 'Bot _Curing',
       'LUP _sec', 'LUP_cm', 'RT']]


In [69]:
encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
encoded_features = encoder.fit_transform(features[['Shape', 'Type']])

In [70]:
scaler = StandardScaler()
scaled_features = scaler.fit_transform(features[['Length ', 'Width ', 'Diameter ', 'TriSide', 'Height']])

In [71]:
processed_features = np.concatenate([encoded_features, scaled_features], axis=1)

In [72]:
X_train, X_test, y_train, y_test = train_test_split(processed_features, targets, test_size=0.2, random_state=42)

In [73]:
model = XGBRegressor(n_estimators=100, learning_rate=0.1, max_depth=5, random_state=42)
model.fit(X_train, y_train)

In [74]:
predictions = model.predict(X_test)

In [75]:
mse = mean_squared_error(y_test, predictions)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test, predictions)
r2 = r2_score(y_test, predictions)

print(f'Test MSE: {mse}')
print(f'Test RMSE: {rmse}')
print(f'Test MAE: {mae}')
print(f'R-squared: {r2}')

Test MSE: 138.3538709971167
Test RMSE: 11.76239223105218
Test MAE: 7.2662017649536335
R-squared: -2.7192732891104816


In [76]:
def predict_new_data(new_data):
    # Replace '-' with -1 in numeric columns
    new_data[numeric_cols] = new_data[numeric_cols].replace('-', -1).astype(float)

    # Fill missing values in new data based on the shape
    new_data['Length '] = new_data.apply(lambda row: -1 if row['Shape'] in ['Triangle', 'Round'] else row['Length '], axis=1)
    new_data['Width '] = new_data.apply(lambda row: -1 if row['Shape'] in ['Triangle', 'Round'] else row['Width '], axis=1)
    new_data['Diameter '] = new_data.apply(lambda row: row['Diameter '] if row['Shape'] == 'Round' else -1, axis=1)
    new_data['TriSide'] = new_data.apply(lambda row: row['TriSide'] if row['Shape'] == 'Triangle' else -1, axis=1)

    encoded_new_data = encoder.transform(new_data[['Shape', 'Type']])
    scaled_new_data = scaler.transform(new_data[['Length ', 'Width ', 'Diameter ', 'TriSide', 'Height']])
    processed_new_data = np.concatenate([encoded_new_data, scaled_new_data], axis=1)

    return model.predict(processed_new_data)

In [83]:
def get_user_input():
    try:
        shape = input("Enter Shape: ")
        type_ = input("Enter Type: ")
        length = input("Enter Length (cm): ")
        width = input("Enter Width (cm): ")
        diameter = input("Enter Diameter (cm): ")
        tri_side = input("Enter Triangle Side (cm): ")
        height = input("Enter Height (cm): ")

        # Validate inputs
        if not shape or not type_:
            raise ValueError("Shape and Type are required fields.")

        length = float(length) if length else -1
        width = float(width) if width else -1
        diameter = float(diameter) if diameter else -1
        tri_side = float(tri_side) if tri_side else -1
        height = float(height) if height else -1

        return pd.DataFrame({
            'Shape': [shape],
            'Type': [type_],
            'Length ': [length],
            'Width ': [width],
            'Diameter ': [diameter],
            'TriSide': [tri_side],
            'Height': [height]
        })
    except ValueError as e:
        print(f"Please enter inputs correctly: {e}")
        return get_user_input()
new_data = get_user_input()

In [84]:
predictions_new = predict_new_data(new_data)

# Print the predicted values in a readable format
predicted_values = np.round(predictions_new)
print(predicted_values)

[[158. 148.  26.   6.  15.  33.   0.   0. 102.]]


In [85]:
# Create a DataFrame to store user input and predicted values
results = new_data.copy()
results[['TopTemp', 'BotTemp', 'PreHeat', 'Cut ', 'LUP_Curing ', 'Bot _Curing', 'LUP _sec', 'LUP_cm', 'RT']] = predicted_values



In [86]:
file_name = 'user_input_and_predictions.xlsx'

In [87]:
if os.path.exists(file_name):
    existing_data = pd.read_excel(file_name)
    updated_data = pd.concat([existing_data, results], ignore_index=True)
else:
    updated_data = results

updated_data.to_excel(file_name, index=False)
print(f"User input and predictions have been saved to '{file_name}'.")

User input and predictions have been saved to 'user_input_and_predictions.xlsx'.


In [88]:
os.system(f"code {file_name}")

0

In [90]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from xgboost import XGBRegressor
import os

# Load the data
data = pd.read_csv('dine_areca (1).csv')

# Define numeric columns
numeric_cols = ['Length ', 'Width ', 'Diameter ', 'TriSide', 'Height']

# Replace '-' with -1 and convert to float
data[numeric_cols] = data[numeric_cols].replace('-', -1).astype(float)

# Fill missing values based on the shape
data['Length '] = data.apply(lambda row: -1 if row['Shape'] in ['Triangle', 'Round'] else row['Length '], axis=1)
data['Width '] = data.apply(lambda row: -1 if row['Shape'] in ['Triangle', 'Round'] else row['Width '], axis=1)
data['Diameter '] = data.apply(lambda row: row['Diameter '] if row['Shape'] == 'Round' else -1, axis=1)
data['TriSide'] = data.apply(lambda row: row['TriSide'] if row['Shape'] == 'Triangle' else -1, axis=1)

# Define features and targets
features = data[['Shape', 'Type', 'Length ', 'Width ', 'Diameter ', 'TriSide', 'Height']]
targets = data[['TopTemp', 'BotTemp', 'PreHeat', 'Cut ', 'LUP_Curing ', 'Bot _Curing', 'LUP _sec', 'LUP_cm', 'RT']]

# One-hot encode categorical features
encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
encoded_features = encoder.fit_transform(features[['Shape', 'Type']])

# Standardize numeric features
scaler = StandardScaler()
scaled_features = scaler.fit_transform(features[['Length ', 'Width ', 'Diameter ', 'TriSide', 'Height']])

# Combine encoded and scaled features
processed_features = np.concatenate([encoded_features, scaled_features], axis=1)

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(processed_features, targets, test_size=0.2, random_state=42)

# Train the model
model = XGBRegressor(n_estimators=100, learning_rate=0.1, max_depth=5, random_state=42)
model.fit(X_train, y_train)

# Make predictions
predictions = model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, predictions)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test, predictions)
r2 = r2_score(y_test, predictions)

print(f'Test MSE: {mse}')
print(f'Test RMSE: {rmse}')
print(f'Test MAE: {mae}')
print(f'R-squared: {r2}')

def predict_new_data(new_data):
    # Replace '-' with -1 in numeric columns
    new_data[numeric_cols] = new_data[numeric_cols].replace('-', -1).astype(float)

    # Fill missing values in new data based on the shape
    new_data['Length '] = new_data.apply(lambda row: -1 if row['Shape'] in ['Triangle', 'Round'] else row['Length '], axis=1)
    new_data['Width '] = new_data.apply(lambda row: -1 if row['Shape'] in ['Triangle', 'Round'] else row['Width '], axis=1)
    new_data['Diameter '] = new_data.apply(lambda row: row['Diameter '] if row['Shape'] == 'Round' else -1, axis=1)
    new_data['TriSide'] = new_data.apply(lambda row: row['TriSide'] if row['Shape'] == 'Triangle' else -1, axis=1)

    encoded_new_data = encoder.transform(new_data[['Shape', 'Type']])
    scaled_new_data = scaler.transform(new_data[['Length ', 'Width ', 'Diameter ', 'TriSide', 'Height']])
    processed_new_data = np.concatenate([encoded_new_data, scaled_new_data], axis=1)

    return model.predict(processed_new_data)

def get_user_input():
    try:
        shape = input("Enter Shape: ")
        type_ = input("Enter Type: ")
        length = input("Enter Length (cm): ")
        width = input("Enter Width (cm): ")
        diameter = input("Enter Diameter (cm): ")
        tri_side = input("Enter Triangle Side (cm): ")
        height = input("Enter Height (cm): ")

        # Validate inputs
        if not shape or not type_:
            raise ValueError("Shape and Type are required fields.")

        length = float(length) if length else -1
        width = float(width) if width else -1
        diameter = float(diameter) if diameter else -1
        tri_side = float(tri_side) if tri_side else -1
        height = float(height) if height else -1

        return pd.DataFrame({
            'Shape': [shape],
            'Type': [type_],
            'Length ': [length],
            'Width ': [width],
            'Diameter ': [diameter],
            'TriSide': [tri_side],
            'Height': [height]
        })
    except ValueError as e:
        print(f"Please enter inputs correctly: {e}")
        return get_user_input()

# Get new data from the user
new_data = get_user_input()

# Predict the values for the new data
predictions_new = predict_new_data(new_data)

# Print the predicted values in a readable format
predicted_values = np.round(predictions_new)
print(predicted_values)

# Create a DataFrame to store user input and predicted values
results = new_data.copy()
results[['TopTemp', 'BotTemp', 'PreHeat', 'Cut ', 'LUP_Curing ', 'Bot _Curing', 'LUP _sec', 'LUP_cm', 'RT']] = predicted_values

# Define the file name
file_name = 'user_input_and_predictions.xlsx'

# Append the new results to the existing Excel file or create it if it doesn't exist
if os.path.exists(file_name):
    existing_data = pd.read_excel(file_name)
    updated_data = pd.concat([existing_data, results], ignore_index=True)
else:
    updated_data = results

updated_data.to_excel(file_name, index=False)
print(f"User input and predictions have been saved to '{file_name}'.")

# Open the Excel file in VS Code
os.system(f"code {file_name}")


Test MSE: 138.3538709971167
Test RMSE: 11.76239223105218
Test MAE: 7.2662017649536335
R-squared: -2.7192732891104816
[[158. 148.  21.   4.  10.  40.   0.   0. 111.]]
User input and predictions have been saved to 'user_input_and_predictions.xlsx'.


0