<a href="https://colab.research.google.com/github/PranavDscientist/Weight-prediction-for-self-checkout-app/blob/main/weight_prediction_regrssion_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import joblib
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error, mean_squared_error, r2_score

# Seed shared by the train/test split and the tree, so that Restart & Run All
# reproduces the same partition, the same metrics and the same saved model.
RANDOM_STATE = 1

# Read the CSV file: every column except the last is a feature, the last is the target
df = pd.read_csv('/content/drive/MyDrive/fake2.csv')
X = df.iloc[:, :-1]
y = df.iloc[:, -1]

# Identify numerical and categorical columns
numerical_cols = X.select_dtypes(include=['float64', 'int64']).columns
categorical_cols = X.select_dtypes(include=['object']).columns

# Split the raw rows BEFORE fitting the scaler, so the held-out rows cannot
# influence the preprocessing statistics. The partition depends only on the
# number of rows, test_size and the seed.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=RANDOM_STATE
)

# Learn the min/max of each numerical column from the training rows only
numerical_transformer = MinMaxScaler()
numerical_transformer.fit(X_train_raw[numerical_cols])

# The encoder only records which category levels exist (no target or
# distribution statistics), so it is fitted on all rows; this keeps
# transform() from raising on a level that happens to appear only in the test split.
categorical_transformer = OneHotEncoder(drop='first', sparse_output=False)
categorical_transformer.fit(X[categorical_cols])


def transform_features(features):
    """Return the model-ready feature frame for `features`.

    Scales `numerical_cols` with the fitted `numerical_transformer`, one-hot
    encodes `categorical_cols` with the fitted `categorical_transformer`, and
    places the numerical columns first, followed by the encoded columns.
    Both parts carry `features.index`, so the column-wise concat aligns rows
    by construction instead of relying on a default RangeIndex.
    """
    numerical_part = pd.DataFrame(
        numerical_transformer.transform(features[numerical_cols]),
        columns=numerical_cols,
        index=features.index,
    )
    categorical_part = pd.DataFrame(
        categorical_transformer.transform(features[categorical_cols]),
        columns=categorical_transformer.get_feature_names_out(categorical_cols),
        index=features.index,
    )
    return pd.concat([numerical_part, categorical_part], axis=1)


# Transformed features for the full dataset and for each split.
# X itself is left untouched (raw values) so this cell never writes into a slice of df.
X_transformed = transform_features(X)
X_train = transform_features(X_train_raw)
X_test = transform_features(X_test_raw)

# Build and train the model (seeded: the tree permutes features at each split,
# so an unseeded fit can differ from run to run)
model = DecisionTreeRegressor(random_state=RANDOM_STATE)
model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test)

# Calculate evaluation metrics
print('MAE:', mean_absolute_error(y_test, y_pred))
print('MAPE:', mean_absolute_percentage_error(y_test, y_pred))
print('MSE:', mean_squared_error(y_test, y_pred))
print('R2 score:', r2_score(y_test, y_pred))

# Save the trained model and transformers
joblib.dump(model, 'tree_model.joblib')
joblib.dump(numerical_transformer, 'numerical_transformer.joblib')
joblib.dump(categorical_transformer, 'categorical_transformer.joblib')

MAE: 22.220736150702393
MAPE: 0.09456837389807642
MSE: 3196.8296399836313
R2 score: 0.9953036245424443


['categorical_transformer.joblib']

In [2]:
# One unseen item; the values are listed in the same order as X.columns
new_data = [['plastic', 250, 'g']]

# Convert the new data to a DataFrame with the training feature column names
new_data_df = pd.DataFrame(new_data, columns=X.columns)

# Apply the already-fitted transformers (transform only, never re-fit on new data)
new_numerical_data = numerical_transformer.transform(new_data_df[numerical_cols])
new_categorical_data = categorical_transformer.transform(new_data_df[categorical_cols])

# Concatenate numerical columns first, then the one-hot columns, matching the
# column layout the model was trained on. Both parts share new_data_df's index
# so the column-wise concat aligns rows explicitly.
new_data_transformed = pd.concat(
    [
        pd.DataFrame(new_numerical_data, columns=numerical_cols, index=new_data_df.index),
        pd.DataFrame(
            new_categorical_data,
            columns=categorical_transformer.get_feature_names_out(categorical_cols),
            index=new_data_df.index,
        ),
    ],
    axis=1,
)

# Predict on the new data. The result gets its own name so that y_pred (the
# test-set predictions from the evaluation cell) is not overwritten and stays
# comparable with y_test.
new_pred = model.predict(new_data_transformed)

# .item() requires exactly one prediction, i.e. a single row in new_data
print('Predicted weight:', new_pred.item())

Predicted weight: 301.0
