In [10]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

In [11]:
# Load the catering details data
catering_details = pd.read_csv('tbl_catering_details.csv')

# Load the catering data
catering = pd.read_csv('tbl_catering.csv')

# Load the catering products data
catering_products = pd.read_csv('tbl_catering_products.csv')

In [14]:
# Convert delivery date to a suitable format
catering['DELIVERY_DATE'] = pd.to_datetime(catering['DELIVERY_DATE'])

In [15]:
# Handle missing values
catering.fillna(0, inplace=True)
catering_details.fillna(0, inplace=True)
catering_products.fillna(0, inplace=True)

In [16]:
# Convert quantities to numeric format
catering_details['QUANTITY'] = pd.to_numeric(catering_details['QUANTITY'].str.replace('tr', '').str.replace('cups', '').replace('1,1/2', '1.5').replace('2 1/2', '2.5'), errors='coerce')

In [29]:
# Merge catering details with catering data


merged_data = pd.merge(catering, catering_details, left_on='ID', right_on='CATERING_ID', how='left')


In [30]:
# Create a mapping of dishes to their quantities
dish_quantity_map = merged_data.groupby('ITEM_DESC')['QUANTITY'].sum().reset_index()

In [31]:
# Create a pivot table for dish quantities per event
dish_quantity_pivot = merged_data.pivot_table(index='CATERING_ID', columns='ITEM_DESC', values='QUANTITY', fill_value=0)

In [32]:
# Add guest count to the pivot table

dish_quantity_pivot['GUEST_COUNT'] = merged_data['GUEST_COUNT']



In [49]:
# Save the dish quantity map to a CSV file
dish_quantity_map.to_csv('dish_quantity_map.csv', index=False)

In [33]:
# Select features and target variable
X = dish_quantity_pivot[['GUEST_COUNT']]
y = dish_quantity_pivot.drop('GUEST_COUNT', axis=1)

In [34]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [35]:
# Train the model
model = LinearRegression()
model.fit(X_train, y_train)

In [46]:
import re
from sklearn.linear_model import LinearRegression
# Evaluate the model
from joblib import Memory
from sklearn.metrics import mean_squared_error, r2_score
# Evaluate the model
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error: {mse}")
import joblib

# Save the model to a file
joblib.dump(model, 'catering_model_RE.pkl')

Mean Squared Error: 87.10917847889836


['catering_model_RE.pkl']

In [51]:
# Example input data
input_data = pd.DataFrame({
    'GUEST_COUNT': [100],
})

# Predict quantities
predictions = model.predict(input_data)

# Print the predicted quantities
print(predictions)

[[0.00000000e+00 1.03681298e-03 0.00000000e+00 0.00000000e+00
  2.15910800e-03 3.28976667e-03 1.71160141e-02 0.00000000e+00
  1.74755406e-03 3.01392839e-03 2.34983334e-03 1.63739296e-01
  0.00000000e+00 2.93092666e-03 9.10230911e-03 3.31191007e-02
  5.16963334e-03 3.09904133e-03 1.22202407e-03 9.36587873e-02
  6.39469136e-03 4.52876313e-01 9.94071960e-02 3.91929981e-02
  5.87894972e-01 2.29261574e-01 1.27901209e-03 1.87986667e-03
  1.59796049e-03 0.00000000e+00 4.00246542e-02 1.26127198e-01
  0.00000000e+00 3.51486938e-01 1.06530699e-02 0.00000000e+00
  0.00000000e+00 2.15910800e-03 2.13631279e-02 2.34983334e-02
  1.17491667e-01 2.26044630e-03 0.00000000e+00 3.51939668e-01
  2.81980001e-02 7.95579418e-01 4.16149893e-03 3.49510812e-01
  1.87986667e-03 1.24201631e-01 3.69339471e-02 8.69020152e-03
  7.84503663e-03 2.15910800e-03 2.10211997e-03 1.07955400e-03
  0.00000000e+00 1.40990000e-01 4.40921229e-02 2.96683945e-02
  2.34983334e-03 5.34648077e-03 2.22346092e-02 4.00164862e-02
  2.1201

In [53]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
import joblib

# Load the catering details data
catering_details = pd.read_csv('tbl_catering_details.csv')

# Load the catering data
catering = pd.read_csv('tbl_catering.csv')

# Convert CATERING_ID to numeric type in both DataFrames
catering_details['CATERING_ID'] = pd.to_numeric(catering_details['CATERING_ID'], errors='coerce')
catering['CATERING_ID'] = pd.to_numeric(catering['CATERING_ID'], errors='coerce')

# Drop rows with missing values in CATERING_ID
catering_details.dropna(subset=['CATERING_ID'], inplace=True)
catering.dropna(subset=['CATERING_ID'], inplace=True)

# Merge catering details with catering data
merged_data = pd.merge(catering_details, catering, on='CATERING_ID', how='inner')

# Create a pivot table for dish quantities per event
dish_quantity_pivot = merged_data.pivot_table(index='CATERING_ID', columns='ITEM_DESC', values='QUANTITY', fill_value=0)

# Add guest count to the pivot table
dish_quantity_pivot['GUEST_COUNT'] = merged_data['GUEST_COUNT']

# Select features and target variable
X = dish_quantity_pivot[['GUEST_COUNT']]
y = dish_quantity_pivot.drop('GUEST_COUNT', axis=1)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the model
model = LinearRegression()
model.fit(X_train, y_train)

# Evaluate the model
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error: {mse}")

# Save the model to a file
joblib.dump(model, 'catering_model_11.pkl')

# Load the saved model
model = joblib.load('catering_model11.pkl')

# Example input data
input_data = pd.DataFrame({
    'GUEST_COUNT': [100],
})

# Predict quantities
predictions = model.predict(input_data)

# Print the predicted quantities
print(predictions)

KeyError: 'CATERING_ID'