In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, accuracy_score
from sklearn.impute import SimpleImputer
import joblib  # For saving/loading model
import os      # For file existence check

In [2]:

# --- 1. Load dataset ---
try:
    df = pd.read_excel("new.xlsx")
except FileNotFoundError:
    print("Error: The file 'new.xlsx' was not found.")
    exit()

# Drop unnecessary columns
if "Unnamed: 0" in df.columns:
    df = df.drop(columns=["Unnamed: 0"])

# Copy original dataframe
df_original = df.copy()

In [3]:
# --- 2. Handle categorical variables ---
cat_cols = df.select_dtypes(include=['object']).columns
print("Categorical Columns:", list(cat_cols))

label_encoders = {}
for col in cat_cols:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col].astype(str))
    label_encoders[col] = le

Categorical Columns: ['appointment_id', 'customer_id', 'booking_date', 'booking_time', 'booking_datetime', 'created_at', 'service_type', 'staff_assigned', 'status', 'weather', 'channel', 'name', 'email', 'gender', 'city', 'joined_date', 'tags', 'primary_business', 'primary_service']


In [4]:
# --- 3. Split Features & Target ---
# Assuming 'status' is the target column
X = df.drop(columns=["status"])
y = df["status"]

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Handle missing values
imputer = SimpleImputer(strategy="constant", fill_value=0)
X_train = imputer.fit_transform(X_train)
X_test = imputer.transform(X_test)

In [5]:
# --- 4. Train Gradient Boosting Classifier ---
print("\n--- Model Training: Gradient Boosting Classifier ---")
model = GradientBoostingClassifier(
    n_estimators=100,
    learning_rate=0.0092,
    subsample=0.84555,
    max_depth=10,
    random_state=42
)
model.fit(X_train, y_train)
print("✅ Gradient Boosting Classifier trained.")



--- Model Training: Gradient Boosting Classifier ---
✅ Gradient Boosting Classifier trained.


In [6]:
# --- 5. Evaluate ---
y_pred = model.predict(X_test)

print("\nAccuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))


Accuracy: 0.6825

Classification Report:
               precision    recall  f1-score   support

           0       0.00      0.00      0.00       300
           1       0.67      1.00      0.80      1571
           2       0.00      0.00      0.00       189
           3       0.00      0.00      0.00       273
           4       1.00      1.00      1.00        67

    accuracy                           0.68      2400
   macro avg       0.33      0.40      0.36      2400
weighted avg       0.47      0.68      0.55      2400



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [7]:
# --- 6. Save the trained model and LabelEncoders ---
print("\n--- Saving the trained model and LabelEncoders ---")
joblib.dump(model, "mango_model.pkl")
joblib.dump(label_encoders, "mango_label_encoders.pkl")
print("✅ Model and LabelEncoders saved successfully as 'mango_model.pkl' and 'mango_label_encoders.pkl'.")


--- Saving the trained model and LabelEncoders ---
✅ Model and LabelEncoders saved successfully as 'mango_model.pkl' and 'mango_label_encoders.pkl'.


In [8]:
# --- 7. Load the saved model and LabelEncoders ---
print("\n--- Loading the saved model and LabelEncoders ---")
if os.path.exists("mango_model.pkl") and os.path.exists("mango_label_encoders.pkl"):
    loaded_model = joblib.load("mango_model.pkl")
    loaded_label_encoders = joblib.load("mango_label_encoders.pkl")
    print("✅ Model and LabelEncoders loaded successfully.")
else:
    print("Error: Saved model or encoder files not found.")
    exit()



--- Loading the saved model and LabelEncoders ---
✅ Model and LabelEncoders loaded successfully.


In [9]:
# --- 8. Test with new user input ---
print("\n--- Testing with User Input ---")
user_input_data = {}

for col in X.columns:
    if col in cat_cols:
        le = loaded_label_encoders[col]
        options = le.classes_
        print(f"\nSelect a value for '{col}':")
        for i, option in enumerate(options):
            print(f"[{i}] {option}")
        while True:
            try:
                choice_index = int(input("Enter the number corresponding to your choice: "))
                if 0 <= choice_index < len(options):
                    value = options[choice_index]
                    break
                else:
                    print("Invalid choice. Please enter a valid number.")
            except ValueError:
                print("Invalid input. Please enter a number.")
    else:
        value = input(f"Enter value for '{col}': ")
    user_input_data[col] = [value]

# Convert input to DataFrame
user_df = pd.DataFrame(user_input_data)

# Encode categorical columns
for col in cat_cols:
    if col in user_df.columns:
        le = loaded_label_encoders[col]
        user_df[col] = le.transform(user_df[col])

# Ensure numeric format
user_df = user_df.apply(pd.to_numeric)

# Prediction
prediction = loaded_model.predict(user_df)

print("\nPrediction for your input:")
print(f"The predicted status is: {prediction[0]}")



--- Testing with User Input ---

Select a value for 'appointment_id':
[0] APPT0000001
[1] APPT0000002
[2] APPT0000003
[3] APPT0000004
[4] APPT0000005
[5] APPT0000006
[6] APPT0000007
[7] APPT0000008
[8] APPT0000009
[9] APPT0000010
[10] APPT0000011
[11] APPT0000012
[12] APPT0000013
[13] APPT0000014
[14] APPT0000015
[15] APPT0000016
[16] APPT0000017
[17] APPT0000018
[18] APPT0000019
[19] APPT0000020
[20] APPT0000021
[21] APPT0000022
[22] APPT0000023
[23] APPT0000024
[24] APPT0000025
[25] APPT0000026
[26] APPT0000027
[27] APPT0000028
[28] APPT0000029
[29] APPT0000030
[30] APPT0000031
[31] APPT0000032
[32] APPT0000033
[33] APPT0000034
[34] APPT0000035
[35] APPT0000036
[36] APPT0000037
[37] APPT0000038
[38] APPT0000039
[39] APPT0000040
[40] APPT0000041
[41] APPT0000042
[42] APPT0000043
[43] APPT0000044
[44] APPT0000045
[45] APPT0000046
[46] APPT0000047
[47] APPT0000048
[48] APPT0000049
[49] APPT0000050
[50] APPT0000051
[51] APPT0000052
[52] APPT0000053
[53] APPT0000054
[54] APPT0000055
[55]

Enter the number corresponding to your choice:  1199



Select a value for 'customer_id':
[0] CUST000001
[1] CUST000002
[2] CUST000003
[3] CUST000004
[4] CUST000005
[5] CUST000006
[6] CUST000007
[7] CUST000008
[8] CUST000009
[9] CUST000010
[10] CUST000011
[11] CUST000012
[12] CUST000013
[13] CUST000014
[14] CUST000015
[15] CUST000016
[16] CUST000017
[17] CUST000018
[18] CUST000019
[19] CUST000020
[20] CUST000021
[21] CUST000022
[22] CUST000023
[23] CUST000024
[24] CUST000025
[25] CUST000026
[26] CUST000027
[27] CUST000028
[28] CUST000029
[29] CUST000030
[30] CUST000031
[31] CUST000032
[32] CUST000033
[33] CUST000034
[34] CUST000035
[35] CUST000036
[36] CUST000037
[37] CUST000038
[38] CUST000039
[39] CUST000040
[40] CUST000041
[41] CUST000042
[42] CUST000043
[43] CUST000044
[44] CUST000045
[45] CUST000046
[46] CUST000047
[47] CUST000048
[48] CUST000049
[49] CUST000050
[50] CUST000051
[51] CUST000052
[52] CUST000053
[53] CUST000054
[54] CUST000055
[55] CUST000056
[56] CUST000057
[57] CUST000058
[58] CUST000059
[59] CUST000060
[60] CUST000061

Enter the number corresponding to your choice:  299



Select a value for 'booking_date':
[0] 2023-09-07
[1] 2023-09-08
[2] 2023-09-09
[3] 2023-09-10
[4] 2023-09-11
[5] 2023-09-12
[6] 2023-09-13
[7] 2023-09-14
[8] 2023-09-15
[9] 2023-09-16
[10] 2023-09-17
[11] 2023-09-18
[12] 2023-09-19
[13] 2023-09-20
[14] 2023-09-21
[15] 2023-09-22
[16] 2023-09-23
[17] 2023-09-24
[18] 2023-09-25
[19] 2023-09-26
[20] 2023-09-27
[21] 2023-09-28
[22] 2023-09-29
[23] 2023-09-30
[24] 2023-10-01
[25] 2023-10-02
[26] 2023-10-03
[27] 2023-10-04
[28] 2023-10-05
[29] 2023-10-06
[30] 2023-10-07
[31] 2023-10-08
[32] 2023-10-09
[33] 2023-10-10
[34] 2023-10-11
[35] 2023-10-12
[36] 2023-10-13
[37] 2023-10-14
[38] 2023-10-15
[39] 2023-10-16
[40] 2023-10-17
[41] 2023-10-18
[42] 2023-10-19
[43] 2023-10-20
[44] 2023-10-21
[45] 2023-10-22
[46] 2023-10-23
[47] 2023-10-24
[48] 2023-10-25
[49] 2023-10-26
[50] 2023-10-27
[51] 2023-10-28
[52] 2023-10-29
[53] 2023-10-30
[54] 2023-10-31
[55] 2023-11-01
[56] 2023-11-02
[57] 2023-11-03
[58] 2023-11-04
[59] 2023-11-05
[60] 2023-11-0

Enter the number corresponding to your choice:  450



Select a value for 'booking_time':
[0] 08:00
[1] 08:15
[2] 08:30
[3] 08:45
[4] 09:00
[5] 09:15
[6] 09:30
[7] 09:45
[8] 10:00
[9] 10:15
[10] 10:30
[11] 10:45
[12] 11:00
[13] 11:15
[14] 11:30
[15] 11:45
[16] 12:00
[17] 12:15
[18] 12:30
[19] 12:45
[20] 13:00
[21] 13:15
[22] 13:30
[23] 13:45
[24] 14:00
[25] 14:15
[26] 14:30
[27] 14:45
[28] 15:00
[29] 15:15
[30] 15:30
[31] 15:45
[32] 16:00
[33] 16:15
[34] 16:30
[35] 16:45
[36] 17:00
[37] 17:15
[38] 17:30
[39] 17:45
[40] 18:00
[41] 18:15
[42] 18:30
[43] 18:45
[44] 19:00
[45] 19:15
[46] 19:30
[47] 19:45


Enter the number corresponding to your choice:  45



Select a value for 'booking_datetime':
[0] 2023-09-07T08:45:00
[1] 2023-09-07T10:00:00
[2] 2023-09-07T14:00:00
[3] 2023-09-07T14:30:00
[4] 2023-09-07T16:00:00
[5] 2023-09-07T18:00:00
[6] 2023-09-07T18:15:00
[7] 2023-09-07T19:45:00
[8] 2023-09-08T08:30:00
[9] 2023-09-08T09:00:00
[10] 2023-09-08T09:15:00
[11] 2023-09-08T10:15:00
[12] 2023-09-08T10:30:00
[13] 2023-09-08T11:30:00
[14] 2023-09-08T11:45:00
[15] 2023-09-08T13:00:00
[16] 2023-09-08T13:15:00
[17] 2023-09-08T14:30:00
[18] 2023-09-08T14:45:00
[19] 2023-09-08T18:15:00
[20] 2023-09-08T19:15:00
[21] 2023-09-09T08:00:00
[22] 2023-09-09T08:30:00
[23] 2023-09-09T09:30:00
[24] 2023-09-09T10:00:00
[25] 2023-09-09T15:15:00
[26] 2023-09-09T17:45:00
[27] 2023-09-09T18:00:00
[28] 2023-09-09T19:45:00
[29] 2023-09-10T08:30:00
[30] 2023-09-10T10:00:00
[31] 2023-09-10T10:45:00
[32] 2023-09-10T11:15:00
[33] 2023-09-10T11:30:00
[34] 2023-09-10T12:00:00
[35] 2023-09-10T12:30:00
[36] 2023-09-10T13:30:00
[37] 2023-09-10T15:00:00
[38] 2023-09-10T15:1

Enter the number corresponding to your choice:  41



Select a value for 'created_at':
[0] 2023-08-08T13:16:00
[1] 2023-08-12T08:29:00
[2] 2023-08-13T23:06:00
[3] 2023-08-14T18:39:00
[4] 2023-08-15T07:15:00
[5] 2023-08-15T21:52:00
[6] 2023-08-16T00:43:00
[7] 2023-08-16T19:47:00
[8] 2023-08-17T00:00:00
[9] 2023-08-17T10:16:00
[10] 2023-08-17T11:57:00
[11] 2023-08-17T12:08:00
[12] 2023-08-17T15:07:00
[13] 2023-08-17T16:32:00
[14] 2023-08-17T20:29:00
[15] 2023-08-18T02:49:00
[16] 2023-08-18T05:38:00
[17] 2023-08-18T13:29:00
[18] 2023-08-18T16:41:00
[19] 2023-08-18T19:26:00
[20] 2023-08-18T21:44:00
[21] 2023-08-19T00:19:00
[22] 2023-08-19T08:06:00
[23] 2023-08-19T10:45:00
[24] 2023-08-19T10:57:00
[25] 2023-08-19T13:12:00
[26] 2023-08-19T15:28:00
[27] 2023-08-19T21:28:00
[28] 2023-08-20T07:35:00
[29] 2023-08-20T07:45:00
[30] 2023-08-20T11:36:00
[31] 2023-08-20T11:55:00
[32] 2023-08-20T12:37:00
[33] 2023-08-20T14:51:00
[34] 2023-08-20T15:24:00
[35] 2023-08-20T15:43:00
[36] 2023-08-20T20:23:00
[37] 2023-08-20T20:48:00
[38] 2023-08-20T21:00:00
[

Enter the number corresponding to your choice:  45



Select a value for 'service_type':
[0] AC Repair
[1] Bike Service
[2] Birthday Booking
[3] Car Service
[4] Career
[5] Catering
[6] Cleaning
[7] Consultation
[8] Dine-in
[9] Engine Repair
[10] Extraction
[11] Facial
[12] Filling
[13] Fridge Repair
[14] Hair Color
[15] Haircut
[16] Legal
[17] Manicure
[18] Microwave Repair
[19] Nutrition
[20] Orthodontics
[21] Pedicure
[22] Reservation
[23] Tax
[24] Tyre Change
[25] Washing Machine Repair


Enter the number corresponding to your choice:  45


Invalid choice. Please enter a valid number.


Enter the number corresponding to your choice:  12



Select a value for 'staff_assigned':
[0] Aaron Acosta
[1] Aaron Beck
[2] Aaron Brown
[3] Aaron Erickson
[4] Aaron Estes
[5] Aaron Fowler
[6] Aaron Franklin
[7] Aaron Fritz
[8] Aaron George
[9] Aaron Gordon
[10] Aaron Harding
[11] Aaron Harper
[12] Aaron James
[13] Aaron Jenkins
[14] Aaron Lam
[15] Aaron Lopez
[16] Aaron Mitchell
[17] Aaron Phillips
[18] Aaron Rice
[19] Aaron Romero
[20] Aaron Rose
[21] Aaron Sanchez DVM
[22] Aaron Sanders
[23] Aaron Saunders
[24] Aaron Schmidt
[25] Aaron Sellers
[26] Aaron Smith
[27] Aaron Solis
[28] Aaron Stokes
[29] Aaron Thomas
[30] Aaron Walker
[31] Aaron Williams
[32] Aaron Wilson
[33] Aaron Wood
[34] Abigail Armstrong
[35] Abigail Baird
[36] Abigail Bradley
[37] Abigail Davis
[38] Abigail Pierce
[39] Abigail Robertson
[40] Abigail Summers
[41] Abigail Thornton
[42] Abigail Wilson
[43] Abigail Zuniga
[44] Adam Alexander
[45] Adam Alvarado
[46] Adam Bentley
[47] Adam Cross
[48] Adam Daniels
[49] Adam Davenport
[50] Adam Diaz
[51] Adam Garcia
[52] 

Enter the number corresponding to your choice:  1
Enter value for 'duration_mins':  2
Enter value for 'reschedule_count':  1
Enter value for 'lead_time_minutes':  1
Enter value for 'holiday_flag':  1



Select a value for 'weather':
[0] Cloudy
[1] Rainy
[2] Storm
[3] Sunny
[4] Windy


Enter the number corresponding to your choice:  1



Select a value for 'channel':
[0] online
[1] phone
[2] walk-in


Enter the number corresponding to your choice:  1
Enter value for 'price':  10
Enter value for 'rating':  10



Select a value for 'name':
[0] Aaron Deleon
[1] Aaron Hernandez
[2] Aaron Navarro
[3] Aaron Nelson
[4] Aaron Rivera
[5] Aaron Wall
[6] Abigail Cole
[7] Adam Charles
[8] Adam Cooley
[9] Adam Gibbs
[10] Adam James
[11] Adam Munoz
[12] Adam Wilson
[13] Adrienne Scott
[14] Alan Clark
[15] Alan Cook
[16] Alan Lawrence
[17] Albert Jackson
[18] Alec Mejia
[19] Alejandro Hensley
[20] Alex Perez
[21] Alexa Carter
[22] Alexa Morales
[23] Alexa Riley
[24] Alexa Rodriguez
[25] Alexander Burnett
[26] Alexander Garcia
[27] Alexander Grant
[28] Alexander Harris
[29] Alexander Johnson
[30] Alexander Maldonado
[31] Alexandra Brown
[32] Alexandra Cardenas
[33] Alexandra Chandler DVM
[34] Alexandra Cooper
[35] Alexandra Parker
[36] Alexandra Yoder
[37] Alexandria Beltran
[38] Alexandria Morales
[39] Alexis Herrera
[40] Alexis Kidd
[41] Alexis Martin
[42] Alexis Murray
[43] Alexis Robinson
[44] Alice Carlson
[45] Alice Norton
[46] Alicia Davis
[47] Alicia Lopez
[48] Alison Smith
[49] Allen Berry
[50] All

Enter the number corresponding to your choice:  1
Enter value for 'mobile':  22



Select a value for 'email':
[0] aaguilar@gmail.com
[1] aandrade@hernandez.net
[2] aandrews@yahoo.com
[3] aarondickerson@hotmail.com
[4] abaldwin@hotmail.com
[5] aberger@yahoo.com
[6] abigail16@pham-anderson.info
[7] abond@cooper-bell.com
[8] abullock@owen.info
[9] acampbell@gmail.com
[10] achen@yahoo.com
[11] adam68@anderson-gray.org
[12] adam91@hotmail.com
[13] adam94@mendoza.org
[14] adamblevins@gmail.com
[15] adamhorn@gmail.com
[16] adamroberson@yahoo.com
[17] adamsbrenda@hotmail.com
[18] adamsheather@gmail.com
[19] adamskathy@yahoo.com
[20] adamskelsey@hamilton.net
[21] adixon@hotmail.com
[22] adkinsjesus@garner.org
[23] adrienneperry@hotmail.com
[24] afoley@hotmail.com
[25] aglover@gmail.com
[26] aguirrejames@hotmail.com
[27] aharding@ferguson.com
[28] ahaynes@yahoo.com
[29] ahernandez@yahoo.com
[30] ahill@cunningham.com
[31] aholland@ellis.com
[32] aimeekennedy@yahoo.com
[33] ajohnson@gmail.com
[34] alan55@alexander.com
[35] alan66@gmail.com
[36] alanshields@gmail.com
[37] aleon

Enter the number corresponding to your choice:  1



Select a value for 'gender':
[0] Female
[1] Male
[2] Other


Enter the number corresponding to your choice:  1
Enter value for 'age':  2



Select a value for 'city':
[0] Aaronbury
[1] Aaronport
[2] Aaronshire
[3] Aaronside
[4] Aaronview
[5] Abigailhaven
[6] Adamshaven
[7] Adamsside
[8] Adamstad
[9] Adkinsbury
[10] Adrianview
[11] Adrienneville
[12] Aguilarborough
[13] Aguirreville
[14] Alanborough
[15] Alanhaven
[16] Alexanderbury
[17] Alexanderland
[18] Alexanderview
[19] Aliciastad
[20] Alisonhaven
[21] Allenfort
[22] Allenmouth
[23] Allenport
[24] Allenview
[25] Allisonberg
[26] Allisonbury
[27] Allisonview
[28] Alvarezton
[29] Alvintown
[30] Alyssashire
[31] Alyssatown
[32] Amandachester
[33] Amandahaven
[34] Amandaton
[35] Amberfurt
[36] Amyburgh
[37] Amyville
[38] Andersenchester
[39] Andersonbury
[40] Andersonfort
[41] Andersonmouth
[42] Andersonport
[43] Andersonstad
[44] Andersonville
[45] Andreafurt
[46] Andreaside
[47] Andreastad
[48] Andrefort
[49] Andrewbury
[50] Andrewmouth
[51] Andrewschester
[52] Andrewside
[53] Andrewville
[54] Angelachester
[55] Angelafurt
[56] Angelaland
[57] Angelamouth
[58] Angelapor

Enter the number corresponding to your choice:  1



Select a value for 'joined_date':
[0] 2021-07-29
[1] 2021-07-31
[2] 2021-08-01
[3] 2021-08-02
[4] 2021-08-03
[5] 2021-08-04
[6] 2021-08-05
[7] 2021-08-06
[8] 2021-08-07
[9] 2021-08-08
[10] 2021-08-09
[11] 2021-08-10
[12] 2021-08-11
[13] 2021-08-13
[14] 2021-08-14
[15] 2021-08-15
[16] 2021-08-16
[17] 2021-08-17
[18] 2021-08-18
[19] 2021-08-19
[20] 2021-08-20
[21] 2021-08-21
[22] 2021-08-22
[23] 2021-08-23
[24] 2021-08-25
[25] 2021-08-26
[26] 2021-08-27
[27] 2021-08-28
[28] 2021-08-29
[29] 2021-08-30
[30] 2021-08-31
[31] 2021-09-01
[32] 2021-09-02
[33] 2021-09-03
[34] 2021-09-04
[35] 2021-09-05
[36] 2021-09-06
[37] 2021-09-07
[38] 2021-09-08
[39] 2021-09-09
[40] 2021-09-10
[41] 2021-09-11
[42] 2021-09-13
[43] 2021-09-14
[44] 2021-09-15
[45] 2021-09-16
[46] 2021-09-17
[47] 2021-09-18
[48] 2021-09-20
[49] 2021-09-21
[50] 2021-09-22
[51] 2021-09-25
[52] 2021-09-26
[53] 2021-09-27
[54] 2021-09-28
[55] 2021-09-30
[56] 2021-10-03
[57] 2021-10-04
[58] 2021-10-05
[59] 2021-10-06
[60] 2021-10-07

Enter the number corresponding to your choice:  2



Select a value for 'tags':
[0] loyal
[1] nan
[2] new
[3] occasional
[4] price-sensitive
[5] vip


Enter the number corresponding to your choice:  5



Select a value for 'primary_business':
[0] Appliance Repair
[1] Auto Repair
[2] Cafe
[3] Consultant
[4] Dentist
[5] Salon


Enter the number corresponding to your choice:  1



Select a value for 'primary_service':
[0] AC Repair
[1] Bike Service
[2] Birthday Booking
[3] Car Service
[4] Career
[5] Catering
[6] Cleaning
[7] Consultation
[8] Dine-in
[9] Engine Repair
[10] Extraction
[11] Facial
[12] Filling
[13] Fridge Repair
[14] Hair Color
[15] Haircut
[16] Legal
[17] Manicure
[18] Microwave Repair
[19] Nutrition
[20] Orthodontics
[21] Pedicure
[22] Reservation
[23] Tax
[24] Tyre Change
[25] Washing Machine Repair


Enter the number corresponding to your choice:  5



Prediction for your input:
The predicted status is: 4




# import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, accuracy_score
from sklearn.impute import SimpleImputer
import joblib  # For saving/loading model
import os      # For file existence check

# --- 1. Load dataset ---
try:
    df = pd.read_excel("new.xlsx")
except FileNotFoundError:
    print("Error: The file 'new.xlsx' was not found.")
    exit()

# Drop unnecessary columns
if "Unnamed: 0" in df.columns:
    df = df.drop(columns=["Unnamed: 0"])

# Copy original dataframe
df_original = df.copy()

# --- 2. Handle categorical variables ---
cat_cols = df.select_dtypes(include=['object']).columns
print("Categorical Columns:", list(cat_cols))

label_encoders = {}
for col in cat_cols:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col].astype(str))
    label_encoders[col] = le

# --- 3. Split Features & Target ---
# Assuming 'status' is the target column
X = df.drop(columns=["status"])
y = df["status"]

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Handle missing values
imputer = SimpleImputer(strategy="constant", fill_value=0)
X_train = imputer.fit_transform(X_train)
X_test = imputer.transform(X_test)

# --- 4. Train Gradient Boosting Classifier ---
print("\n--- Model Training: Gradient Boosting Classifier ---")
model = GradientBoostingClassifier(
    n_estimators=500,
    learning_rate=0.0092,
    subsample=0.84555,
    max_depth=10,
    random_state=42
)
model.fit(X_train, y_train)
print("✅ Gradient Boosting Classifier trained.")

# --- 5. Evaluate ---
y_pred = model.predict(X_test)

print("\nAccuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))

# --- 6. Save the trained model and LabelEncoders ---
print("\n--- Saving the trained model and LabelEncoders ---")
joblib.dump(model, "mango_model.pkl")
joblib.dump(label_encoders, "mango_label_encoders.pkl")
print("✅ Model and LabelEncoders saved successfully as 'mango_model.pkl' and 'mango_label_encoders.pkl'.")

# --- 7. Load the saved model and LabelEncoders ---
print("\n--- Loading the saved model and LabelEncoders ---")
if os.path.exists("mango_model.pkl") and os.path.exists("mango_label_encoders.pkl"):
    loaded_model = joblib.load("mango_model.pkl")
    loaded_label_encoders = joblib.load("mango_label_encoders.pkl")
    print("✅ Model and LabelEncoders loaded successfully.")
else:
    print("Error: Saved model or encoder files not found.")
    exit()

# --- 8. Test with new user input ---
print("\n--- Testing with User Input ---")
user_input_data = {}

for col in X.columns:
    if col in cat_cols:
        le = loaded_label_encoders[col]
        options = le.classes_
        print(f"\nSelect a value for '{col}':")
        for i, option in enumerate(options):
            print(f"[{i}] {option}")
        while True:
            try:
                choice_index = int(input("Enter the number corresponding to your choice: "))
                if 0 <= choice_index < len(options):
                    value = options[choice_index]
                    break
                else:
                    print("Invalid choice. Please enter a valid number.")
            except ValueError:
                print("Invalid input. Please enter a number.")
    else:
        value = input(f"Enter value for '{col}': ")
    user_input_data[col] = [value]

# Convert input to DataFrame
user_df = pd.DataFrame(user_input_data)

# Encode categorical columns
for col in cat_cols:
    if col in user_df.columns:
        le = loaded_label_encoders[col]
        user_df[col] = le.transform(user_df[col])

# Ensure numeric format
user_df = user_df.apply(pd.to_numeric)

# Prediction
prediction = loaded_model.predict(user_df)

print("\nPrediction for your input:")
print(f"The predicted status is: {prediction[0]}")


In [None]:
# --- 7. Load the saved model and LabelEncoders ---
print("\n--- Loading the saved model and LabelEncoders ---")
if os.path.exists("mango_model.pkl") and os.path.exists("mango_label_encoders.pkl"):
    loaded_model = joblib.load("mango_model.pkl")
    loaded_label_encoders = joblib.load("mango_label_encoders.pkl")
    print("✅ Model and LabelEncoders loaded successfully.")
else:
    print("Error: Saved model or encoder files not found.")
    exit()

# --- 8. Test with new user input ---
print("\n--- Testing with User Input ---")
user_input_data = {}

for col in X.columns:
    if col in cat_cols:
        le = loaded_label_encoders[col]
        options = le.classes_
        print(f"\nSelect a value for '{col}':")
        for i, option in enumerate(options):
            print(f"[{i}] {option}")
        while True:
            try:
                choice_index = int(input("Enter the number corresponding to your choice: "))
                if 0 <= choice_index < len(options):
                    value = options[choice_index]
                    break
                else:
                    print("Invalid choice. Please enter a valid number.")
            except ValueError:
                print("Invalid input. Please enter a number.")
    else:
        value = input(f"Enter value for '{col}': ")
    user_input_data[col] = [value]

# Convert input to DataFrame
user_df = pd.DataFrame(user_input_data)

# Encode categorical columns
for col in cat_cols:
    if col in user_df.columns:
        le = loaded_label_encoders[col]
        user_df[col] = le.transform(user_df[col])

# Ensure numeric format
user_df = user_df.apply(pd.to_numeric)

# Prediction
prediction = loaded_model.predict(user_df)

print("\nPrediction for your input:")
print(f"The predicted status is: {prediction[0]}")
