# Ensemble Method: CatBoost and Random Forest

In [36]:
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from catboost import CatBoostClassifier
from sklearn.preprocessing import LabelEncoder
import pandas as pd

# Load the wine dataset that ships with scikit-learn and separate the
# feature matrix from the class labels.
data = load_wine()
X = data.data
y = data.target

# Hold out 20% of the samples for evaluation; the fixed seed keeps the
# split identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# First ensemble member: a 100-tree random forest.
rf_classifier = RandomForestClassifier(random_state=42, n_estimators=100)
rf_classifier.fit(X_train, y_train)

# Second ensemble member: CatBoost with 100 boosting iterations, trained
# silently (verbose=False suppresses the per-iteration log).
catboost_classifier = CatBoostClassifier(
    verbose=False,
    learning_rate=0.1,
    iterations=100,
)
catboost_classifier.fit(X_train, y_train)

def get_user_input(expected_count=None):
    """Prompt for a comma-separated list of numeric wine features.

    Parameters
    ----------
    expected_count : int, optional
        When given, the number of values the user must supply. Leave as
        ``None`` (the default) to accept any number of values.

    Returns
    -------
    list of float
        The parsed values, in the order the user typed them.

    Raises
    ------
    ValueError
        If any comma-separated entry is empty or not a valid number, or if
        ``expected_count`` is given and the number of entries differs.
    """
    user_input = input("Enter wine features separated by commas: ")

    user_values = []
    for position, token in enumerate(user_input.split(','), start=1):
        cleaned = token.strip()
        try:
            user_values.append(float(cleaned))
        except ValueError as exc:
            # Report which entry is broken instead of float()'s bare message,
            # which gives no hint about the position of the bad value.
            raise ValueError(
                f"Feature #{position} is not a valid number: {cleaned!r}"
            ) from exc

    if expected_count is not None and len(user_values) != expected_count:
        raise ValueError(
            f"Expected {expected_count} feature values, got {len(user_values)}"
        )
    return user_values

def ensemble_prediction(input_data):
    """Return the class index favoured by the two-model soft-voting ensemble.

    Both module-level classifiers (``rf_classifier`` and
    ``catboost_classifier``) score ``input_data``; only the first row of each
    probability matrix is used, so the function classifies a single sample.
    The two probability vectors are averaged with equal weight and the
    position of the largest averaged probability is returned.
    """
    rf_row, catboost_row = (
        model.predict_proba(input_data)[0]
        for model in (rf_classifier, catboost_classifier)
    )
    averaged = (rf_row + catboost_row) / 2
    return averaged.argmax()

# Read one sample from the user and wrap it as a single-row frame so the
# classifiers receive 2-D input.
user_data = get_user_input()
user_data_df = pd.DataFrame([user_data])

predicted_class = ensemble_prediction(user_data_df)

# load_wine() exposes class labels through `target_names`. For a dataset held
# in a DataFrame, look the label up in that dataset's own target-name mapping
# instead.
wine_type = data.target_names[predicted_class]

print(f"The predicted wine type for the user input is: {wine_type}")

# Accuracy of each individual model on the held-out test set.
rf_accuracy = rf_classifier.score(X_test, y_test)
catboost_accuracy = catboost_classifier.score(X_test, y_test)

print(f"Random Forest Classifier Accuracy: {rf_accuracy:.2f}")
print(f"CatBoost Classifier Accuracy: {catboost_accuracy:.2f}")

# Accuracy of the ensemble itself: average the two models' class
# probabilities for every test sample, take the most probable class, and
# compare with the true labels. Averaging the two accuracy scores (as was done
# previously) does not measure how the combined model performs.
ensemble_test_proba = (
    rf_classifier.predict_proba(X_test) + catboost_classifier.predict_proba(X_test)
) / 2
ensemble_test_pred = rf_classifier.classes_[ensemble_test_proba.argmax(axis=1)]
ensemble_accuracy = (ensemble_test_pred == y_test).mean()
print(f"Ensemble Model Accuracy: {ensemble_accuracy:.2f}")


Enter wine features separated by commas: 14.23, 2.8, 5.64, 1.71, 2.43, 15.6, 127.0, 2.8, 3.06, 0.28, 2.29, 5.64, 1.04
The predicted wine type for the user input is: class_1
Random Forest Classifier Accuracy: 1.00
CatBoost Classifier Accuracy: 1.00
Ensemble Model Accuracy: 1.00


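The inputs used above are: 14.23, 2.8, 5.64, 1.71, 2.43, 15.6, 127.0, 2.8, 3.06, 0.28, 2.29, 5.64, 1.04

Note: these values do not appear to follow the dataset's feature order (alcohol, malic_acid, ash, alcalinity_of_ash, magnesium, ...); for example, 2.43 lands in the magnesium position and 127.0 in the flavanoids position. Treat the prediction above as a demonstration of the workflow only.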

# User Input:

In [25]:
def get_user_input(expected_count=None):
    """Prompt for the wine features, named in order, as comma-separated numbers.

    Parameters
    ----------
    expected_count : int, optional
        When given, the number of values the user must supply. Leave as
        ``None`` (the default) to accept any number of values.

    Returns
    -------
    list of float
        The parsed values, in the order the user typed them.

    Raises
    ------
    ValueError
        If any comma-separated entry is empty or not a valid number, or if
        ``expected_count`` is given and the number of entries differs.
    """
    user_input = input("Enter wine features separated by commas (alcohol, malic_acid, ash, alcalinity_of_ash, magnesium, total_phenols, flavanoids, nonflavanoid_phenols, proanthocyanins, color_intensity, hue, od280/od315_of_diluted_wines, proline): ")

    user_values = []
    for position, token in enumerate(user_input.split(','), start=1):
        cleaned = token.strip()
        try:
            user_values.append(float(cleaned))
        except ValueError as exc:
            # Point at the offending entry; float()'s own message does not say
            # which position in the list was malformed.
            raise ValueError(
                f"Feature #{position} is not a valid number: {cleaned!r}"
            ) from exc

    if expected_count is not None and len(user_values) != expected_count:
        raise ValueError(
            f"Expected {expected_count} feature values, got {len(user_values)}"
        )
    return user_values

def ensemble_prediction(input_data):
    """Classify one sample by equal-weight averaging of both models' probabilities.

    Uses the module-level ``rf_classifier`` and ``catboost_classifier``. Only
    the first row of each ``predict_proba`` result is considered, so a single
    sample is classified per call. Returns the index of the class whose
    averaged probability is highest.
    """
    forest_scores = rf_classifier.predict_proba(input_data)
    boosting_scores = catboost_classifier.predict_proba(input_data)

    # Simple soft vote: mean of the two first-row probability vectors.
    blended = (forest_scores[0] + boosting_scores[0]) / 2

    # Position of the largest blended probability is the predicted class index.
    return blended.argmax()

# Read one sample from the user and convert it to a 2-D NumPy array
# (one row), matching the array input the models were trained on.
user_data = get_user_input()

user_data_df = pd.DataFrame([user_data]).values

predicted_class = ensemble_prediction(user_data_df)

# Get the wine type label using the target classes from the dataset
wine_type = data.target_names[predicted_class]

print(f"The predicted wine type for the user input is: {wine_type}")

# Use the test set to evaluate model accuracy
rf_accuracy = rf_classifier.score(X_test, y_test)
catboost_accuracy = catboost_classifier.score(X_test, y_test)

print(f"Random Forest Classifier Accuracy: {rf_accuracy:.2f}")
print(f"CatBoost Classifier Accuracy: {catboost_accuracy:.2f}")

# Evaluate the ensemble itself rather than averaging the two scores above:
# average the class probabilities for every test sample, pick the most
# probable class, and compare with the true labels.
ensemble_test_proba = (
    rf_classifier.predict_proba(X_test) + catboost_classifier.predict_proba(X_test)
) / 2
ensemble_test_pred = rf_classifier.classes_[ensemble_test_proba.argmax(axis=1)]
ensemble_accuracy = (ensemble_test_pred == y_test).mean()
print(f"Ensemble Model Accuracy: {ensemble_accuracy:.2f}")


Enter wine features separated by commas (alcohol, malic_acid, ash, alcalinity_of_ash, magnesium, total_phenols, flavanoids, nonflavanoid_phenols, proanthocyanins, color_intensity, hue, od280/od315_of_diluted_wines, proline): 14.23, 2.8, 5.64, 1.71, 2.43, 15.6, 127, 2.80, 3.06, 0.28, 2.29, 5.64, 1.04
The predicted wine type for the user input is: class_1
Random Forest Classifier Accuracy: 1.00
CatBoost Classifier Accuracy: 1.00
Ensemble Model Accuracy: 1.00


In [29]:
# Inspect the list of floats parsed from the user's comma-separated input.
user_data

[14.23, 2.8, 5.64, 1.71, 2.43, 15.6, 127.0, 2.8, 3.06, 0.28, 2.29, 5.64, 1.04]

In [30]:
# Inspect the model input built from user_data. Despite the `_df` suffix this
# is a NumPy array: the cell above takes `.values` of a one-row DataFrame.
user_data_df

array([[ 14.23,   2.8 ,   5.64,   1.71,   2.43,  15.6 , 127.  ,   2.8 ,
          3.06,   0.28,   2.29,   5.64,   1.04]])