In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import make_pipeline
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.neural_network import MLPClassifier

# Load the coffee review data and prepare features, target and preprocessing.
# `data_path` is the single source of truth for the input file; the data is an
# Excel workbook, so it is read with `read_excel`.
data_path = "/Users/krishnavarma/Developer/UG AI MATERIAL /4th SEM COURSE/ML/ML CLG PR.CODES!/A7 ML/simplified_coffee.xlsx"
coffee_data = pd.read_excel(data_path)

# Features are every column except the target ('rating') and the columns that
# are not used for modelling.
X_data = coffee_data.drop(['name', 'rating', 'review_date', 'review'], axis=1)
y_data = coffee_data['rating']
num_features = ['100g_USD']
cat_features = ['roaster', 'roast', 'loc_country', 'origin']

# Standardise the numeric column and one-hot encode the categorical ones.
# handle_unknown='ignore' keeps prediction from failing on categories that
# were not present in the training split.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), num_features),
        ('cat', OneHotEncoder(handle_unknown='ignore'), cat_features)])

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(X_data, y_data, test_size=0.2, random_state=42)

# Decision Tree: fit preprocessing + classifier on the training split, then
# report accuracy on the held-out split.
dt_pipeline = make_pipeline(
    preprocessor,
    DecisionTreeClassifier(random_state=42),
).fit(X_train, y_train)
dt_predictions = dt_pipeline.predict(X_test)
dt_accuracy = accuracy_score(y_test, dt_predictions)
print(f"Decision Tree Accuracy: {dt_accuracy}")

# Random Forest: fit preprocessing + classifier on the training split, then
# report accuracy on the held-out split.
rf_pipeline = make_pipeline(
    preprocessor,
    RandomForestClassifier(random_state=42),
).fit(X_train, y_train)
rf_predictions = rf_pipeline.predict(X_test)
rf_accuracy = accuracy_score(y_test, rf_predictions)
print(f"Random Forest Accuracy: {rf_accuracy}")

# AdaBoost: fit preprocessing + classifier on the training split, then
# report accuracy on the held-out split.
ab_pipeline = make_pipeline(
    preprocessor,
    AdaBoostClassifier(random_state=42),
).fit(X_train, y_train)
ab_predictions = ab_pipeline.predict(X_test)
ab_accuracy = accuracy_score(y_test, ab_predictions)
print(f"AdaBoost Accuracy: {ab_accuracy}")

# MLP with RandomizedSearchCV: tune the hidden layer size and activation with
# 3-fold cross-validation on the training split.
mlp_pipeline = make_pipeline(preprocessor, MLPClassifier(max_iter=1000, random_state=42))
param_dist_mlp = {
    'mlpclassifier__hidden_layer_sizes': [(50,), (100,)],
    'mlpclassifier__activation': ['tanh', 'relu'],
}

# Every entry above is a finite list, so the search space has a fixed number of
# combinations. Asking for more iterations than that makes RandomizedSearchCV
# emit a UserWarning and fall back to the full grid anyway, so cap n_iter.
n_candidates = 1
for options in param_dist_mlp.values():
    n_candidates *= len(options)

rnd_search_mlp = RandomizedSearchCV(mlp_pipeline, param_distributions=param_dist_mlp,
                                    n_iter=min(5, n_candidates), cv=3, scoring='accuracy',
                                    random_state=42)
rnd_search_mlp.fit(X_train, y_train)
print(f"MLP Best Score: {rnd_search_mlp.best_score_}")
print(f"MLP Best Params: {rnd_search_mlp.best_params_}")

# best_score_ is a mean cross-validation score on the training split. Also
# score the refitted best model on the held-out split so the MLP is evaluated
# the same way as the other classifiers.
mlp_predictions = rnd_search_mlp.predict(X_test)
print(f"MLP Test Accuracy: {accuracy_score(y_test, mlp_predictions)}")


Decision Tree Accuracy: 0.34
Random Forest Accuracy: 0.296
AdaBoost Accuracy: 0.308




MLP Best Score: 0.3102409638554217
MLP Best Params: {'mlpclassifier__hidden_layer_sizes': (50,), 'mlpclassifier__activation': 'tanh'}


