In [None]:
import ast

import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import MultiLabelBinarizer, LabelEncoder
from sklearn.metrics import classification_report, accuracy_score
import matplotlib.pyplot as plt
import seaborn as sns

# Load dataset
# Only these three columns are kept, so let the parser skip every other
# column of the CSV instead of loading them and discarding them afterwards.
data = pd.read_csv(
    '../datasets/XWines_Full_100K_wines.csv',
    usecols=['Harmonize', 'Type', 'Grapes'],
)
# usecols preserves the file's own column order; re-select so the frame has
# exactly the order listed here.
data = data[['Harmonize', 'Type', 'Grapes']]

# Preprocessing
def _parse_label_cell(cell):
    """Convert one raw 'Harmonize'/'Grapes' cell into a collection of labels.

    String cells are parsed as Python literals (presumably list literals --
    confirm against the CSV). ``ast.literal_eval`` is used instead of
    ``eval`` so that text stored in the data file can never execute code.
    Missing cells (None / NaN) become an empty list so that
    MultiLabelBinarizer, which iterates every row, does not fail on them.
    Any other value is passed through unchanged.

    Raises:
        ValueError / SyntaxError: if a string cell is not a valid literal.
    """
    if isinstance(cell, str):
        return ast.literal_eval(cell)
    if cell is None or (isinstance(cell, float) and pd.isna(cell)):
        return []
    return cell


data['Harmonize'] = data['Harmonize'].apply(_parse_label_cell)
data['Grapes'] = data['Grapes'].apply(_parse_label_cell)

# One indicator column per distinct label found in 'Harmonize'.
mlb_harmonize = MultiLabelBinarizer()
harmonize_encoded = pd.DataFrame(mlb_harmonize.fit_transform(data['Harmonize']))

# Same encoding for 'Grapes'; not used as a feature in the code visible here.
mlb_grapes = MultiLabelBinarizer()
grapes_encoded = pd.DataFrame(mlb_grapes.fit_transform(data['Grapes']))

# Replace the 'Type' values with integer class ids; label_encoder.classes_
# keeps the original names for reporting.
label_encoder = LabelEncoder()
data['Type'] = label_encoder.fit_transform(data['Type'])

# Features: the encoded 'Harmonize' labels. Target: the encoded 'Type'.
X = harmonize_encoded
y = data['Type']

# Train-test split
# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Random Forest with limited hyperparameter tuning
# Small grid: 2 forest sizes x 2 depth limits = 4 candidate models.
param_grid = {'n_estimators': [50, 100], 'max_depth': [None, 10]}
rf = RandomForestClassifier(random_state=42)

# 3-fold cross-validation on the training split, scored by accuracy.
search = GridSearchCV(
    estimator=rf,
    param_grid=param_grid,
    cv=3,
    scoring='accuracy',
)
search.fit(X_train, y_train)

# The best parameter combination, refit by GridSearchCV.
best_rf = search.best_estimator_

# Evaluation
y_pred = best_rf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

# LabelEncoder encodes classes_[i] as i, so the full label set is
# 0..len(classes_)-1. Passing it explicitly keeps target_names aligned with
# the labels even when a class is absent from both y_test and y_pred;
# without it classification_report raises on the length mismatch.
class_ids = list(range(len(label_encoder.classes_)))
report = classification_report(
    y_test,
    y_pred,
    labels=class_ids,
    target_names=label_encoder.classes_,
)

# Feature importance
# One importance per column of X, in the same order as the binarizer's classes.
importances = best_rf.feature_importances_
feature_names = mlb_harmonize.classes_

# Results
# One print call; sep="\n" puts each item on its own line.
print(
    f"Best Parameters: {search.best_params_}",
    f"Accuracy: {accuracy:.2f}",
    "Classification Report:",
    report,
    sep="\n",
)

# Plot feature importances
# Sort so the most important features come first; in raw encoder order the
# chart is hard to read.
ranked_importances = pd.Series(importances, index=feature_names).sort_values(ascending=False)

# Grow the figure with the number of bars so the y-axis labels do not overlap
# (never smaller than the original 10x6).
plt.figure(figsize=(10, max(6, 0.3 * len(ranked_importances))))
sns.barplot(x=ranked_importances.to_numpy(), y=ranked_importances.index.to_numpy())
plt.title('Feature Importance')
plt.xlabel('Importance')
plt.ylabel('Features')
plt.tight_layout()
plt.show()