In [None]:
import pandas as pd
import numpy as np
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns


In [None]:
# Fetch the Wine dataset bundle and unpack the feature matrix and labels
wine = load_wine()
X, y = wine.data, wine.target

# Human-readable names for the feature columns and the target classes
feature_names, target_names = wine.feature_names, wine.target_names

# Tabular view of the data for exploration: one column per feature,
# plus the class label in a trailing 'target' column
df = pd.DataFrame(X, columns=feature_names).assign(target=y)

# Preview the first rows of the table
print("First few rows of the Wine dataset:")
print(df.head())


In [None]:
# Hold out 20% of the samples for evaluation (fixed seed for reproducibility).
# stratify=y keeps the class proportions of the train and test splits close to
# those of the full dataset, so the held-out metrics are not distorted by an
# unlucky draw that under-represents one class.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)


In [None]:
# Standardize the features. The scaler's statistics are learned from the
# training split only and then reused for the test split, so nothing about
# the test data influences the preprocessing.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [None]:
# Random Forest hyper-parameters: 100 trees, fixed seed for reproducible fits
rf_params = {"n_estimators": 100, "random_state": 42}

# Build the classifier and fit it on the scaled training split
rf_model = RandomForestClassifier(**rf_params)
rf_model.fit(X_train_scaled, y_train)


In [None]:
# Predict the class of every sample in the held-out split
y_pred = rf_model.predict(X_test_scaled)

# Pin the label order explicitly so the report rows and the confusion-matrix
# axes always line up with target_names, even if some class happens to be
# absent from y_test / y_pred. Classes are integer-encoded as indices into
# target_names (the same convention the prediction cell relies on).
class_labels = np.arange(len(target_names))

# Per-class precision / recall / F1, labelled with the class names
print("\nClassification Report:")
print(
    classification_report(
        y_test, y_pred, labels=class_labels, target_names=target_names
    )
)

# Confusion matrix: rows are true classes, columns are predicted classes
cm = confusion_matrix(y_test, y_pred, labels=class_labels)

fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=target_names,
    yticklabels=target_names,
    ax=ax,
)
ax.set_title('Confusion Matrix')
ax.set_ylabel('True Label')
ax.set_xlabel('Predicted Label')
plt.show()


In [None]:
# Pair every feature name with the importance score the fitted forest
# assigned to it, then rank from most to least important
importance_table = pd.DataFrame(
    {'feature': feature_names, 'importance': rf_model.feature_importances_}
)
feature_importance = importance_table.sort_values(by='importance', ascending=False)

# Horizontal bar chart of the ranked importances
fig, ax = plt.subplots(figsize=(10, 5))
sns.barplot(x='importance', y='feature', data=feature_importance, ax=ax)
ax.set_title('Feature Importance in Random Forest Model')
ax.set_xlabel('Importance Score')
fig.tight_layout()
plt.show()


In [None]:
# A single hand-written sample; values must follow the same column order
# as the training features. Reshaped to (1, n_features) because the scaler
# and the model both expect a 2-D array.
new_sample = np.array(
    [13.5, 2.0, 2.5, 15.0, 95.0, 2.3, 2.6, 1.3, 1.8, 4.5, 1.1, 3.2, 1000]
).reshape(1, -1)

# Apply the already-fitted scaler, then classify the scaled sample
new_sample_scaled = scaler.transform(new_sample)
prediction = rf_model.predict(new_sample_scaled)

# The predicted label is an integer index into target_names
predicted_class = target_names[prediction[0]]

print(f"Prediction for the new sample {new_sample[0]}:")
print(f"Predicted Class: {predicted_class}")
