In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.metrics import accuracy_score

# Sample Data: Create a DataFrame with categorical features and a categorical target
data = pd.DataFrame({
    'Color': ['Red', 'Blue', 'Green', 'Blue', 'Red'],
    'Size': ['S', 'M', 'L', 'M', 'S'],
    'Shape': ['Circle', 'Square', 'Triangle', 'Square', 'Circle'],
    'Label': ['ClassA', 'ClassB', 'ClassC', 'ClassB', 'ClassA']  # Multi-class target variable
})

# Define features and target
X = data[['Color', 'Size', 'Shape']]
y = data['Label']

# Define categorical columns to encode
categorical_features = ['Color', 'Size', 'Shape']

# Preprocess categorical features
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', OneHotEncoder(), categorical_features)
    ])

# Encode target labels to integers
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Define the model
model = RandomForestClassifier()

# Create a pipeline with preprocessor and model
clf = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', model)
])

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Train the model
clf.fit(X_train, y_train)

# Make predictions
y_pred = clf.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')

# Making a prediction with new data
new_data = pd.DataFrame({
    'Color': ['Green'],
    'Size': ['S'],
    'Shape': ['Triangle']
})
prediction = clf.predict(new_data)
predicted_class = label_encoder.inverse_transform(prediction)
print(f'Prediction for new data: {predicted_class[0]}')


Accuracy: 1.00
Prediction for new data: ClassA


In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.metrics import accuracy_score

# Sample Data: Create a DataFrame with categorical features and a categorical target
data = pd.DataFrame({
    'Color': ['Red', 'Blue', 'Green', 'Blue', 'Red'],
    'Size': ['S', 'M', 'L', 'M', 'S'],
    'Shape': ['Circle', 'Square', 'Triangle', 'Square', 'Circle'],
    'Label': ['ClassA', 'ClassB', 'ClassC', 'ClassB', 'ClassA']  # Multi-class target variable
})

# Define features and target
X = data[['Color', 'Size', 'Shape']]
y = data['Label']

# Define categorical columns to encode
categorical_features = ['Color', 'Size', 'Shape']

# Preprocess categorical features
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', OneHotEncoder(), categorical_features)
    ])

# Encode target labels to integers
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Define the model
model = RandomForestClassifier()

# Create a pipeline with preprocessor and model
clf = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', model)
])

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Train the model
clf.fit(X_train, y_train)

# Make predictions
y_pred = clf.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')

# Making a prediction with new data
new_data = pd.DataFrame({
    'Color': ['Green'],
    'Size': ['S'],
    'Shape': ['Triangle']
})
prediction = clf.predict(new_data)
predicted_class = label_encoder.inverse_transform(prediction)
print(f'Prediction for new data: {predicted_class[0]}')


Accuracy: 1.00
Prediction for new data: ClassC
