In [3]:
import pandas as pd 
import seaborn as sns 
from sklearn.model_selection import train_test_split 
from sklearn.ensemble import RandomForestClassifier 
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import LabelEncoder

# Create the dataset.
# Each tuple in `records` describes one person; fields follow `column_names`.
column_names = [
    "age", "likes_cheese", "fast_food", "Salary", "location",
    "Awareness", "working", "Health", "likes_pizza",
]
records = [
    (15, 1, 0, 50000, "Urban", "know", "IT", "Good", "yes"),
    (25, 1, 0, 100000, "Urban", "know", "IT", "Bad", "yes"),
    (35, 0, 1, 10000, "Rural", "Unknow", "Farmer", "Bad", "no"),
    (45, 1, 0, 80000, "Urban", "know", "Engineer", "Good", "yes"),
    (55, 0, 1, 8000, "Rural", "Unknow", "Driver", "Bad", "no"),
    (22, 1, 0, 45000, "Urban", "know", "Pilot", "Good", "yes"),
    (40, 0, 1, 11000, "Rural", "Unknow", "Painter", "Bad", "no"),
    (28, 1, 0, 120000, "Urban", "know", "IT", "Good", "yes"),
    (18, 1, 0, 130000, "Urban", "know", "Artist", "Good", "yes"),
    (60, 0, 1, 13000, "Rural", "Unknow", "Player", "Bad", "no"),
]

# Pivot the row-wise records into a column-name -> list-of-values mapping.
data = {name: list(values) for name, values in zip(column_names, zip(*records))}

# Create DataFrame (one column per key of `data`, one row per record)
df = pd.DataFrame(data)

# Encode categorical features.
# One LabelEncoder per text column is created up front and kept in
# `label_encoders`, so the same string -> integer mapping can be reused later
# on new inputs.
categorical_cols = ["location", "Awareness", "working", "Health"]
label_encoders = {name: LabelEncoder() for name in categorical_cols}

# Fit each encoder on its column and overwrite the column in `df` with the
# resulting integer codes.
for name, encoder in label_encoders.items():
    df[name] = encoder.fit_transform(df[name])

# Prepare features and target.
# The feature columns are listed explicitly so the order the model is trained
# on is fixed and visible here.
feature_cols = [
    "age", "likes_cheese", "fast_food", "Salary",
    "location", "working", "Health", "Awareness",
]
X = df[feature_cols]
y = df["likes_pizza"]

# Split data into training and testing sets (30% held out, fixed seed).
# stratify=y keeps the yes/no proportion in both parts. With only 10 rows an
# unstratified shuffle can put a single class in the 3-row test set, which
# makes the accuracy and classification report say nothing about the other
# class.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

# Build the Random Forest with a fixed seed and train it on the training split.
# fit() returns the estimator itself, so construction and training are chained.
model = RandomForestClassifier(random_state=42).fit(X_train, y_train)

# Predict labels for the held-out test rows
y_pred = model.predict(X_test)

# Score the test-set predictions first, then report everything together.
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

# Display results: the (encoded) DataFrame, then accuracy and per-class metrics
print("\nDataFrame:\n", df)
print("\nAccuracy:", accuracy)
print("\nClassification Report:\n", report)

# Make a prediction for a new example.
# Numeric fields are given directly; categorical fields are given as their raw
# text values and converted with the encoders fitted during training.
example_person = {
    "age": 30,
    "likes_cheese": 1,
    "fast_food": 0,
    "Salary": 90000,
}
example_categories = {
    "location": "Urban",
    "Awareness": "know",
    "working": "IT",
    "Health": "Good",
}
for column, raw_value in example_categories.items():
    # transform() takes a sequence and returns an array; [0] extracts the single code.
    example_person[column] = label_encoders[column].transform([raw_value])[0]

# Convert to a one-row DataFrame and put the columns in the training feature order
example_df = pd.DataFrame([example_person]).loc[:, X_train.columns]

# Debugging: Print feature orders
print("\nTraining feature order:", list(X_train.columns))
print("Example feature order:", list(example_df.columns))

# Make a prediction and phrase the outcome
prediction = model.predict(example_df)
verdict = 'likes' if prediction[0] == 'yes' else 'does not like'
print(f"\nPrediction: The person {verdict} pizza.")


DataFrame:
    age  likes_cheese  fast_food  Salary  location  Awareness  working  Health  \
0   15             1          0   50000         1          1        4       1   
1   25             1          0  100000         1          1        4       0   
2   35             0          1   10000         0          0        3       0   
3   45             1          0   80000         1          1        2       1   
4   55             0          1    8000         0          0        1       0   
5   22             1          0   45000         1          1        6       1   
6   40             0          1   11000         0          0        5       0   
7   28             1          0  120000         1          1        4       1   
8   18             1          0  130000         1          1        0       1   
9   60             0          1   13000         0          0        7       0   

  likes_pizza  
0         yes  
1         yes  
2          no  
3         yes  
4          no  