In [None]:
# Predicting Flower Types with Random Forest Algorithms


import pandas as pd
from sklearn.model_selection import train_test_split   
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report


# Location of the training CSV -- change path if needed.
path = "D:\\Projects\\flower_classification_based_on_type_of_class-main\\Train.csv"
data = pd.read_csv(path)

# Safety: column headers may carry hidden leading/trailing spaces; strip
# them so columns can be selected by their exact names.
data.columns = [label.strip() for label in data.columns]

# Features & target
# The feature frame is named `features` (not `input`) so that the builtin
# input() function is not shadowed for the rest of the script.
features = data[['Locality_Code', 'Species', 'Height', 'Diameter']]
output = data['Class']


# Train-test split
# Hold out 20% of the rows for evaluation. Stratifying on the target keeps
# the class proportions the same in both parts, and the fixed random_state
# makes the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    features,
    output,
    test_size=0.2,
    random_state=42,
    stratify=output,
)

# Preprocessing
# Categorical columns are one-hot encoded; numeric columns are forwarded
# to the model unchanged.
categorical_features = ['Species', 'Locality_Code']
numeric_features = ['Height', 'Diameter']

# handle_unknown='ignore': a category that was not seen during fitting is
# encoded as all zeros instead of raising an error at prediction time.
category_encoder = OneHotEncoder(handle_unknown='ignore')

preprocessor = ColumnTransformer(
    transformers=[
        ('cat', category_encoder, categorical_features),
        ('num', 'passthrough', numeric_features),
    ],
)


# Model
# Random forest hyper-parameters, collected in one place.
forest_params = {
    'n_estimators': 200,
    'max_depth': None,
    'random_state': 42,
    'class_weight': 'balanced',  # helps if classes are imbalanced
}
rf_model = RandomForestClassifier(**forest_params)

# Chain preprocessing and the classifier so that both are fitted and
# applied together through a single object.
pipeline = Pipeline([
    ('preprocess', preprocessor),
    ('model', rf_model),
])

# Train on the training split, then predict the held-out test split.
# Pipeline.fit returns the fitted pipeline, so the two calls can be chained.
y_pred = pipeline.fit(x_train, y_train).predict(x_test)

# Evaluate: print each metric under its heading, comparing the true test
# labels against the predictions.
for heading, metric in (
    ("Accuracy:", accuracy_score),
    ("\nConfusion Matrix:\n", confusion_matrix),
    ("\nClassification Report:\n", classification_report),
):
    print(heading, metric(y_test, y_pred))


# Predict new flower
# A one-row frame with the same feature columns the pipeline was fitted on.
new_flower = pd.DataFrame({
    'Locality_Code': [16],
    'Species': [9],
    'Height': [11],
    'Diameter': [45],
})

# predict() returns one label per input row; there is a single row here.
predicted_class = pipeline.predict(new_flower)

print("Predicted Class: ", predicted_class[0])