# Iris Species Predictor

This notebook trains a Random Forest classifier on the Iris flower dataset stored in `iris-3.csv`. It performs a stratified 80/20 train/test split, evaluates accuracy and per-class metrics on the held-out test set, and demonstrates how to generate a prediction for a new measurement.

In [None]:
from pathlib import Path

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report


In [None]:
# Load the Iris measurements from a CSV given by a relative path; fail fast
# with the resolved absolute path if the file is not there.
data_path = Path('iris-3.csv')
if data_path.exists():
    data = pd.read_csv(data_path)
else:
    raise FileNotFoundError(f'Could not find {data_path.resolve()}')

# Report the (rows, columns) shape, then preview the first rows as the cell output.
print(f'Dataset shape: {data.shape}')
data.head()


In [None]:
# Separate features (X) and target labels (y).
# The feature filter below ignores case, so resolve the target column the same
# way: prefer an exact 'species' header, otherwise accept a single
# case-insensitive match (e.g. 'Species') instead of failing on the lookup.
target_candidates = [col for col in data.columns if col.lower() == 'species']
if 'species' in target_candidates:
    target_column = 'species'
elif len(target_candidates) == 1:
    target_column = target_candidates[0]
else:
    raise KeyError(
        "Expected exactly one 'species' column (case-insensitive); "
        f'found {target_candidates} among {list(data.columns)}'
    )

feature_columns = [col for col in data.columns if col.lower() != 'species']
X = data[feature_columns]
# Normalise label text so stray whitespace or capitalisation differences do
# not create duplicate classes.
y_raw = data[target_column].astype(str).str.strip().str.lower()

# Encode species labels into integers for the model
y_encoder = LabelEncoder()
y = y_encoder.fit_transform(y_raw)
print('Encoded classes:', list(y_encoder.classes_))


In [None]:
# Hold out 20% of the rows for evaluation. Stratifying on the encoded labels
# keeps the class proportions comparable in both subsets, and the fixed seed
# makes the split reproducible.
split_options = dict(test_size=0.2, random_state=42, stratify=y)
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

print(f'Training samples: {len(X_train)}')
print(f'Testing samples: {len(X_test)}')


In [None]:
# Fit a 300-tree Random Forest on the training split; the fixed seed keeps
# the fitted forest reproducible between runs. `fit` returns the estimator
# itself, so construction and training can be chained.
model = RandomForestClassifier(n_estimators=300, random_state=42).fit(X_train, y_train)
print('Model training complete.')


In [None]:
# Evaluate the model on the held-out test set
y_pred = model.predict(X_test)

print('Accuracy:', f'{accuracy_score(y_test, y_pred):.3f}')
print('\nClassification report:')
# Pass the encoded label ids explicitly so every report row is paired with its
# species name by id. Without `labels`, the names are applied to whichever ids
# happen to appear in y_test/y_pred, which breaks once a class is absent.
class_ids = list(range(len(y_encoder.classes_)))
print(
    classification_report(
        y_test, y_pred, labels=class_ids, target_names=y_encoder.classes_
    )
)


In [None]:
# Example prediction for a new sample measurement
sample_measurement = pd.DataFrame([
    {
        'sepal_length': 6.1,
        'sepal_width': 2.8,
        'petal_length': 4.7,
        'petal_width': 1.3,
    }
])

# Present the sample with exactly the training columns, in training order, so
# the prediction does not depend on the key order of the dict above. Fail with
# an explicit message if the dataset uses feature names the sample lacks.
missing_features = [
    col for col in feature_columns if col not in sample_measurement.columns
]
if missing_features:
    raise KeyError(f'Sample measurement is missing feature columns: {missing_features}')
sample_features = sample_measurement[feature_columns]

# Decode the predicted integer id back to its species label.
predicted_species = y_encoder.inverse_transform(model.predict(sample_features))[0]
print('Predicted species for sample measurement:', predicted_species)
