In [8]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder

# Load your dataset CSV file
csv_path = "IRIS.csv"

# Read CSV into DataFrame
df = pd.read_csv(csv_path)

# Show rows of the data
print(df.head(150))

# Separate features and target
X = df.iloc[:, :-1]   
y = df.iloc[:, -1]   

# Convert species names to numbers
le = LabelEncoder()
y = le.fit_transform(y)

# Create a dictionary to map numbers back to species names
label_map = {i: species for i, species in enumerate(le.classes_)}

print("\nLabel mapping:", label_map)


     sepal_length  sepal_width  petal_length  petal_width         species
0             5.1          3.5           1.4          0.2     Iris-setosa
1             4.9          3.0           1.4          0.2     Iris-setosa
2             4.7          3.2           1.3          0.2     Iris-setosa
3             4.6          3.1           1.5          0.2     Iris-setosa
4             5.0          3.6           1.4          0.2     Iris-setosa
..            ...          ...           ...          ...             ...
145           6.7          3.0           5.2          2.3  Iris-virginica
146           6.3          2.5           5.0          1.9  Iris-virginica
147           6.5          3.0           5.2          2.0  Iris-virginica
148           6.2          3.4           5.4          2.3  Iris-virginica
149           5.9          3.0           5.1          1.8  Iris-virginica

[150 rows x 5 columns]

Label mapping: {0: 'Iris-setosa', 1: 'Iris-versicolor', 2: 'Iris-virginica'}


In [13]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

print("Training shape:", X_train.shape)
print("Testing shape:", X_test.shape)

Training shape: (120, 4)
Testing shape: (30, 4)


In [14]:
log_model = LogisticRegression(max_iter=200)
log_model.fit(X_train, y_train)

nb_model = GaussianNB()
nb_model.fit(X_train, y_train)

print("Models trained successfully")


Models trained successfully


In [15]:
log_preds = log_model.predict(X_test)
nb_preds = nb_model.predict(X_test)

log_acc = accuracy_score(y_test, log_preds)
nb_acc = accuracy_score(y_test, nb_preds)

print("Logistic Regression Accuracy:", log_acc)
print("Gaussian Naive Bayes Accuracy:", nb_acc)


Logistic Regression Accuracy: 1.0
Gaussian Naive Bayes Accuracy: 1.0


In [20]:
sample = [[5.1, 3.5, 1.4, 0.2]]

sample_df = pd.DataFrame(sample, columns=X.columns)

log_result = label_map[log_model.predict(sample_df)[0]]
nb_result = label_map[nb_model.predict(sample_df)[0]]

print("Logistic Regression Prediction:", log_result)
print("Gaussian NB Prediction:", nb_result)


Logistic Regression Prediction: Iris-setosa
Gaussian NB Prediction: Iris-setosa
