In [1]:
# Import necessary libraries
import os  # For reading environment-variable overrides

import pandas as pd  # For data manipulation
from sklearn.linear_model import LogisticRegression  # Logistic Regression model
from sklearn.metrics import accuracy_score  # For evaluating the model's accuracy
from sklearn.model_selection import train_test_split  # For splitting the dataset

In [2]:
# Step 1: Load the dataset
# The CSV location defaults to '/content/iris.csv' but can be overridden by
# setting the IRIS_CSV_PATH environment variable, so the notebook can be run
# in another environment without editing this cell.
DATA_PATH = os.environ.get('IRIS_CSV_PATH', '/content/iris.csv')
df = pd.read_csv(DATA_PATH)

In [3]:
# Step 2: Quick sanity check of the loaded data (optional)
# Printing the leading rows confirms the file was parsed as expected.
preview_rows = df.head(n=5)  # First five rows of the dataset
print(preview_rows)

   sepal_length  sepal_width  petal_length  petal_width species
0           5.1          3.5           1.4          0.2  setosa
1           4.9          3.0           1.4          0.2  setosa
2           4.7          3.2           1.3          0.2  setosa
3           4.6          3.1           1.5          0.2  setosa
4           5.0          3.6           1.4          0.2  setosa


In [4]:
# Step 3: Prepare the dataset
# The four measurement columns become the feature matrix (X);
# the 'species' column is the label to predict (y).
feature_columns = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width']
X = df[feature_columns]  # Feature variables
y = df['species']  # Target variable

In [5]:
# Step 4: Hold out a test set
# 20% of the rows (test_size=0.2) are set aside so the model can later be
# scored on data it was not trained on; random_state is fixed at 42.
split_parts = train_test_split(X, y, random_state=42, test_size=0.2)
X_train, X_test, y_train, y_test = split_parts

In [6]:
# Step 5: Build and fit the classifier
# Logistic Regression is used as the classification model. max_iter is set
# to 200 to give the solver more iterations to converge.
model = LogisticRegression(max_iter=200)
model.fit(X=X_train, y=y_train)  # Fit on the training portion only


In [7]:
# Step 6: Score the model on the training set
# Predictions are made for the same rows the model was fitted on and compared
# with the known labels to obtain the training accuracy.
y_train_pred = model.predict(X_train)
train_accuracy = accuracy_score(y_true=y_train, y_pred=y_train_pred)
print(f'Training Accuracy: {train_accuracy * 100:.2f}%')

Training Accuracy: 97.50%


In [8]:
# Step 7: Score the model on the held-out test set
# Predictions are made for the rows kept out of training and compared with
# their known labels to obtain the testing accuracy.
y_test_pred = model.predict(X_test)
test_accuracy = accuracy_score(y_true=y_test, y_pred=y_test_pred)
print(f'Testing Accuracy: {test_accuracy * 100:.2f}%')

Testing Accuracy: 100.00%
