In [40]:
import pandas as pd
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

### 1. Read the dataset into the Python environment.

In [42]:
# Load the Iris workbook into a DataFrame and report the missing-value count per column.
# NOTE(review): this is an absolute, machine-specific path; the cell only runs
# where that exact file exists. Consider a path relative to a data directory.
file_path = r'C:\Users\acer\Downloads\iris (1).xls'
df = pd.read_excel(io=file_path)
missing_per_column = df.isna().sum()
print(missing_per_column)


SL                7
SW                6
PL                6
PW                0
Classification    0
dtype: int64


### 2. Pre-processing steps. 

In [43]:
# Fill missing values with each column's most frequent value.
# When the frame mixes numeric and string columns (as the label column here
# presumably does), the imputer hands back a plain object-dtype array, so a
# naive rebuild turns every numeric feature into `object`. Rebuild with the
# original index and cast each column back to its original dtype instead.
imputer = SimpleImputer(strategy='most_frequent')
original_dtypes = df.dtypes.to_dict()
df = pd.DataFrame(
    imputer.fit_transform(df), columns=df.columns, index=df.index
).astype(original_dtypes)

In [45]:
# Replace the class labels with integer codes; the fitted encoder stays
# available as `le` for later use.
le = LabelEncoder().fit(df['Classification'])
df['Classification'] = le.transform(df['Classification'])

In [46]:
# Separate the target column from the feature columns.
y = df['Classification']
X = df.drop(columns=['Classification'])


In [48]:
scaler = StandardScaler()
X = scaler.fit_transform(X)

In [49]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

### 3. Finding the best classification model. 

In [50]:
# Candidate classifiers to compare. The tree-based estimators are randomized,
# so they are seeded (same seed as the train/test split) to make the
# comparison reproducible across kernel restarts.
models = [
    LogisticRegression(),
    DecisionTreeClassifier(random_state=42),
    RandomForestClassifier(random_state=42)
]

# Running record of the best-scoring model seen so far.
best_model = None
best_accuracy = 0

In [51]:
# Reset the running best inside this cell so that re-running it never compares
# against a stale score left over from a previous execution.
best_model = None
best_accuracy = 0

for model in models:
    # Train on the training split and score on the held-out split.
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)

    # Strict '>' keeps the earliest entry in `models` when scores tie.
    if accuracy > best_accuracy:
        best_accuracy = accuracy
        best_model = model

# NOTE(review): the winner is chosen on the same test split it is reported on,
# so this accuracy is an optimistic estimate of generalization.
print("Best Model:", best_model)
print("Accuracy:", best_accuracy)

Best Model: DecisionTreeClassifier()
Accuracy: 1.0
