In [33]:
import numpy as np  
import pandas as pd  
from matplotlib import pyplot as plt  
from sklearn.preprocessing import LabelEncoder  
from sklearn.model_selection import train_test_split  
from sklearn.tree import DecisionTreeClassifier  
from sklearn.metrics import accuracy_score  

In [34]:
DATA_PATH = 'IRIS.csv'  # resolved relative to the notebook's working directory
df = pd.read_csv(DATA_PATH)  # load the CSV contents into a DataFrame

In [35]:
N_PREVIEW_ROWS = 10  # how many leading rows to preview
df.head(N_PREVIEW_ROWS)  # display the first 10 rows of the dataset

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa
5,5.4,3.9,1.7,0.4,Iris-setosa
6,4.6,3.4,1.4,0.3,Iris-setosa
7,5.0,3.4,1.5,0.2,Iris-setosa
8,4.4,2.9,1.4,0.2,Iris-setosa
9,4.9,3.1,1.5,0.1,Iris-setosa


In [36]:
n_rows, n_cols = df.shape  # DataFrame dimensions: rows first, then columns
(n_rows, n_cols)  # display the same (rows, columns) tuple


(150, 5)

In [37]:
# Print a concise schema summary: index range, each column's non-null count and dtype, and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   sepal_length  150 non-null    float64
 1   sepal_width   150 non-null    float64
 2   petal_length  150 non-null    float64
 3   petal_width   150 non-null    float64
 4   species       150 non-null    object 
dtypes: float64(4), object(1)
memory usage: 6.0+ KB


In [38]:
# Element-wise missing-value mask: True where a cell is missing, False otherwise.
# `isna` is the same method as `isnull`.
df.isna()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,False,False,False,False,False
1,False,False,False,False,False
2,False,False,False,False,False
3,False,False,False,False,False
4,False,False,False,False,False
...,...,...,...,...,...
145,False,False,False,False,False
146,False,False,False,False,False
147,False,False,False,False,False
148,False,False,False,False,False


In [39]:
missing_per_column = df.isna().sum()  # number of missing entries in each column
missing_per_column

sepal_length    0
sepal_width     0
petal_length    0
petal_width     0
species         0
dtype: int64

In [40]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns
numeric_summary = df.describe()
numeric_summary

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
count,150.0,150.0,150.0,150.0
mean,5.843333,3.054,3.758667,1.198667
std,0.828066,0.433594,1.76442,0.763161
min,4.3,2.0,1.0,0.1
25%,5.1,2.8,1.6,0.3
50%,5.8,3.0,4.35,1.3
75%,6.4,3.3,5.1,1.8
max,7.9,4.4,6.9,2.5


In [41]:
# Create the label encoder that the following cell fits on the 'species' column to turn class names into integers.
encoder = LabelEncoder()

In [42]:
# Fit the encoder on the species names and replace the column with the resulting integer codes.
# NOTE(review): this overwrites the original column in place, so re-running the cell
# would refit the encoder on the integer codes instead of the class names.
species_codes = encoder.fit_transform(df['species'])
df['species'] = species_codes

In [43]:
# Spot-check 5 randomly chosen rows of the DataFrame (including the encoded 'species' column).
# A fixed random_state makes the same rows appear on every run, matching the seed
# used for the train/test split and the model.
df.sample(5, random_state=42)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
120,6.9,3.2,5.7,2.3,2
13,4.3,3.0,1.1,0.1,0
49,5.0,3.3,1.4,0.2,0
66,5.6,3.0,4.5,1.5,1
110,6.5,3.2,5.1,2.0,2


In [44]:
column_name = 'species'  # column whose distinct values are listed
unique_values = df[column_name].unique()  # distinct values found in that column
# Emit the heading and the values with a single call, one item per line
print(f"Unique values in the '{column_name}' column:", unique_values, sep="\n")

Unique values in the 'species' column:
[0 1 2]


In [45]:
# Model inputs: the four measurement columns
feature_columns = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width']
X = df[feature_columns]
# Model target: the species column
Y = df['species']

In [46]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
# NOTE(review): the split is not stratified, so class proportions in the test set
# may differ from those in the full dataset.
TEST_FRACTION = 0.2
SPLIT_SEED = 42
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=TEST_FRACTION, random_state=SPLIT_SEED
)

In [47]:
# Build a decision-tree classifier with a fixed seed, then fit it on the training split.
model = DecisionTreeClassifier(random_state=42)
model.fit(X=X_train, y=Y_train)

In [48]:
# Predict a class label for every row of the held-out test features.
Y_pred = model.predict(X_test)

In [50]:
# Score the test-set predictions against the true labels and report the result
accuracy = accuracy_score(y_true=Y_test, y_pred=Y_pred)
print("Accuracy:", accuracy)

Accuracy: 1.0
