# Implement and demonstrate Logistic Regression algorithm on sklearn dataset – Iris Flower to classify species of the flower.
Dataset: https://www.kaggle.com/datasets/arshid/iris-flower-dataset/code


# Import necessary Libraries

In [1]:
#import the necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import mean_absolute_error , mean_squared_error , r2_score

In [2]:
# Read the Iris CSV from the Kaggle input directory into a DataFrame
df = pd.read_csv("/kaggle/input/datset-iris/IRIS.csv")

In [3]:
# Dimensions of the loaded DataFrame as a (rows, columns) tuple
df.shape

(150, 5)

In [None]:
# Preview the first five rows (5 is head()'s default, spelled out here)
df.head(n=5)

In [None]:
# Preview the last five rows (5 is tail()'s default, spelled out here)
df.tail(n=5)

In [None]:
# Boolean mask of the whole DataFrame: True wherever a cell is missing.
# isna() is the same method as isnull() under its primary name.
df.isna()

In [None]:
# Count the missing cells in each column and print the per-column totals
missing_per_column = df.isna().sum()
print(missing_per_column)

In [None]:
# Display the summary statistics of the dataset.
# df.describe() reports count, mean, std, min, quartiles and max for each
# numerical column of the DataFrame.
df.describe()

In [None]:
# Print a concise summary of the DataFrame: column names, non-null counts,
# dtypes and memory usage.
df.info()

In [None]:
# Grid of pairwise plots across the DataFrame's variables
sns.pairplot(data=df)

In [None]:
# Plot one box per feature column of the dataset.
plt.figure(figsize=(10, 5))
# Pass the frame by keyword: older seaborn releases do not treat the first
# positional argument as `data`, so a positional DataFrame is not portable.
sns.boxplot(data=df)
plt.title('Boxplot for each feature of Iris Dataset')
# Tilt the column names so they do not overlap on the x-axis.
plt.xticks(rotation=45)
plt.show()

In [None]:
# One 20-bin histogram per DataFrame column, under a shared figure title
df.hist(bins=20, figsize=(10, 5))
plt.suptitle('Distribution of Each feature', fontsize=16)
plt.show()

In [None]:
# Draw a separate histogram with a KDE overlay for every column of df
for col_name in df.columns:
    plt.figure(figsize=(10, 6))
    sns.histplot(df[col_name], kde=True)
    plt.title(f'Distribution of {col_name}', fontsize=14)
    plt.ylabel('frequency')
    plt.xlabel(col_name)
    plt.show()

In [None]:
# Print the dtype of every column.
print(df.dtypes)

In [None]:
from sklearn.preprocessing import LabelEncoder

# Replace the 'species' column with integer codes. The fitted encoder is kept
# in `label_encoder` so the codes can be mapped back to the original labels.
label_encoder = LabelEncoder()
label_encoder.fit(df['species'])
df['species'] = label_encoder.transform(df['species'])
df.head()

In [None]:
# Pairwise correlation coefficients between all columns of df.
# 'pearson' is corr()'s default method, spelled out here for clarity.
tc = df.corr(method='pearson')

In [None]:
# Display the correlation matrix stored in tc
tc

In [None]:
# Visualize the correlation matrix as a heatmap.
#   annot=True       -> write each coefficient inside its cell
#   cmap='coolwarm'  -> colour map used for the cells
plt.figure(figsize=(10, 5))
# Recompute the matrix rounded to two decimals so the annotations stay short
tc = df.corr().round(decimals=2)
sns.heatmap(data=tc, cmap='coolwarm', annot=True)


# Prepare the data for training the Logistic Regression model

In [None]:
# Split the DataFrame into the feature matrix and the target vector:
#   x -> every column except 'species'
#   y -> the 'species' column
x = df.drop(columns='species')
y = df['species']

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 25% of the rows for testing; random_state=0 makes the split repeatable
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.25,
    random_state=0,
)

# Standardize the feature variables

In [None]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling parameters from the training split only, then apply the
# same fitted transformation to both the training and the test split.
scaler = StandardScaler()
scaler.fit(x_train)
x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)


# Train a Logistic Regression model using the training data

In [None]:
from sklearn import metrics

# Fit a LogisticRegression with default settings on the training split and
# predict the class of every test row.
model = LogisticRegression()
model.fit(x_train, y_train)
prediction = model.predict(x_test)
# accuracy_score is documented as (y_true, y_pred); pass the true labels first.
print('The accuracy of the logistic regression is:', metrics.accuracy_score(y_test, prediction))

In [None]:
# Report the (rows, columns) shape of each feature split
for split_label, split_features in (("Train Shape", x_train), ("Test Shape", x_test)):
    print(split_label, split_features.shape)

In [None]:
# Model score on the training split, expressed as a percentage
100 * model.score(x_train, y_train)

In [None]:
# Model score on the held-out test split, expressed as a percentage
100 * model.score(x_test, y_test)

In [None]:
# Show the fitted parameters of the logistic regression model
print("Coefficients", model.coef_)
print("Intercept:", model.intercept_)

# Evaluate the Model's Performance

In [None]:
# Predict a class for every test row and print the raw predictions
y_pred = model.predict(x_test)
print(y_pred)

In [None]:
# Error metrics between the true and predicted test labels.
# NOTE(review): these are regression metrics applied to class labels; they
# measure distance between label values rather than classification quality.
# Confirm this is intended alongside the accuracy and classification report.
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
print("Mean Absolute Error(MAE):", mae)
# Label fixed: this line reports the MSE, not the MAE.
print("Mean Squared Error(MSE):", mse)
print("Root Mean Squared Error(RMSE):", rmse)

In [None]:
# Score the model on the full dataset.
# The model was fitted on features transformed by `scaler`, so the raw feature
# frame `x` must go through the same fitted scaler before scoring; scoring the
# unscaled values gives a misleading result.
# NOTE: for a classifier, score() returns mean accuracy, not an R^2 value. The
# variable name is kept because a later cell displays it.
R2_score = model.score(scaler.transform(x), y)


In [None]:
# Display the value stored in R2_score by the previous cell
R2_score

In [None]:
from sklearn.svm import SVC

# Fit a support vector classifier with default settings on the same training
# split, for comparison with the logistic regression model.
svc = SVC()
svc.fit(x_train, y_train)
# Note: this rebinds `prediction` to the SVC predictions.
prediction = svc.predict(x_test)
# accuracy_score is documented as (y_true, y_pred); pass the true labels first.
print('The accuracy of the SVC is:', metrics.accuracy_score(y_test, prediction))

# Confusion Matrix

In [None]:
from sklearn.metrics import classification_report, confusion_matrix

# Raw-count confusion matrix for y_pred against y_test
# (rows: true class, columns: predicted class)
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print('Confusion matrix, without normalization')
print(cm)



In [None]:
from sklearn.metrics import classification_report

# Label the report rows with the original species names instead of
# placeholder names. LabelEncoder.classes_ lists the labels in the same order
# as the integer codes it assigned.
# Assumes `label_encoder` is the encoder fitted on the 'species' column earlier
# in the notebook, and that every class appears in y_test or y_pred.
species_names = [str(name) for name in label_encoder.classes_]
print(classification_report(y_test, y_pred, target_names=species_names))
