# Logistic Regression With IRIS data

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

## Load Data

In [None]:
import os
import urllib.request

data_location = "iris.csv"
data_url = 'https://elephantscale-public.s3.amazonaws.com/data/iris/iris_combined.csv'

# Look for the file at the configured path first; if it is not there, fall
# back to a same-named file in the working directory, and download it only
# when that is missing as well.
missing = not os.path.exists(data_location)
if missing:
    data_location = os.path.basename(data_location)
    missing = not os.path.exists(data_location)
if missing:
    print("Downloading : ", data_url)
    urllib.request.urlretrieve(data_url, data_location)
print('data_location:', data_location)

In [None]:
import pandas as pd
# Show floats with a thousands separator and two decimal places
two_decimals = '{:,.2f}'.format
pd.options.display.float_format = two_decimals

# Read the CSV located (or downloaded) above into a DataFrame
iris = pd.read_csv(data_location)

# Echo ten randomly sampled rows as a quick sanity check
iris.sample(n=10)

## Exploratory Data Analysis (EDA)

In [None]:
# Descriptive statistics for the columns of the loaded DataFrame;
# left as a bare expression so the notebook echoes the resulting table
iris.describe()

In [None]:
# How are our labels distributed?
# By default a notebook cell only echoes its final expression, so two bare
# value_counts() calls would silently drop the first result. Combine the
# absolute counts and the relative frequencies into one table instead.
label_counts = iris['label'].value_counts()
pd.DataFrame({
    'count': label_counts,
    'fraction': iris['label'].value_counts(normalize=True),
})

## Shape the data

In [None]:
# Feature matrix: columns c1..c4; target vector: the 'label' column
feature_columns = ['c1', 'c2', 'c3', 'c4']
x = iris[feature_columns]
y = iris['label']

# Report the dimensions of both pieces
for tag, part in (('x : ', x), ('y : ', y)):
    print(tag, part.shape)

## Split into training and testing data

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing. random_state pins the shuffle so the
# split, and therefore the reported accuracy, is the same on every run;
# change or remove the seed to see run-to-run variation.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42)
print("x_train :", x_train.shape)
print("x_test :", x_test.shape)
print("y_train :", y_train.shape)
print("y_test :", y_test.shape)

## Logistic Regression

In [None]:
from sklearn.linear_model import LogisticRegression

# Estimator capped at 500 solver iterations
lr = LogisticRegression(max_iter=500)

# Fit on the training split. fit() returns the estimator itself, so `model`
# is simply another name for the fitted `lr`.
lr.fit(x_train, y_train)
model = lr


In [None]:
# Show the learned parameters of the fitted model
for caption, learned in (('coef : ', model.coef_), ('intercept', model.intercept_)):
    print(caption, learned)

## Model Evaluation

In [None]:
# Predict a label for every row of the held-out test features
y_pred = model.predict (x_test)
# Bare expression so the notebook echoes the predictions
y_pred

In [None]:
# Score the fitted model on the held-out split and on the data it was
# trained on; a large gap between the two would suggest overfitting.
test_accuracy = model.score(x_test, y_test)
train_accuracy = model.score(x_train, y_train)

for caption, accuracy in (("Train accuracy: ", train_accuracy),
                          ("Test accuracy: ", test_accuracy)):
    print(caption, accuracy)

In [None]:
from sklearn.metrics import confusion_matrix
import numpy as np

# Fix the class order once and hand it to confusion_matrix explicitly.
# Without labels=, the matrix only covers classes that occur in y_test or
# y_pred, so a class missing from the test split would make it smaller than
# cm_labels and the DataFrame construction below would fail.
cm_labels = np.unique(y)
cm_array = confusion_matrix(y_test, y_pred, labels=cm_labels)

# Rows are the actual classes, columns the predicted classes
cm_df = pd.DataFrame(cm_array, index=cm_labels, columns=cm_labels)
cm_df

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

plt.figure(figsize=(8, 5))

# colormaps : cmap="YlGnBu" , cmap="Greens", cmap="Blues",  cmap="Reds"
# heatmap() already draws onto the current figure and returns its Axes; an
# extra argument-less .plot() on that Axes adds nothing and only echoes [].
ax = sns.heatmap(cm_df, annot=True, cmap="Reds", fmt='d')

# cm_df comes from confusion_matrix(y_test, y_pred): rows = actual, columns = predicted
ax.set_xlabel('Predicted label')
ax.set_ylabel('Actual label')
plt.show()