# **Logistic Regression**

### Real life examples  

=====================  
Gaming - win vs. loss  
Sales - Buying or not buying  
Marketing - Response or no response  
Credit card - Fraud or no fraud  
Loan - Default or no default  
Website - click or no click  
Healthcare - Cure or no cure  

# **Project Age vs Buying**  

In [191]:
import warnings 
# Installs a global "ignore" filter: every warning category is silenced for
# all cells that run after this one.
# NOTE(review): this also hides library diagnostics raised by the models
# fitted later in the notebook — consider narrowing the filter.
warnings.filterwarnings('ignore')
import pandas as pd

# Load the product-sales data and preview its first two rows.
sales = pd.read_csv('Product_sales.csv')
sales.head(2)

Unnamed: 0,Age,Bought
0,1,0
1,1,0


In [192]:
# Preview the last two rows as well.
sales.tail(2)

Unnamed: 0,Age,Bought
465,50,0
466,61,1


In [193]:
# Build Logistic Regression Model
from sklearn.linear_model import LogisticRegression
# Unfitted classifier created with the constructor's default arguments.
logistic = LogisticRegression()


In [194]:
# Fit the classifier: Age (kept two-dimensional by the double brackets) is the
# only predictor and Bought is the label column.
logistic.fit(X=sales[['Age']], y=sales['Bought'])

LogisticRegression()

In [195]:
# Four-year-old child
# Query with a one-row DataFrame that carries the same 'Age' column name the
# model was fitted on, so the input's feature names match the training data.
logistic.predict(pd.DataFrame({'Age': [4]}))  # if output = 0, the person will not buy.

array([0], dtype=int64)

In [196]:
# 45-year-old man
# Query with a one-row DataFrame that carries the same 'Age' column name the
# model was fitted on, so the input's feature names match the training data.
logistic.predict(pd.DataFrame({'Age': [45]}))  # if output = 1, the person will buy.

array([1], dtype=int64)

# **Project Mushrooms Classification - Edible / Toxic**  

In [197]:
# Load the mushroom dataset and preview its first two rows.
df = pd.read_csv('mushrooms.csv')
df.head(2)

Unnamed: 0,class,cap-shape,cap-surface,cap-color,bruises,odor,gill-attachment,gill-spacing,gill-size,gill-color,...,stalk-surface-below-ring,stalk-color-above-ring,stalk-color-below-ring,veil-type,veil-color,ring-number,ring-type,spore-print-color,population,habitat
0,p,x,s,n,t,p,f,c,n,k,...,s,w,w,p,w,o,p,k,s,u
1,e,x,s,y,t,a,f,c,b,k,...,s,w,w,p,w,o,p,n,n,g


Attribute Information: (classes: edible=e, poisonous=p)

cap-shape: bell=b,conical=c,convex=x,flat=f, knobbed=k,sunken=s

cap-surface: fibrous=f,grooves=g,scaly=y,smooth=s

cap-color: brown=n,buff=b,cinnamon=c,gray=g,green=r,pink=p,purple=u,red=e,white=w,yellow=y

bruises: bruises=t,no=f

odor: almond=a,anise=l,creosote=c,fishy=y,foul=f,musty=m,none=n,pungent=p,spicy=s

gill-attachment: attached=a,descending=d,free=f,notched=n

gill-spacing: close=c,crowded=w,distant=d

gill-size: broad=b,narrow=n

gill-color: black=k,brown=n,buff=b,chocolate=h,gray=g, green=r,orange=o,pink=p,purple=u,red=e,white=w,yellow=y

stalk-shape: enlarging=e,tapering=t

stalk-root: bulbous=b,club=c,cup=u,equal=e,rhizomorphs=z,rooted=r,missing=?

stalk-surface-above-ring: fibrous=f,scaly=y,silky=k,smooth=s

stalk-surface-below-ring: fibrous=f,scaly=y,silky=k,smooth=s

stalk-color-above-ring: brown=n,buff=b,cinnamon=c,gray=g,orange=o,pink=p,red=e,white=w,yellow=y

stalk-color-below-ring: brown=n,buff=b,cinnamon=c,gray=g,orange=o,pink=p,red=e,white=w,yellow=y

veil-type: partial=p,universal=u

veil-color: brown=n,orange=o,white=w,yellow=y

ring-number: none=n,one=o,two=t

ring-type: cobwebby=c,evanescent=e,flaring=f,large=l,none=n,pendant=p,sheathing=s,zone=z

spore-print-color: black=k,brown=n,buff=b,chocolate=h,green=r,orange=o,purple=u,white=w,yellow=y

population: abundant=a,clustered=c,numerous=n,scattered=s,several=v,solitary=y

habitat: grasses=g,leaves=l,meadows=m,paths=p,urban=u,waste=w,woods=d


In [198]:
# (rows, columns) of the mushroom table.
df.shape

(8124, 23)

In [199]:
# Count the rows per class label to see how balanced the classes are.
df['class'].value_counts()

e    4208
p    3916
Name: class, dtype: int64

In [200]:
# Check for any missing values
# NOTE(review): the attribute list above codes missing stalk-root values as
# '?'; isnull() does not count those, so zeros here do not rule them out —
# verify with (df['stalk-root'] == '?').sum().
df.isnull().sum()  # per-column null count; any non-zero value means that column has missing entries

class                       0
cap-shape                   0
cap-surface                 0
cap-color                   0
bruises                     0
odor                        0
gill-attachment             0
gill-spacing                0
gill-size                   0
gill-color                  0
stalk-shape                 0
stalk-root                  0
stalk-surface-above-ring    0
stalk-surface-below-ring    0
stalk-color-above-ring      0
stalk-color-below-ring      0
veil-type                   0
veil-color                  0
ring-number                 0
ring-type                   0
spore-print-color           0
population                  0
habitat                     0
dtype: int64

## **Encode categorical features as numbers**

In [201]:
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import numpy as np

# Replace every column's letter codes with integer codes, in place.
# A separate encoder is fitted per column and kept in `encoders`, so each
# integer can be mapped back to its original letter later, e.g.
# encoders['class'].classes_ or encoders['class'].inverse_transform([0, 1]).
# NOTE: run this cell once on the freshly loaded frame; re-running it refits
# the encoders on the integer codes and loses the letter mapping.
# NOTE(review): the features are nominal categories, and integer codes imply
# an ordering to a linear model; one-hot encoding is usually a better fit.
# Label encoding is kept so the cells that follow still apply unchanged.
encoders = {}

for col in df.columns:
    le = LabelEncoder()
    df[col] = le.fit_transform( df[col] )
    encoders[col] = le

df

Unnamed: 0,class,cap-shape,cap-surface,cap-color,bruises,odor,gill-attachment,gill-spacing,gill-size,gill-color,...,stalk-surface-below-ring,stalk-color-above-ring,stalk-color-below-ring,veil-type,veil-color,ring-number,ring-type,spore-print-color,population,habitat
0,1,5,2,4,1,6,1,0,1,4,...,2,7,7,0,2,1,4,2,3,5
1,0,5,2,9,1,0,1,0,0,4,...,2,7,7,0,2,1,4,3,2,1
2,0,0,2,8,1,3,1,0,0,5,...,2,7,7,0,2,1,4,3,2,3
3,1,5,3,8,1,6,1,0,1,5,...,2,7,7,0,2,1,4,2,3,5
4,0,5,2,3,0,5,1,1,0,4,...,2,7,7,0,2,1,0,3,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8119,0,3,2,4,0,5,0,0,0,11,...,2,5,5,0,1,1,4,0,1,2
8120,0,5,2,4,0,5,0,0,0,11,...,2,5,5,0,0,1,4,0,4,2
8121,0,2,2,4,0,5,0,0,0,5,...,2,5,5,0,1,1,4,0,1,2
8122,1,3,3,4,0,8,1,0,1,0,...,1,7,7,0,2,1,0,7,4,2


In [202]:
# Split the dataframe into x & y.
# Target vector: the first column of the frame as a NumPy array.
# iloc syntax: df.iloc[row_start:row_end, column_start:column_end]
y = df.iloc[:, 0].to_numpy()
type(y)

numpy.ndarray

In [203]:
# Feature matrix: every column after the first one. The open-ended slice
# avoids hard-coding the column count, so it keeps working if the frame gains
# or loses feature columns.
x = df.iloc[:, 1:].values
type(x)

numpy.ndarray

In [204]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the
# shuffle repeatable between runs.

x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=0,
)

In [205]:
# Build model
from sklearn.linear_model import LogisticRegression

# Unfitted classifier; all settings other than random_state are left at the
# constructor defaults.
# NOTE(review): random_state only matters for solvers that shuffle the data —
# confirm it has any effect with the default solver.
clf = LogisticRegression(random_state=0)

In [206]:
# Training the model
# NOTE(review): warnings are globally silenced in the first code cell, so a
# convergence warning from this fit would not be shown — consider checking
# clf.n_iter_ against the iteration limit.
clf.fit(x_train, y_train)

LogisticRegression(random_state=0)

In [207]:
# Predict y_predict from x_test
# x_test is the held-out 20% produced by train_test_split.
y_predict = clf.predict(x_test)

In [208]:
# Validation - Confusion matrix

from sklearn.metrics import confusion_matrix
# Rows are the true labels and columns are the predicted labels
# (scikit-learn's convention), so the diagonal holds the correctly
# classified test samples.
cm = confusion_matrix(y_test, y_predict)
cm

array([[820,  32],
       [ 41, 732]], dtype=int64)

<img src='confusion_matrix.png'>

In [209]:
# Accuracy by hand: correct predictions (the diagonal of the confusion matrix)
# divided by all predictions. The figures are read from `cm` instead of being
# copied out of the printed output, so the result stays correct if the data,
# the split, or the model changes.
accuracy = float(cm.trace() / cm.sum())
accuracy #Manual method to find accuracy.

0.955076923076923

In [210]:
from sklearn.metrics import accuracy_score
# Cross-check the hand-computed figure with scikit-learn's built-in metric.
accuracy1 = accuracy_score(y_test, y_predict)
accuracy1  # Using the library to find accuracy.

0.955076923076923

## **Project 3: Telecom customer attrition prediction**

In [211]:
# Load the telecom customer data and preview its first two rows.
fiber = pd.read_csv('Fiberbits.csv')
fiber.head(2)


Unnamed: 0,active_cust,income,months_on_network,Num_complaints,number_plan_changes,relocated,monthly_bill,technical_issues_per_month,Speed_test_result
0,0,1586,85,4,1,0,121,4,85
1,0,1581,85,4,1,0,133,4,85


In [212]:
# (rows, columns) of the telecom table.
fiber.shape

(100000, 9)

In [213]:
# Count the rows per value of the target column to see the class balance.
fiber.active_cust.value_counts()

1    57859
0    42141
Name: active_cust, dtype: int64

In [214]:
# Separate the target column from the predictors.
y = fiber['active_cust']
x = fiber.drop(columns=['active_cust'])


In [215]:
# Build Logistic Regression Model
from sklearn.linear_model import LogisticRegression
# Unfitted classifier created with the constructor's default arguments.
logistic1 = LogisticRegression()

In [216]:
# Train the model
# Fitted on the full table: no rows are held out for testing in this project.
logistic1.fit(x, y)

LogisticRegression()

In [217]:
# Predict y from x
# NOTE(review): `x` is the same data logistic1 was fitted on, so any metric
# computed from y_predict is an in-sample figure and is likely optimistic
# compared with unseen customers. Consider a train/test split as in the
# mushroom project.
y_predict = logistic1.predict(x)

In [218]:
# Confusion matrix of actual vs. predicted customer status.
# The labels are passed as a 1-D Series: scikit-learn documents y_true as
# shape (n_samples,), and a single-column DataFrame would only work through
# implicit flattening.
cm1 = confusion_matrix(fiber['active_cust'], y_predict)
cm1

array([[26034, 16107],
       [13114, 44745]], dtype=int64)

In [219]:
# Accuracy by hand: correct predictions (the diagonal of the confusion matrix)
# divided by all predictions. The figures are read from `cm1` instead of being
# copied out of the printed output, so the result stays correct if the data or
# the model changes.
accuracy2 = float(cm1.trace() / cm1.sum())
accuracy2 #Manual method to find accuracy.

0.70779