In [7]:
import numpy as np
from sklearn import linear_model

In [8]:
# X: tumor sizes in centimeters.
# LogisticRegression() needs the features as a column (one row per sample),
# so the flat list of sizes is given a second axis of length 1.
X = np.array(
    [3.78, 2.44, 2.09, 0.14, 1.72, 1.65, 4.92, 4.37, 4.96, 4.52, 3.69, 5.88]
)[:, np.newaxis]

# y: whether each tumor is cancerous (0 for "No", 1 for "Yes"), in the same order as X.
y = np.array([0] * 6 + [1] * 6)

In [5]:
# Create a logistic regression classifier with scikit-learn's default settings
# and fit it on the tumor sizes X against the cancerous / non-cancerous labels y.
logr = linear_model.LogisticRegression()
logr.fit(X,y)

In [9]:
# Predict whether a tumor of size 3.55 cm is cancerous (0 for "No", 1 for "Yes").
# The single sample is passed as a 1x1 column, matching the layout of X.
y_pred = logr.predict(np.array([[3.55]]))
print(y_pred)

[1]


In [11]:
# The fitted coefficient is the change in log-odds for a one-unit increase in X
# (X is measured in centimeters).
log_odds = logr.coef_
# Exponentiating the log-odds change gives the factor by which the odds are
# multiplied for each one-unit increase in X.
odds = np.exp(log_odds)

print(odds)

[[4.03541657]]


It implies that `as the size of a tumor increases by 1 cm, the odds of it being cancerous increase by about 4x.`

The coefficient and intercept values are used for finding the probability that each tumor is cancerous.

Let's create a function that uses the model's coefficient and intercept values to compute a new value. This new value represents the probability that the given observation is cancerous:

In [12]:
def logit2prob(logr, x):
  """Convert the model's log-odds for x into probabilities.

  Parameters
  ----------
  logr : fitted model exposing ``coef_`` and ``intercept_``
      Their values are used as the slope and offset of the log-odds line.
  x : scalar or array
      Input value(s); must broadcast against ``logr.coef_``.

  Returns
  -------
  Probability for each input, i.e. the sigmoid of
  ``logr.coef_ * x + logr.intercept_`` (same shape as that expression).
  """
  # Log-odds are linear in x, analogous to the prediction formula of linear regression.
  log_odds = logr.coef_ * x + logr.intercept_
  # sigmoid(z) = odds / (1 + odds) = exp(-log(1 + exp(-z))).
  # Computing exp(z) directly overflows to inf for large z and yields
  # inf / inf = nan; logaddexp evaluates log(1 + exp(-z)) without overflow,
  # so extreme inputs correctly saturate at 0 or 1.
  probability = np.exp(-np.logaddexp(0, -log_odds))
  return probability

# Probability of being cancerous for every tumor size in X (the data the model was fitted on).
print(logit2prob(logr, X))

[[0.60749955]
 [0.19268876]
 [0.12775886]
 [0.00955221]
 [0.08038616]
 [0.07345637]
 [0.88362743]
 [0.77901378]
 [0.88924409]
 [0.81293497]
 [0.57719129]
 [0.96664243]]



The output can be interpreted in the following way:

1. The probability that a tumor with the size 3.78cm is cancerous is 60.75% (approx).
2. The probability that a tumor with the size 2.44cm is cancerous is 19.269% (approx).
3. The probability that a tumor with the size 2.09cm is cancerous is 12.776% (approx).