In [10]:
# LOGISTIC REGRESSION

import numpy as np

from sklearn import datasets
from sklearn.linear_model import LogisticRegression, LogisticRegressionCV
from sklearn.multiclass import OneVsRestClassifier
from sklearn.preprocessing import StandardScaler


In [5]:
# Load the iris data, keeping only the first 100 rows (two classes)
iris = datasets.load_iris()
features, target = iris.data[:100, :], iris.target[:100]

# Standardize features: learn the scaling first, then apply it
scaler = StandardScaler()
scaler.fit(features)
features_standardized = scaler.transform(features)

# Create the logistic regression and train it on the standardized features
log_reg = LogisticRegression(random_state=0)
log_reg.fit(features_standardized, target)
model = log_reg  # fit() hands back the estimator itself

# New observation; it is passed to the model as-is (no scaling applied here)
new_observation = [[.5, .5, .5, .5]]

# Predict class
model.predict(new_observation)

array([1])

In [7]:
# View the predicted class probabilities for the same observation
# (one row per observation, one column per class)
model.predict_proba(new_observation)

array([[0.17738424, 0.82261576]])

In [9]:
# TRAINING A MULTICLASS CLASSIFIER

# Load the full iris data set (all classes)
iris = datasets.load_iris()
features = iris.data
target = iris.target

# Standardize features
scaler = StandardScaler()
features_standardized = scaler.fit_transform(features)

# Create a one-vs-rest logistic regression: one binary classifier per class.
# The `multi_class='ovr'` argument of LogisticRegression is deprecated in
# recent scikit-learn releases, so the estimator is wrapped in
# OneVsRestClassifier instead, which is the documented replacement.
# (In current scikit-learn, a plain LogisticRegression fits the multinomial
# version for multiclass targets.)
log_reg = OneVsRestClassifier(LogisticRegression(random_state=0))

# Train model
model = log_reg.fit(features_standardized, target)

In [12]:
# REDUCE VARIANCE THROUGH REGULARIZATION

# Load the iris data set
iris = datasets.load_iris()
features, target = iris.data, iris.target

# Standardize features: learn the scaling first, then apply it
scaler = StandardScaler()
scaler.fit(features)
features_standardized = scaler.transform(features)

# Logistic regression whose regularization strength is tuned by
# cross-validation (L2 penalty, Cs=10, all available cores via n_jobs=-1)
log_reg = LogisticRegressionCV(Cs=10, penalty='l2', n_jobs=-1, random_state=0)

# Train model
log_reg.fit(features_standardized, target)
model = log_reg  # fit() hands back the estimator itself

In [13]:
# TRAINING A CLASSIFIER ON VERY LARGE DATA

# Load the iris data set
iris = datasets.load_iris()
features, target = iris.data, iris.target

# Standardize features: learn the scaling first, then apply it
scaler = StandardScaler()
scaler.fit(features)
features_standardized = scaler.transform(features)

# Logistic regression using the 'sag' solver
# (sag = stochastic average gradient, intended for big data sets)
log_reg = LogisticRegression(solver='sag', random_state=0)

# Train model
log_reg.fit(features_standardized, target)
model = log_reg  # fit() hands back the estimator itself

In [14]:
# HANDLING IMBALANCED CLASSES

# Load the iris data set
iris = datasets.load_iris()

# Make the classes imbalanced by dropping the first 40 observations
features = iris.data[40:, :]
remaining_labels = iris.target[40:]

# Binary target vector: 0 where the original class is 0, otherwise 1
target = np.where(remaining_labels == 0, 0, 1)

# Standardize features: learn the scaling first, then apply it
scaler = StandardScaler()
scaler.fit(features)
features_standardized = scaler.transform(features)

# Logistic regression with class_weight='balanced'
log_reg = LogisticRegression(class_weight='balanced', random_state=0)

# Train model
log_reg.fit(features_standardized, target)
model = log_reg  # fit() hands back the estimator itself