## Sklearn
* Scikit Learn
* https://scikit-learn.org/stable/
* built on Numpy, Scipy, Matplotlib 
* works with pandas
* workflow might be
    * pandas/numpy to load and manipulate data
    * sklearn to build and validate a model
    * matplotlib to visualize results
    * pandas/numpy to save results
* started by a Google intern in 2007 and opensource for anyone to use
* also has build in metric calculations, feature extraction and transformation tools

#### models follow a pretty similar syntax

#### very well documented code base, easy to follow and understand as well as extract information from a trained model

## Logistic Regression
* https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html

In [1]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix, classification_report
from sklearn.datasets import load_iris

iris = load_iris()

In [3]:
data = iris["data"]
labels = iris["target_names"]
feature_columns = iris["feature_names"]

df = pd.DataFrame(data, columns = feature_columns)
df["label"] = np.array([labels[x] for x in iris["target"]])

In [5]:
df.head(1)

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),label
0,5.1,3.5,1.4,0.2,setosa


#### we will make a binary classificaiton, where 1 is the Setosa class and 0 is all other classes

In [9]:
df["classification"] = np.where(df["label"] == "setosa",1, 0)

In [10]:
df.sample(5)

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),label,classification
104,6.5,3.0,5.8,2.2,virginica,0
7,5.0,3.4,1.5,0.2,setosa,1
74,6.4,2.9,4.3,1.3,versicolor,0
12,4.8,3.0,1.4,0.1,setosa,1
97,6.2,2.9,4.3,1.3,versicolor,0


#### split out our features and our target

In [15]:
x = df.drop(["label", "classification"], axis = 1)

In [16]:
y = df["classification"]

#### initialize our model
* https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html

In [20]:
reg = LogisticRegression(n_jobs = 4)

#### fit or train our model using the fit() method and passing in our x and y

In [21]:
reg.fit(x,y)

  " = {}.".format(effective_n_jobs(self.n_jobs)))


LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='warn', n_jobs=4, penalty='l2',
                   random_state=None, solver='warn', tol=0.0001, verbose=0,
                   warm_start=False)

#### make predictions using our model, using the predict() method and passing in some feature data
* note, feature data must be the same size/schema, we can't make a model on 5 features and pass in 10

In [22]:
yhat = reg.predict(x)

In [23]:
yhat

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [24]:
np.array(y)

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

#### we can score our model using the accuracy score from sklearn

In [25]:
accuracy_score(y, yhat)

1.0

In [30]:
(y == yhat).values

array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,

In [28]:
sum(y == yhat)/len(yhat)

1.0

#### now we want to gather our coefficients, perhaps for interpretation.  we do so by accessing to coef_ attribuet from out model object

In [19]:
reg.coef_

array([[ 0.41021713,  1.46416217, -2.26003266, -1.02103509]])

In [34]:
# note this is a nested array
for i in reg.coef_:
    print(i)

[ 0.41021713  1.46416217 -2.26003266 -1.02103509]


#### ordering stays consistent, so we can zip the columns from out dataframe that was passed into the model and our coefficients
* note ethe coef_ is a nested array, so we have to get the values from the first element

In [20]:
for i in zip(x.columns, reg.coef_[0]):
    print(i)

('sepal.length', 0.41021712519841536)
('sepal.width', 1.4641621652467662)
('petal.length', -2.260032661311069)
('petal.width', -1.0210350909174157)


In [52]:
coeffs = pd.DataFrame({
    "feature_name":x.columns,
    "beta":reg.coef_[0]
})

coeffs

Unnamed: 0,feature_name,beta
0,sepal length (cm),0.410217
1,sepal width (cm),1.464162
2,petal length (cm),-2.260033
3,petal width (cm),-1.021035


#### we also need to get our intercept, as this is a regression problem

In [53]:
reg.intercept_

array([0.26421853])

In [54]:
coeffs.loc[len(coeffs)] = ["intercept", reg.intercept_[0]]

In [56]:
len(coeffs)

5

In [55]:
coeffs

Unnamed: 0,feature_name,beta
0,sepal length (cm),0.410217
1,sepal width (cm),1.464162
2,petal length (cm),-2.260033
3,petal width (cm),-1.021035
4,intercept,0.264219


#### Logistic Equation
.247 + .41(sepal.length) + 1.46(sepal.width) + -2.26(petal.length) + -1.02(petal.width)

In [22]:
x.head(2)

Unnamed: 0,sepal.length,sepal.width,petal.length,petal.width
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2


#### custom prediction function

In [64]:
def custom_predict(sepal_length, sepal_width, petal_length, petal_width):
    return .247 + .41*(sepal_length) + 1.46*(sepal_width) + -2.26*(petal_length) + -1.02*(petal_width)

In [65]:
pred = custom_predict(5.1, 3.5, 1.4, .2)
pred

4.079999999999999

In [66]:
pred = custom_predict(4.9, 3.5, 1.4, .2)
pred

3.9979999999999998

In [30]:
y.head(2)

0    1
1    1
Name: classification, dtype: int64

# What's a better way to do this

#### let's us numpy to make this cleaener

In [59]:
reg.intercept_[0] + np.sum(np.multiply(reg.coef_[0], np.array(x.head(1))))

4.112640702773946

In [60]:
np.array(x.head(1))

array([[5.1, 3.5, 1.4, 0.2]])

In [61]:
def pred(features, coefs, intercept):
    return intercept + np.sum(np.multiply(features, coefs))

In [62]:
p = pred(np.array([5.1, 3.5, 1.4, .2]), reg.coef_[0], reg.intercept_[0])
p

4.112640702773946

#### predict probability
* Positive class probabilities are computed as
* 1 / (1 + np.exp(-self.decision_function(X))) where decision function is
* .247 + .371*(sepal_length) + 1.409*(sepal_width) + -2.152*(petal_length) + -.954*(petal_width)
* we are just applying the sigmoid function to our decision function

In [69]:
reg.predict_proba(x.head(2))

array([[0.01610102, 0.98389898],
       [0.03562213, 0.96437787]])

#### we can get the decision function using some matrix multiplication then summing across the axis and adding the intercept back in

In [70]:
x.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [71]:
reg.coef_

array([[ 0.41021713,  1.46416217, -2.26003266, -1.02103509]])

In [50]:
# we can get the decision function using some matrix multiplication
# then summing across the axis
x.head()*reg.coef_

Unnamed: 0,sepal.length,sepal.width,petal.length,petal.width
0,2.092107,5.124568,-3.164046,-0.204207
1,2.010064,4.392486,-3.164046,-0.204207
2,1.92802,4.685319,-2.938042,-0.204207
3,1.886999,4.538903,-3.390049,-0.204207
4,2.051086,5.270984,-3.164046,-0.204207


In [51]:
(x.head()*reg.coef_).sum(1)

0    3.848422
1    3.034298
2    3.471090
3    2.831645
4    3.953817
dtype: float64

In [52]:
(x*reg.coef_).sum(1)+reg.intercept_

0      4.112641
1      3.298516
2      3.735308
3      3.095864
4      4.218035
         ...   
145   -6.695391
146   -6.731138
147   -6.471124
148   -6.766841
149   -6.287044
Length: 150, dtype: float64

In [77]:
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

In [79]:
# Positive class probabilities are computed as
sigmoid(4.112641)

0.9838989862549731

In [80]:
1 - sigmoid(4.112641)

0.016101013745026926

In [81]:
preds = pd.DataFrame(sigmoid((x*reg.coef_).sum(axis = 1) + reg.intercept_), columns = ["positive"])
preds["negative"] = 1 - preds["positive"]

In [82]:
preds.head(10)

Unnamed: 0,positive,negative
0,0.983899,0.016101
1,0.964378,0.035622
2,0.97669,0.02331
3,0.956722,0.043278
4,0.985486,0.014514
5,0.980907,0.019093
6,0.974889,0.025111
7,0.975854,0.024146
8,0.950121,0.049879
9,0.965143,0.034857


In [58]:
reg.predict_proba(x.head(10))

array([[0.01610102, 0.98389898],
       [0.03562213, 0.96437787],
       [0.02330951, 0.97669049],
       [0.04327818, 0.95672182],
       [0.0145138 , 0.9854862 ],
       [0.01909304, 0.98090696],
       [0.02511113, 0.97488887],
       [0.02414588, 0.97585412],
       [0.0498789 , 0.9501211 ],
       [0.03485665, 0.96514335]])

#### behind the scenes it's all vector and matrix operations

#### let's think about sometehing like KNN
* say we have something like the iris 
* what would be a good first step to finding the nearest neighbors?
    * think through something we covered in homework 2?
* after this how could we find the nearest neighbors?
    * a nice numpy function

## Normalize
* remove magnitude of our features
* center data
* standard scaler
* z score norm
* min-max

## Why normalize data? 

In [87]:
# one is height in feet
# second is pounds
a = np.array([6,200])
b = np.array([5.5,175])

In [89]:
(a - b)**2

array([2.50e-01, 6.25e+02])

In [90]:
sum((a - b)**2)

625.25

In [91]:
np.sqrt(sum((a - b)**2))

25.004999500099974

In [121]:
tst = df.drop("classification", 1)
tst = pd.get_dummies(tst)

In [122]:
# will we crowd out our dummies?
tst

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),label_setosa,label_versicolor,label_virginica
0,5.1,3.5,1.4,0.2,1,0,0
1,4.9,3.0,1.4,0.2,1,0,0
2,4.7,3.2,1.3,0.2,1,0,0
3,4.6,3.1,1.5,0.2,1,0,0
4,5.0,3.6,1.4,0.2,1,0,0
...,...,...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,0,0,1
146,6.3,2.5,5.0,1.9,0,0,1
147,6.5,3.0,5.2,2.0,0,0,1
148,6.2,3.4,5.4,2.3,0,0,1


In [92]:
#df = pd.read_csv("data/iris.csv")
df.head(1)

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),label,classification
0,5.1,3.5,1.4,0.2,setosa,1


In [93]:
df.describe()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),classification
count,150.0,150.0,150.0,150.0,150.0
mean,5.843333,3.057333,3.758,1.199333,0.333333
std,0.828066,0.435866,1.765298,0.762238,0.472984
min,4.3,2.0,1.0,0.1,0.0
25%,5.1,2.8,1.6,0.3,0.0
50%,5.8,3.0,4.35,1.3,0.0
75%,6.4,3.3,5.1,1.8,1.0
max,7.9,4.4,6.9,2.5,1.0


#### Standard Scaler
* https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.StandardScaler.html
* x-mu/std (zscore), standard deviations from the mean

In [109]:
from sklearn.preprocessing import StandardScaler

In [110]:
x = df.drop(["classification", "label"], 1)

In [111]:
x.head(1)

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2


In [112]:
scaler = StandardScaler()
scaler.fit(x)

StandardScaler(copy=True, with_mean=True, with_std=True)

In [113]:
x_scaler = scaler.transform(x)

In [114]:
x_scaler[0]

array([-0.90068117,  1.01900435, -1.34022653, -1.3154443 ])

In [115]:
# note the transform returns a numpy array
x_scaler = scaler.transform(x)
x_scale_df = pd.DataFrame(x_scaler, columns = x.columns)
x_scale_df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,-0.900681,1.019004,-1.340227,-1.315444
1,-1.143017,-0.131979,-1.340227,-1.315444
2,-1.385353,0.328414,-1.397064,-1.315444
3,-1.506521,0.098217,-1.283389,-1.315444
4,-1.021849,1.249201,-1.340227,-1.315444


In [116]:
x_scale_df.describe()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
count,150.0,150.0,150.0,150.0
mean,-1.690315e-15,-1.84297e-15,-1.698641e-15,-1.409243e-15
std,1.00335,1.00335,1.00335,1.00335
min,-1.870024,-2.433947,-1.567576,-1.447076
25%,-0.9006812,-0.592373,-1.226552,-1.183812
50%,-0.05250608,-0.1319795,0.3364776,0.1325097
75%,0.6745011,0.5586108,0.7627583,0.7906707
max,2.492019,3.090775,1.785832,1.712096


#### Min Max Scaler
* https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.MinMaxScaler.html

In [117]:
from sklearn.preprocessing import MinMaxScaler

In [118]:
minmax = MinMaxScaler()
minmax.fit(x)

x_minmax = minmax.transform(x)
x_minmax = pd.DataFrame(x_minmax, columns = x.columns)
x_minmax.describe()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
count,150.0,150.0,150.0,150.0
mean,0.428704,0.440556,0.467458,0.458056
std,0.230018,0.181611,0.299203,0.317599
min,0.0,0.0,0.0,0.0
25%,0.222222,0.333333,0.101695,0.083333
50%,0.416667,0.416667,0.567797,0.5
75%,0.583333,0.541667,0.694915,0.708333
max,1.0,1.0,1.0,1.0


* https://scikit-learn.org/stable/modules/classes.html#module-sklearn.preprocessing

#### center data
* https://scikit-learn.org/stable/modules/preprocessing.html

In [92]:
from sklearn.preprocessing import scale

In [95]:
scaler = scale(x)
x_scale = pd.DataFrame(scaler, columns = x.columns)
x_scale.mean(axis=0)

sepal.length   -2.775558e-16
sepal.width    -9.695948e-16
petal.length   -8.652338e-16
petal.width    -4.662937e-16
dtype: float64

## countvectorizer()
* https://scikit-learn.org/stable/modules/generated/sklearn.feature_extraction.text.CountVectorizer.html

In [98]:
from sklearn.feature_extraction.text import CountVectorizer

In [96]:
corpus = [
    'This is the first document.',
    'This document is the second document.',
    'And this is the third one.',
    'Is this the first document?',
]

In [99]:
vectorizer = CountVectorizer()

In [100]:
x = vectorizer.fit_transform(corpus)
x

<4x9 sparse matrix of type '<class 'numpy.int64'>'
	with 21 stored elements in Compressed Sparse Row format>

In [101]:
x.toarray()

array([[0, 1, 1, 1, 0, 0, 1, 0, 1],
       [0, 2, 0, 1, 0, 1, 1, 0, 1],
       [1, 0, 0, 1, 1, 0, 1, 1, 1],
       [0, 1, 1, 1, 0, 0, 1, 0, 1]])

In [102]:
df = pd.DataFrame(x.toarray(), columns = vectorizer.get_feature_names())

In [103]:
df

Unnamed: 0,and,document,first,is,one,second,the,third,this
0,0,1,1,1,0,0,1,0,1
1,0,2,0,1,0,1,1,0,1
2,1,0,0,1,1,0,1,1,1
3,0,1,1,1,0,0,1,0,1


In [104]:
from sklearn.feature_extraction.text import TfidfVectorizer

In [105]:
vectorizer = TfidfVectorizer()
x = vectorizer.fit_transform(corpus)
df = pd.DataFrame(x.toarray(), columns = vectorizer.get_feature_names())
df

Unnamed: 0,and,document,first,is,one,second,the,third,this
0,0.0,0.469791,0.580286,0.384085,0.0,0.0,0.384085,0.0,0.384085
1,0.0,0.687624,0.0,0.281089,0.0,0.538648,0.281089,0.0,0.281089
2,0.511849,0.0,0.0,0.267104,0.511849,0.0,0.267104,0.511849,0.267104
3,0.0,0.469791,0.580286,0.384085,0.0,0.0,0.384085,0.0,0.384085


## What could we do with a dataframe like this?

## Featuree Extraction and Selection
* https://scikit-learn.org/stable/modules/classes.html#module-sklearn.feature_extraction.text

In [124]:
from sklearn.feature_selection import VarianceThreshold

In [130]:
x = [[0, 2, 0, 3], [0, 1, 4, 3], [0, 1, 1, 3]]
df = pd.DataFrame(x, columns = ["a", "b", "c", "d"])
df

Unnamed: 0,a,b,c,d
0,0,2,0,3
1,0,1,4,3
2,0,1,1,3


In [131]:
df.describe()

Unnamed: 0,a,b,c,d
count,3.0,3.0,3.0,3.0
mean,0.0,1.333333,1.666667,3.0
std,0.0,0.57735,2.081666,0.0
min,0.0,1.0,0.0,3.0
25%,0.0,1.0,0.5,3.0
50%,0.0,1.0,1.0,3.0
75%,0.0,1.5,2.5,3.0
max,0.0,2.0,4.0,3.0


In [132]:
selector = VarianceThreshold()
tst = selector.fit_transform(df)
tst

array([[2, 0],
       [1, 4],
       [1, 1]])

In [133]:
selector.variances_

array([0.        , 0.22222222, 2.88888889, 0.        ])

In [134]:
df.columns[selector.get_support()]

Index(['b', 'c'], dtype='object')

In [138]:
selector.get_support()

array([False,  True,  True, False])

In [135]:
new_df = pd.DataFrame(tst, columns = df.columns[selector.get_support()])

In [136]:
new_df

Unnamed: 0,b,c
0,2,0
1,1,4
2,1,1


### DictVectorizer
* convert a bunch of dictionaries to feature dataframe

In [139]:
from sklearn.feature_extraction import DictVectorizer

In [165]:
vectorizer = DictVectorizer(sparse = False)

In [166]:
dct= [
      {'feature1': 5, 'feature2': 10}, 
      {'feature3': 3, 'feature2': 2}, 
      {'feature4': 3, 'feature2': 2}
     ]

In [167]:
dct

[{'feature1': 5, 'feature2': 10},
 {'feature3': 3, 'feature2': 2},
 {'feature4': 3, 'feature2': 2}]

In [168]:
t = vectorizer.fit_transform(dct)

In [173]:
pd.DataFrame(t, columns = vectorizer.get_feature_names())

Unnamed: 0,feature1,feature2,feature3,feature4
0,5.0,10.0,0.0,0.0
1,0.0,2.0,3.0,0.0
2,0.0,2.0,0.0,3.0


### Decomposition

In [2]:
from sklearn.decomposition import PCA

In [4]:
df.head(1)

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),label
0,5.1,3.5,1.4,0.2,setosa


In [5]:
pca = PCA(n_components=2).fit(df.drop("label", 1))

In [8]:
t = pca.transform(df.drop("label", 1))
tdf = pd.DataFrame(t, columns = ["component_1", "component_2"])
tdf.head(5)

Unnamed: 0,component_1,component_2
0,-2.684126,0.319397
1,-2.714142,-0.177001
2,-2.888991,-0.144949
3,-2.745343,-0.318299
4,-2.728717,0.326755


In [9]:
# explained variance
print(pca.explained_variance_ratio_)

[0.92461872 0.05306648]


In [14]:
# if we wanted to hit a certain variance captured
c = 1
ex_r = 0

while ex_r < .98:
    pca = PCA(n_components=c).fit(df.drop("label", 1))
    ex_r = sum(pca.explained_variance_ratio_)
    print("Components:{}".format(c))
    print("Explained Variance:{}".format(ex_r))
    print("\n")
    c+=1

Components:1
Explained Variance:0.9246187232017271


Components:2
Explained Variance:0.977685206318795


Components:3
Explained Variance:0.9947878161267247




In [15]:
pca = PCA(n_components=3).fit(df.drop("label", 1))

In [17]:
loadings = pd.DataFrame(pca.components_, columns = df.drop("label", 1).columns)
loadings

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,0.361387,-0.084523,0.856671,0.358289
1,0.656589,0.730161,-0.173373,-0.075481
2,-0.58203,0.597911,0.076236,0.545831


* https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.NMF.html#sklearn.decomposition.NMF

In [18]:
from sklearn.decomposition import NMF

In [26]:
nmf = NMF(n_components = 2).fit(df.drop("label", 1))
t = nmf.fit_transform(df.drop("label", 1))
tdf = pd.DataFrame(t, columns = ["components_1", "components_2"])

In [31]:
tdf.shape

(150, 2)

* https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.FactorAnalysis.html#sklearn.decomposition.FactorAnalysis

In [28]:
from sklearn.decomposition import FactorAnalysis

In [29]:
fa = FactorAnalysis(n_components = 2).fit(df.drop("label", 1))
t = fa.fit_transform(df.drop("label", 1))
tdf = pd.DataFrame(t, columns = ["components_1", "components_2"])

In [30]:
tdf.shape

(150, 2)