In [1]:
from google.colab import drive
drive.mount('/content/drive',force_remount=True)

Mounted at /content/drive


### Read the datasets

In [2]:
import pandas
ha_df = pandas.read_csv("/content/drive/My Drive/datasets/ha.csv")
hb_df = pandas.read_csv("/content/drive/My Drive/datasets/hb.csv")
hc_df = pandas.read_csv("/content/drive/My Drive/datasets/hc.csv")
hd_df = pandas.read_csv("/content/drive/My Drive/datasets/hd.csv")

In [3]:
ha_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1923 entries, 0 to 1922
Columns: 145 entries, AGE to FETUS1MECONIUMASPIRATIONSYNDROME
dtypes: float64(13), int64(8), object(124)
memory usage: 2.1+ MB


In [4]:
hb_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 893 entries, 0 to 892
Columns: 145 entries, AGE to FETUS1MECONIUMASPIRATIONSYNDROME
dtypes: float64(38), int64(9), object(98)
memory usage: 1011.7+ KB


In [5]:
hc_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2458 entries, 0 to 2457
Columns: 145 entries, AGE to FETUS1MECONIUMASPIRATIONSYNDROME
dtypes: float64(22), int64(9), object(114)
memory usage: 2.7+ MB


In [6]:
hd_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 883 entries, 0 to 882
Columns: 145 entries, AGE to FETUS1MECONIUMASPIRATIONSYNDROME
dtypes: float64(42), int64(13), object(90)
memory usage: 1000.4+ KB


### Concatenating the datasets

In [7]:
c_df = pandas.concat([ha_df, hb_df, hc_df, hd_df], ignore_index=True)
c_df

Unnamed: 0,AGE,COUPLESITUATION,HEIGHT,WEIGHT,BMI,COUNTRYOFORIGYN,MATERNALEDUCATION,STARTANTENATALCARRE,SUBSTANCEABUSE,SMOKING,...,HEMATOMALDRAINAGE,INTRAPARTUMpHvalue,sexfetus1,weightfetus1,apgarfetus1,apgarfetus1fivemin,pHvaluefetus1umbilicalarthery,FETUS1ADMISSIONICU,FETUS1RECOVERY,FETUS1MECONIUMASPIRATIONSYNDROME
0,30,withcouple,1.58,70.0,28.04,CHILE,secondary,1ºtrimester,f,f,...,,,Masculino,2640,7,9,7.34,f,1:Aspiraciónnasofaríngea,f
1,38,withcouple,1.61,79.0,30.48,ESPAÑA,secondary,1ºtrimester,f,f,...,,,Femenino,3040,8,9,7.26,f,0:noprecisa,f
2,25,withcouple,1.56,72.0,29.59,COLOMBIA,secondary,1ºtrimester,f,f,...,,,Femenino,3820,10,10,7.21,f,0:noprecisa,f
3,31,withcouple,1.62,54.0,20.58,ESPAÑA,secondary,1ºtrimester,f,f,...,,,Femenino,3390,9,10,7.21,f,0:noprecisa,f
4,28,withcouple,1.47,51.0,23.60,ESPAÑA,secondary,1ºtrimester,f,f,...,,,Masculino,3020,8,9,7.17,f,0:noprecisa,f
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6152,35,withcouple,1.56,49.0,20.00,ESPAÑA,secondary,1ºtrimester,f,f,...,1960,10,10,7.27,f,,f,,,
6153,29,withcouple,1.64,59.0,21.51,ESPAÑA,secondary,1ºtrimester,f,f,...,3310,9,10,DESCONOCIDO,f,,f,,,
6154,39,withcouple,1.59,64.0,20.00,ESPAÑA,secondary,1ºtrimester,f,f,...,3830,8,9,DESCONOCIDO,f,,f,,,
6155,41,withcouple,1.48,50.0,23.28,ESPAÑA,primary,1ºtrimester,f,f,...,2810,10,10,DESCONOCIDO,f,,f,,,


### Information of the concatenated dataset

In [8]:
c_df.info(145)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6157 entries, 0 to 6156
Data columns (total 145 columns):
 #    Column                                  Dtype  
---   ------                                  -----  
 0    AGE                                     int64  
 1    COUPLESITUATION                         object 
 2    HEIGHT                                  float64
 3    WEIGHT                                  float64
 4    BMI                                     float64
 5    COUNTRYOFORIGYN                         object 
 6    MATERNALEDUCATION                       object 
 7    STARTANTENATALCARRE                     object 
 8    SUBSTANCEABUSE                          object 
 9    SMOKING                                 object 
 10   CIGARRETTESPERDAY                       float64
 11   ALCOHOL                                 object 
 12   DAILYALCOHOLINTAKE                      object 
 13   KGINCREASEDPREGNANCY                    int64  
 14   ART                   

### Checking missing values

In [9]:
c_df.isnull().sum()

AGE                                    0
COUPLESITUATION                        0
HEIGHT                                 0
WEIGHT                                 0
BMI                                    0
                                    ... 
apgarfetus1fivemin                  4219
pHvaluefetus1umbilicalarthery          0
FETUS1ADMISSIONICU                  4238
FETUS1RECOVERY                      4235
FETUS1MECONIUMASPIRATIONSYNDROME    4234
Length: 145, dtype: int64

### Class labels

In [10]:
c_df.TYPEOFBIRTH.unique()

array(['emergencyc-section', 'CESprogrammed', 'EUT', 'vacum', 'FORC',
       'ESP', 'NALGASVAGINAL'], dtype=object)

In [11]:
a = c_df['TYPEOFBIRTH'].values.tolist()
s = set(a)
for x in s:
  print(x,":",a.count(x))

FORC : 87
CESprogrammed : 325
EUT : 4231
NALGASVAGINAL : 8
emergencyc-section : 692
ESP : 44
vacum : 770


### Preprocessing

### Dataset after feature extraction

In [12]:
h_df = c_df[['PREVIOUSCESAREAN','COMPLICATIONS','ROBSONGROUP','ARTMODE','PREVIOUSPRETERMPREGNANCIES','AMNIOCENTESIS','PREINDUCTION','INDUCTION',
        'EPISIOTOMY','OXYTOCIN','FetalINTRAPARTUMpH','TYPEOFBIRTH']]
  
h_df

Unnamed: 0,PREVIOUSCESAREAN,COMPLICATIONS,ROBSONGROUP,ARTMODE,PREVIOUSPRETERMPREGNANCIES,AMNIOCENTESIS,PREINDUCTION,INDUCTION,EPISIOTOMY,OXYTOCIN,FetalINTRAPARTUMpH,TYPEOFBIRTH
0,f,f,group2a,,0,f,t,t,,t,f,emergencyc-section
1,f,f,group6,,0,f,f,f,,f,f,CESprogrammed
2,f,f,group1,,0,f,f,f,f,t,f,EUT
3,t,f,group5,,0,f,f,f,f,f,f,EUT
4,f,f,group1,FIV,0,f,f,f,f,t,f,EUT
...,...,...,...,...,...,...,...,...,...,...,...,...
6152,f,f,group8,,0,t,f,f,t,f,f,EUT
6153,f,f,group3,,0,f,f,f,f,f,f,EUT
6154,f,f,group1,IAD,0,f,f,f,f,f,f,EUT
6155,f,f,group2a,FIV,0,f,f,t,f,t,t,emergencyc-section


### Checking duplicates

In [13]:
h_df.duplicated()

0       False
1       False
2       False
3       False
4       False
        ...  
6152    False
6153     True
6154    False
6155    False
6156     True
Length: 6157, dtype: bool

In [14]:
h_df.duplicated().sum()

5261

### Drop duplicate records

In [15]:
h_df = h_df.drop_duplicates()
h_df

Unnamed: 0,PREVIOUSCESAREAN,COMPLICATIONS,ROBSONGROUP,ARTMODE,PREVIOUSPRETERMPREGNANCIES,AMNIOCENTESIS,PREINDUCTION,INDUCTION,EPISIOTOMY,OXYTOCIN,FetalINTRAPARTUMpH,TYPEOFBIRTH
0,f,f,group2a,,0,f,t,t,,t,f,emergencyc-section
1,f,f,group6,,0,f,f,f,,f,f,CESprogrammed
2,f,f,group1,,0,f,f,f,f,t,f,EUT
3,t,f,group5,,0,f,f,f,f,f,f,EUT
4,f,f,group1,FIV,0,f,f,f,f,t,f,EUT
...,...,...,...,...,...,...,...,...,...,...,...,...
6143,f,f,group2b,Ovodón,0,f,f,f,f,f,f,CESprogrammed
6149,f,f,group2a,,0,f,f,t,f,f,f,FORC
6152,f,f,group8,,0,t,f,f,t,f,f,EUT
6154,f,f,group1,IAD,0,f,f,f,f,f,f,EUT


In [16]:
import numpy as np
h_df = h_df.replace(r'^\s*$', np.nan, regex=True)

### Handling missing values of categorical attributes by mode

In [17]:
s = (h_df.dtypes == 'object')
#print(s)
#print(s[s])
object_cols = list(s[s].index)
print("Categorical variables:")
print(object_cols)
print(len(object_cols))

Categorical variables:
['PREVIOUSCESAREAN', 'COMPLICATIONS', 'ROBSONGROUP', 'ARTMODE', 'AMNIOCENTESIS', 'PREINDUCTION', 'INDUCTION', 'EPISIOTOMY', 'OXYTOCIN', 'FetalINTRAPARTUMpH', 'TYPEOFBIRTH']
11


In [18]:
for i in object_cols:
  x = h_df[i].mode()[0]
  h_df[i].fillna(x,inplace = True)

  
h_df

Unnamed: 0,PREVIOUSCESAREAN,COMPLICATIONS,ROBSONGROUP,ARTMODE,PREVIOUSPRETERMPREGNANCIES,AMNIOCENTESIS,PREINDUCTION,INDUCTION,EPISIOTOMY,OXYTOCIN,FetalINTRAPARTUMpH,TYPEOFBIRTH
0,f,f,group2a,FIV,0,f,t,t,f,t,f,emergencyc-section
1,f,f,group6,FIV,0,f,f,f,f,f,f,CESprogrammed
2,f,f,group1,FIV,0,f,f,f,f,t,f,EUT
3,t,f,group5,FIV,0,f,f,f,f,f,f,EUT
4,f,f,group1,FIV,0,f,f,f,f,t,f,EUT
...,...,...,...,...,...,...,...,...,...,...,...,...
6143,f,f,group2b,Ovodón,0,f,f,f,f,f,f,CESprogrammed
6149,f,f,group2a,FIV,0,f,f,t,f,f,f,FORC
6152,f,f,group8,FIV,0,t,f,f,t,f,f,EUT
6154,f,f,group1,IAD,0,f,f,f,f,f,f,EUT


### Handling missing values of Numerical attributes using mean

In [19]:
s = (h_df.dtypes != 'object')
number_cols = list(s[s].index)
print("Numerical variables:")
print(number_cols)
print(len(number_cols))

Numerical variables:
['PREVIOUSPRETERMPREGNANCIES']
1


In [20]:
for i in number_cols:
  x = h_df[i].mean()
  h_df[i].fillna(x,inplace = True)

  
h_df

Unnamed: 0,PREVIOUSCESAREAN,COMPLICATIONS,ROBSONGROUP,ARTMODE,PREVIOUSPRETERMPREGNANCIES,AMNIOCENTESIS,PREINDUCTION,INDUCTION,EPISIOTOMY,OXYTOCIN,FetalINTRAPARTUMpH,TYPEOFBIRTH
0,f,f,group2a,FIV,0,f,t,t,f,t,f,emergencyc-section
1,f,f,group6,FIV,0,f,f,f,f,f,f,CESprogrammed
2,f,f,group1,FIV,0,f,f,f,f,t,f,EUT
3,t,f,group5,FIV,0,f,f,f,f,f,f,EUT
4,f,f,group1,FIV,0,f,f,f,f,t,f,EUT
...,...,...,...,...,...,...,...,...,...,...,...,...
6143,f,f,group2b,Ovodón,0,f,f,f,f,f,f,CESprogrammed
6149,f,f,group2a,FIV,0,f,f,t,f,f,f,FORC
6152,f,f,group8,FIV,0,t,f,f,t,f,f,EUT
6154,f,f,group1,IAD,0,f,f,f,f,f,f,EUT


In [21]:
h_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 896 entries, 0 to 6155
Data columns (total 12 columns):
 #   Column                      Non-Null Count  Dtype 
---  ------                      --------------  ----- 
 0   PREVIOUSCESAREAN            896 non-null    object
 1   COMPLICATIONS               896 non-null    object
 2   ROBSONGROUP                 896 non-null    object
 3   ARTMODE                     896 non-null    object
 4   PREVIOUSPRETERMPREGNANCIES  896 non-null    int64 
 5   AMNIOCENTESIS               896 non-null    object
 6   PREINDUCTION                896 non-null    object
 7   INDUCTION                   896 non-null    object
 8   EPISIOTOMY                  896 non-null    object
 9   OXYTOCIN                    896 non-null    object
 10  FetalINTRAPARTUMpH          896 non-null    object
 11  TYPEOFBIRTH                 896 non-null    object
dtypes: int64(1), object(11)
memory usage: 91.0+ KB


### Checking NULL values

In [22]:
h_df.isnull().sum()

PREVIOUSCESAREAN              0
COMPLICATIONS                 0
ROBSONGROUP                   0
ARTMODE                       0
PREVIOUSPRETERMPREGNANCIES    0
AMNIOCENTESIS                 0
PREINDUCTION                  0
INDUCTION                     0
EPISIOTOMY                    0
OXYTOCIN                      0
FetalINTRAPARTUMpH            0
TYPEOFBIRTH                   0
dtype: int64

In [23]:
h_df.TYPEOFBIRTH.unique()

array(['emergencyc-section', 'CESprogrammed', 'EUT', 'vacum', 'FORC',
       'ESP', 'NALGASVAGINAL'], dtype=object)

In [24]:
a = h_df['TYPEOFBIRTH'].values.tolist()
s = set(a)
for x in s:
  print(x,":",a.count(x))

FORC : 38
CESprogrammed : 67
EUT : 357
NALGASVAGINAL : 6
emergencyc-section : 200
ESP : 29
vacum : 199


In [25]:
h_df

Unnamed: 0,PREVIOUSCESAREAN,COMPLICATIONS,ROBSONGROUP,ARTMODE,PREVIOUSPRETERMPREGNANCIES,AMNIOCENTESIS,PREINDUCTION,INDUCTION,EPISIOTOMY,OXYTOCIN,FetalINTRAPARTUMpH,TYPEOFBIRTH
0,f,f,group2a,FIV,0,f,t,t,f,t,f,emergencyc-section
1,f,f,group6,FIV,0,f,f,f,f,f,f,CESprogrammed
2,f,f,group1,FIV,0,f,f,f,f,t,f,EUT
3,t,f,group5,FIV,0,f,f,f,f,f,f,EUT
4,f,f,group1,FIV,0,f,f,f,f,t,f,EUT
...,...,...,...,...,...,...,...,...,...,...,...,...
6143,f,f,group2b,Ovodón,0,f,f,f,f,f,f,CESprogrammed
6149,f,f,group2a,FIV,0,f,f,t,f,f,f,FORC
6152,f,f,group8,FIV,0,t,f,f,t,f,f,EUT
6154,f,f,group1,IAD,0,f,f,f,f,f,f,EUT


In [26]:
from sklearn import preprocessing
le = preprocessing.LabelEncoder()

#new_df1.Publisher = new_df1.Publisher.astype(str)

for column in h_df.columns:
    temp_new = le.fit_transform(h_df[column].astype('category'))
    h_df.drop(labels=[column], axis="columns", inplace=True)
    h_df[column] = temp_new

X_h = h_df.loc[:,'PREVIOUSCESAREAN':'FetalINTRAPARTUMpH']
Y_h = h_df.loc[:,'TYPEOFBIRTH':]

In [27]:
from imblearn.over_sampling import ADASYN 
sm = ADASYN(sampling_strategy='minority', random_state=None, n_neighbors=5, n_jobs=1)
X_adassin_h, Y_adassin_h = sm.fit_resample(X_h, Y_h)
print(X_adassin_h.shape)
print(Y_adassin_h.shape)

(1248, 11)
(1248, 1)


In [28]:
from sklearn.model_selection import train_test_split
X_train_h, X_test_h, Y_train_h, Y_test_h = train_test_split(X_adassin_h,Y_adassin_h,test_size=0.20,random_state=None)

In [29]:
X_train_h.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 998 entries, 599 to 994
Data columns (total 11 columns):
 #   Column                      Non-Null Count  Dtype
---  ------                      --------------  -----
 0   PREVIOUSCESAREAN            998 non-null    int64
 1   COMPLICATIONS               998 non-null    int64
 2   ROBSONGROUP                 998 non-null    int64
 3   ARTMODE                     998 non-null    int64
 4   PREVIOUSPRETERMPREGNANCIES  998 non-null    int64
 5   AMNIOCENTESIS               998 non-null    int64
 6   PREINDUCTION                998 non-null    int64
 7   INDUCTION                   998 non-null    int64
 8   EPISIOTOMY                  998 non-null    int64
 9   OXYTOCIN                    998 non-null    int64
 10  FetalINTRAPARTUMpH          998 non-null    int64
dtypes: int64(11)
memory usage: 93.6 KB


In [30]:
Y_train_h.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 998 entries, 599 to 994
Data columns (total 1 columns):
 #   Column       Non-Null Count  Dtype
---  ------       --------------  -----
 0   TYPEOFBIRTH  998 non-null    int64
dtypes: int64(1)
memory usage: 15.6 KB


In [31]:
X_test_h.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 250 entries, 498 to 479
Data columns (total 11 columns):
 #   Column                      Non-Null Count  Dtype
---  ------                      --------------  -----
 0   PREVIOUSCESAREAN            250 non-null    int64
 1   COMPLICATIONS               250 non-null    int64
 2   ROBSONGROUP                 250 non-null    int64
 3   ARTMODE                     250 non-null    int64
 4   PREVIOUSPRETERMPREGNANCIES  250 non-null    int64
 5   AMNIOCENTESIS               250 non-null    int64
 6   PREINDUCTION                250 non-null    int64
 7   INDUCTION                   250 non-null    int64
 8   EPISIOTOMY                  250 non-null    int64
 9   OXYTOCIN                    250 non-null    int64
 10  FetalINTRAPARTUMpH          250 non-null    int64
dtypes: int64(11)
memory usage: 23.4 KB


In [32]:
Y_test_h.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 250 entries, 498 to 479
Data columns (total 1 columns):
 #   Column       Non-Null Count  Dtype
---  ------       --------------  -----
 0   TYPEOFBIRTH  250 non-null    int64
dtypes: int64(1)
memory usage: 3.9 KB


In [33]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score

model = DecisionTreeClassifier(criterion = 'entropy')
model.fit(X_train_h,Y_train_h)
y_pred_h = model.predict(X_test_h)
print(accuracy_score(Y_test_h,y_pred_h))
print(precision_score(Y_test_h,y_pred_h,pos_label='positive',average='weighted'))
print(recall_score(Y_test_h,y_pred_h,pos_label='positive',average='micro'))
print(f1_score(Y_test_h, y_pred_h,average='micro'))

0.456
0.45799325272428715
0.456
0.456




In [34]:
from sklearn.ensemble import RandomForestClassifier

RFC = RandomForestClassifier()
RFC.fit(X_train_h, Y_train_h.values.ravel())
y_pred_h = RFC.predict(X_test_h)
print(accuracy_score(Y_test_h,y_pred_h))
print(precision_score(Y_test_h,y_pred_h,pos_label='positive',average='weighted'))
print(recall_score(Y_test_h,y_pred_h,pos_label='positive',average='micro'))
print(f1_score(Y_test_h, y_pred_h,average='micro'))

0.492
0.4868792501616031
0.492
0.492




In [35]:
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score

svc = SVC(gamma = 0.001, C = 1000)
y_pred_h = svc.fit(X_train_h, Y_train_h.values.ravel()).predict(X_test_h)
print(accuracy_score(Y_test_h,y_pred_h))
print(precision_score(Y_test_h,y_pred_h,pos_label='positive',average='weighted'))
print(recall_score(Y_test_h,y_pred_h,pos_label='positive',average='micro'))
print(f1_score(Y_test_h, y_pred_h,average='micro'))

0.62
0.5902650144592251
0.62
0.62


  _warn_prf(average, modifier, msg_start, len(result))


In [36]:
from sklearn.neighbors import KNeighborsClassifier

K = []
training = []
test = []
scores = {}
  
for k in range(2, 21):
    clf = KNeighborsClassifier(n_neighbors = k)
    clf.fit(X_train_h, Y_train_h.values.ravel())
  
    training_score = clf.score(X_train_h, Y_train_h)
    test_score = clf.score(X_test_h, Y_test_h)
    K.append(k)
  
    training.append(training_score)
    test.append(test_score)
    scores[k] = [training_score, test_score]
for keys, values in scores.items():
    print(keys, ':', values)

2 : [0.6442885771543087, 0.456]
3 : [0.6452905811623246, 0.456]
4 : [0.6472945891783567, 0.472]
5 : [0.6503006012024048, 0.476]
6 : [0.6382765531062125, 0.5]
7 : [0.6362725450901804, 0.512]
8 : [0.6202404809619239, 0.512]
9 : [0.6112224448897795, 0.516]
10 : [0.6192384769539078, 0.532]
11 : [0.6182364729458918, 0.5]
12 : [0.6112224448897795, 0.512]
13 : [0.6142284569138277, 0.496]
14 : [0.6092184368737475, 0.524]
15 : [0.6072144288577155, 0.516]
16 : [0.5901803607214429, 0.544]
17 : [0.6002004008016032, 0.544]
18 : [0.6002004008016032, 0.536]
19 : [0.5961923847695391, 0.524]
20 : [0.5891783567134269, 0.528]


In [37]:
from sklearn.neighbors import KNeighborsClassifier

model = KNeighborsClassifier(n_neighbors=2,weights='distance',p=1)
model.fit(X_train_h,Y_train_h.values.ravel())
y_pred_h = model.predict(X_test_h)
print(accuracy_score(Y_test_h,y_pred_h))
print(precision_score(Y_test_h,y_pred_h,pos_label='positive',average='weighted'))
print(recall_score(Y_test_h,y_pred_h,pos_label='positive',average='micro'))
print(f1_score(Y_test_h, y_pred_h,average='micro'))

0.452
0.4692283613445378
0.452
0.452




In [38]:
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.ensemble import StackingClassifier

estimators = [('rf', RandomForestClassifier()),('svr', make_pipeline(StandardScaler(),LinearSVC())),('dt',DecisionTreeClassifier(criterion = 'entropy')),('knn',KNeighborsClassifier(n_neighbors=2,weights='distance',p=1))]
clf = StackingClassifier(estimators=estimators, final_estimator=LogisticRegression())
clf.fit(X_train_h, Y_train_h.values.ravel())
y_pred_h = clf.predict(X_test_h)
print(accuracy_score(Y_test_h,y_pred_h))
print(precision_score(Y_test_h,y_pred_h,pos_label='positive',average='weighted'))
print(recall_score(Y_test_h,y_pred_h,pos_label='positive',average='micro'))
print(f1_score(Y_test_h, y_pred_h,average='micro'))



0.628
0.6410628216079544
0.628
0.628


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  _warn_prf(average, modifier, msg_start, len(result))
