In [1]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
import sklearn
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import log_loss, roc_auc_score, recall_score, precision_score, average_precision_score, f1_score, classification_report, accuracy_score, plot_roc_curve, plot_precision_recall_curve, plot_confusion_matrix

In [2]:
data = pd.read_excel(r"C:\Users\gog10\GfK Spectacle\GfK-Spectacle\GfK_spectacle_lenses_data.xlsx")

In [3]:
#Replace null values in the data
data.loc[data['main_text'].isnull(), 'main_text'] = 'Not Known'
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3549 entries, 0 to 3548
Data columns (total 6 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   item_id         3549 non-null   int64 
 1   main_text       3549 non-null   object
 2   country_name    3549 non-null   object
 3   retailer_pg     3549 non-null   object
 4   mdm_brand_text  3549 non-null   object
 5   mdm_model_text  3549 non-null   object
dtypes: int64(1), object(5)
memory usage: 166.5+ KB


In [4]:
#Extract first word from model text (post prediction) to make model text more meaningful
data['make'] = data['mdm_model_text'].str.split(' ').str[0]
data

Unnamed: 0,item_id,main_text,country_name,retailer_pg,mdm_brand_text,mdm_model_text,make
0,138176095,ID1.60LIFESTL3IURBAN(RX),SPAIN,CRISTALES,HOYA,HOYALUX ID LIFESTYLE 3-I HVLL,HOYALUX
1,138176095,ID1.60LIFESTL3IURBAN(RX),SPAIN,CRISTALES,HOYA,HOYALUX ID LIFESTYLE 3-I HVLL,HOYALUX
2,138176095,ID1.60LIFESTL3IURBAN(RX),SPAIN,CRISTALES,HOYA,HOYALUX ID LIFESTYLE 3-I HVLL,HOYALUX
3,138176095,ID1.60LIFESTL3IURBAN(RX),SPAIN,CRISTALES,HOYA,HOYALUX ID LIFESTYLE 3-I HVLL,HOYALUX
4,138176095,ID1.60LIFESTL3IURBAN(RX),SPAIN,CRISTALES,HOYA,HOYALUX ID LIFESTYLE 3-I HVLL,HOYALUX
...,...,...,...,...,...,...,...
3544,25278162,CRISTALES 34212 MI MF XPERIENCE 1.5 BASIC HV P...,SPAIN,CRISTALES,HOYA,HOYALUX SUMMIT PRO 1.5,HOYALUX
3545,115519055,CRISTALES 34186 MI HILUX 1.6 SHV FAB MONOFOCAL...,SPAIN,CRISTALES,HOYA,HILUX EYAS 1.6 SHV,HILUX
3546,112904161,CRISTALES 34317 NULUX ACTIVE TF 1.5 HV ORGÁNIC...,SPAIN,CRISTALES,HOYA,NULUX ACTIVE TF 1.50 HV,NULUX
3547,138176095,CRISTALES 34244 MI LIFESTYLE 3I 1.5 HVLL BC PR...,SPAIN,CRISTALES,HOYA,HOYALUX ID LIFESTYLE 3-I HVLL,HOYALUX


In [5]:
#Denote country name into numerical categories
from sklearn.preprocessing import LabelEncoder
l = LabelEncoder()
categorical_col = ['country_name']
for col in categorical_col:
    data[col] = l.fit_transform(data[col])
data.head()

Unnamed: 0,item_id,main_text,country_name,retailer_pg,mdm_brand_text,mdm_model_text,make
0,138176095,ID1.60LIFESTL3IURBAN(RX),1,CRISTALES,HOYA,HOYALUX ID LIFESTYLE 3-I HVLL,HOYALUX
1,138176095,ID1.60LIFESTL3IURBAN(RX),1,CRISTALES,HOYA,HOYALUX ID LIFESTYLE 3-I HVLL,HOYALUX
2,138176095,ID1.60LIFESTL3IURBAN(RX),1,CRISTALES,HOYA,HOYALUX ID LIFESTYLE 3-I HVLL,HOYALUX
3,138176095,ID1.60LIFESTL3IURBAN(RX),1,CRISTALES,HOYA,HOYALUX ID LIFESTYLE 3-I HVLL,HOYALUX
4,138176095,ID1.60LIFESTL3IURBAN(RX),1,CRISTALES,HOYA,HOYALUX ID LIFESTYLE 3-I HVLL,HOYALUX


In [6]:
#Split dataframe into countries
data_germany = data[data['country_name'] == 0]
data_spain = data[data['country_name'] == 1]
data_spain.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2270 entries, 0 to 3548
Data columns (total 7 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   item_id         2270 non-null   int64 
 1   main_text       2270 non-null   object
 2   country_name    2270 non-null   int32 
 3   retailer_pg     2270 non-null   object
 4   mdm_brand_text  2270 non-null   object
 5   mdm_model_text  2270 non-null   object
 6   make            2270 non-null   object
dtypes: int32(1), int64(1), object(5)
memory usage: 133.0+ KB


In [7]:
#Check item_id counts for Spain only
item_id_count = data_spain['item_id'].value_counts()
item_id_count

82981040     290
138176095    226
87099837     116
88210952      71
79429232      50
            ... 
98426893       3
117860560      3
80508911       2
119539790      1
72145121       1
Name: item_id, Length: 128, dtype: int64

In [8]:
data_spain.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2270 entries, 0 to 3548
Data columns (total 7 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   item_id         2270 non-null   int64 
 1   main_text       2270 non-null   object
 2   country_name    2270 non-null   int32 
 3   retailer_pg     2270 non-null   object
 4   mdm_brand_text  2270 non-null   object
 5   mdm_model_text  2270 non-null   object
 6   make            2270 non-null   object
dtypes: int32(1), int64(1), object(5)
memory usage: 133.0+ KB


In [9]:
#Counts per category for brand text (post prediction)
data_spain['mdm_brand_text'].value_counts()

ESSILOR           1010
HOYA               588
INDO               292
ZEISS              157
PRATS               57
SHAMIR              42
MEDICAL VISION      30
KODAK               19
NIKON               11
RAY BAN             10
EYEART              10
TOKAI                9
SYNCHRONY            8
L.O.A.               7
AMERICAN OPTIC       7
BBGR                 7
RODENSTOCK           6
Name: mdm_brand_text, dtype: int64

In [10]:
#Check value counts of this new column
data_spain['make'].value_counts().head(30)

VARILUX        384
ORMIX          340
HOYALUX        268
HILUX          151
INDOSOL        116
EYEZEN         107
PROGRESSIVE     74
ORG             67
MAXXEE          50
ACTIVA          43
BALANSIS        40
AIRWEAR         35
NEW             34
NULUX           31
ORMA            31
OR              30
UNIFOCAL        30
INDOFIN         27
INDIVIDUAL      26
ESTELUX         24
FLINT           24
LINEIS          22
TELEGRAND       21
MASTERLUX       20
SUPERLITE       19
UNIQUE          19
NATURAL         18
C               17
NEO             15
EASYMAX         12
Name: make, dtype: int64

In [11]:
data_spain.loc[data['main_text'].str.contains('ESSILOR'), 'brand_model'] = 'ESSILOR' 
data_spain.loc[data['main_text'].str.contains('HOYA'), 'brand_model'] = 'HOYA'
data_spain.loc[data['main_text'].str.contains('INDO'), 'brand_model'] = 'INDO'
data_spain.loc[data['main_text'].str.contains('ZEISS'), 'brand_model'] = 'ZEISS'
data_spain.loc[data['main_text'].str.contains('PRATS'), 'brand_model'] = 'PRATS' 
data_spain.loc[data['main_text'].str.contains('SHAMIR'), 'brand_model'] = 'SHAMIR' 
data_spain.loc[data['main_text'].str.contains('MEDICAL VISION'), 'brand_model'] = 'MEDICAL VISION'
data_spain.loc[data['main_text'].str.contains('KODAK'), 'brand_model'] = 'KODAK'
data_spain.loc[data['main_text'].str.contains('NIKON'), 'brand_model'] = 'NIKON'
data_spain.loc[data['main_text'].str.contains('RAY BAN'), 'brand_model'] = 'RAY BAN'
data_spain.loc[data['main_text'].str.contains('EYEART'), 'brand_model'] = 'EYEART'   
data_spain.loc[data['main_text'].str.contains('TOKAI'), 'brand_model'] = 'TOKAI'
data_spain.loc[data['main_text'].str.contains('SYNCHRONY'), 'brand_model'] = 'SYNCHRONY'
data_spain.loc[data['main_text'].str.contains('L.O.A.'), 'brand_model'] = 'L.O.A.'
data_spain.loc[data['main_text'].str.contains('AMERICAN OPTIC '), 'brand_model'] = 'AMERICAN OPTIC '
data_spain.loc[data['main_text'].str.contains('BBGR'), 'brand_model'] = 'BBGR'
data_spain.loc[data['main_text'].str.contains('RODENSTOCK'), 'brand_model'] = 'RODENSTOCK'
data_spain.loc[data['make'].str.contains('VARILUX'), 'brand_model'] = 'VARILUX' 
data_spain.loc[data['make'].str.contains('ORMIX'), 'brand_model'] = 'ORMIX'  
data_spain.loc[data['make'].str.contains('HOYALUX'), 'brand_model'] = 'HOYALUX' 
data_spain.loc[data['make'].str.contains('HILUX'), 'brand_model'] = 'HILUX' 
data_spain.loc[data['make'].str.contains('INDOSOL'), 'brand_model'] = 'INDOSOL' 
data_spain.loc[data['make'].str.contains('EYEZEN'), 'brand_model'] = 'EYEZEN'
data_spain.loc[data['make'].str.contains('PROGRESSIVE'), 'brand_model'] = 'PROGRESSIVE' 
data_spain.loc[data['make'].str.contains('ORG'), 'brand_model'] = 'ORG' 
data_spain.loc[data['make'].str.contains('MAXXEE'), 'brand_model'] = 'MAXXEE' 
data_spain.loc[data['make'].str.contains('ACTIVA'), 'brand_model'] = 'ACTIVA' 
data_spain.loc[data['make'].str.contains('BALANSIS'), 'brand_model'] = 'BALANSIS' 
data_spain.loc[data['make'].str.contains('AIRWEAR'), 'brand_model'] = 'AIRWEAR' 
data_spain.loc[data['make'].str.contains('NEW'), 'brand_model'] = 'NEW' 
data_spain.loc[data['make'].str.contains('NULUX'), 'brand_model'] = 'NULUX'
data_spain.loc[data['make'].str.contains('ORMA'), 'brand_model'] = 'ORMA' 
data_spain.loc[data['make'].str.contains('OR'), 'brand_model'] = 'OR'
data_spain.loc[data['make'].str.contains('UNIFOCAL'), 'brand_model'] = 'UNIFOCAL'
data_spain.loc[data['make'].str.contains('INDOFIN'), 'brand_model'] = 'INDOFIN'
data_spain.loc[data['make'].str.contains('INDIVIDUAL'), 'brand_model'] = 'INDIVIDUAL'
data_spain.loc[data['make'].str.contains('ESTELUX'), 'brand_model'] = 'ESTELUX'
data_spain.loc[data['make'].str.contains('FLINT'), 'brand_model'] = 'FLINT'
data_spain.loc[data['make'].str.contains('LINEIS'), 'brand_model'] = 'LINEIS' 
data_spain.loc[data['make'].str.contains('TELEGRAND'), 'brand_model'] = 'TELEGRAND'
data_spain.loc[data['make'].str.contains('MASTERLUX '), 'brand_model'] = 'MASTERLUX' 
data_spain

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)


Unnamed: 0,item_id,main_text,country_name,retailer_pg,mdm_brand_text,mdm_model_text,make,brand_model
0,138176095,ID1.60LIFESTL3IURBAN(RX),1,CRISTALES,HOYA,HOYALUX ID LIFESTYLE 3-I HVLL,HOYALUX,HOYALUX
1,138176095,ID1.60LIFESTL3IURBAN(RX),1,CRISTALES,HOYA,HOYALUX ID LIFESTYLE 3-I HVLL,HOYALUX,HOYALUX
2,138176095,ID1.60LIFESTL3IURBAN(RX),1,CRISTALES,HOYA,HOYALUX ID LIFESTYLE 3-I HVLL,HOYALUX,HOYALUX
3,138176095,ID1.60LIFESTL3IURBAN(RX),1,CRISTALES,HOYA,HOYALUX ID LIFESTYLE 3-I HVLL,HOYALUX,HOYALUX
4,138176095,ID1.60LIFESTL3IURBAN(RX),1,CRISTALES,HOYA,HOYALUX ID LIFESTYLE 3-I HVLL,HOYALUX,HOYALUX
...,...,...,...,...,...,...,...,...
3544,25278162,CRISTALES 34212 MI MF XPERIENCE 1.5 BASIC HV P...,1,CRISTALES,HOYA,HOYALUX SUMMIT PRO 1.5,HOYALUX,HOYALUX
3545,115519055,CRISTALES 34186 MI HILUX 1.6 SHV FAB MONOFOCAL...,1,CRISTALES,HOYA,HILUX EYAS 1.6 SHV,HILUX,HILUX
3546,112904161,CRISTALES 34317 NULUX ACTIVE TF 1.5 HV ORGÁNIC...,1,CRISTALES,HOYA,NULUX ACTIVE TF 1.50 HV,NULUX,NULUX
3547,138176095,CRISTALES 34244 MI LIFESTYLE 3I 1.5 HVLL BC PR...,1,CRISTALES,HOYA,HOYALUX ID LIFESTYLE 3-I HVLL,HOYALUX,HOYALUX


In [12]:
data_spain['brand_model'].count()

2087

In [13]:
#Check value counts of retailer product group (pre prediction) column
data_spain['retailer_pg'].value_counts().head(30)

CRISTALES                       900
LENTES                          304
LENTE                           177
UNASSIGNED                      168
LENTES OFTALMICAS               124
LENTE|ORGANICA|PROGRESIVA        83
LENTE|ORGANICA|MONOFOCAL         80
LENTES//ORGANICA MONOFOCAL       64
LENTES//ORGANICA PROGRESIVA      40
?253                             31
LENTES ORGANICA MONOFOCAL        30
SF06                             30
LENTE|ORGANICA PROGRESIVA        28
LENTE|ORGANICA MONOFOCAL         26
VIDRES                           25
LENTES|ORGANICA MONOFOCAL        24
LENTES OFTÂŒMICAS                21
LENTES/CRISTALES                 19
LENTES|ORGANICA PROGRESIVA       13
MONOFOCAL                        12
LENTES ORGANICA PROGRESIVA       11
PROGRESIVO                       10
722803 - LENTES PROMOCION         6
LENTE|MONOFOCAL                   5
LENTES//MONOFOCAL//ORGÁNICA       5
LENTES OFTÁLMICAS                 5
LENTE|MINERAL|MONOFOCAL           4
LENTES//PROGRESIVA//ORGÁNICA

In [14]:
#Extract common words from retailer_pg column to group categories together and make data more meaningful
data_spain.loc[data['retailer_pg'].str.contains('CRISTALES'), 'product_group'] = 'CRISTALES' 
data_spain.loc[data['retailer_pg'].str.contains('OFTALMICAS'), 'product_group'] = 'OFTALMICAS'
data_spain.loc[data['retailer_pg'].str.contains('PROGRESIV'), 'product_group'] = 'PROGRESIVA' 
data_spain.loc[data['retailer_pg'].str.contains('MONOFOCAL'), 'product_group'] = 'MONOFOCAL'
data_spain.loc[data['retailer_pg'].str.contains('VIDRES'), 'product_group'] = 'VIDRES' 
data_spain.loc[data['retailer_pg'].str.contains('BIFOCAL'), 'product_group'] = 'BIFOCAL' 
data_spain.loc[data['retailer_pg'].str.contains('PROMOCION'), 'product_group'] = 'PROMOCION'
data_spain.loc[data['retailer_pg'].str.contains('ECI'), 'product_group'] = 'ECI'
data_spain

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)


Unnamed: 0,item_id,main_text,country_name,retailer_pg,mdm_brand_text,mdm_model_text,make,brand_model,product_group
0,138176095,ID1.60LIFESTL3IURBAN(RX),1,CRISTALES,HOYA,HOYALUX ID LIFESTYLE 3-I HVLL,HOYALUX,HOYALUX,CRISTALES
1,138176095,ID1.60LIFESTL3IURBAN(RX),1,CRISTALES,HOYA,HOYALUX ID LIFESTYLE 3-I HVLL,HOYALUX,HOYALUX,CRISTALES
2,138176095,ID1.60LIFESTL3IURBAN(RX),1,CRISTALES,HOYA,HOYALUX ID LIFESTYLE 3-I HVLL,HOYALUX,HOYALUX,CRISTALES
3,138176095,ID1.60LIFESTL3IURBAN(RX),1,CRISTALES,HOYA,HOYALUX ID LIFESTYLE 3-I HVLL,HOYALUX,HOYALUX,CRISTALES
4,138176095,ID1.60LIFESTL3IURBAN(RX),1,CRISTALES,HOYA,HOYALUX ID LIFESTYLE 3-I HVLL,HOYALUX,HOYALUX,CRISTALES
...,...,...,...,...,...,...,...,...,...
3544,25278162,CRISTALES 34212 MI MF XPERIENCE 1.5 BASIC HV P...,1,CRISTALES,HOYA,HOYALUX SUMMIT PRO 1.5,HOYALUX,HOYALUX,CRISTALES
3545,115519055,CRISTALES 34186 MI HILUX 1.6 SHV FAB MONOFOCAL...,1,CRISTALES,HOYA,HILUX EYAS 1.6 SHV,HILUX,HILUX,CRISTALES
3546,112904161,CRISTALES 34317 NULUX ACTIVE TF 1.5 HV ORGÁNIC...,1,CRISTALES,HOYA,NULUX ACTIVE TF 1.50 HV,NULUX,NULUX,CRISTALES
3547,138176095,CRISTALES 34244 MI LIFESTYLE 3I 1.5 HVLL BC PR...,1,CRISTALES,HOYA,HOYALUX ID LIFESTYLE 3-I HVLL,HOYALUX,HOYALUX,CRISTALES


In [15]:
#Count missing values
data_spain.isnull().sum()

item_id             0
main_text           0
country_name        0
retailer_pg         0
mdm_brand_text      0
mdm_model_text      0
make                0
brand_model       183
product_group     742
dtype: int64

In [16]:
#Remove all rows with at least one null value
data_spain.dropna(inplace=True)
data_spain.isnull().sum()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


item_id           0
main_text         0
country_name      0
retailer_pg       0
mdm_brand_text    0
mdm_model_text    0
make              0
brand_model       0
product_group     0
dtype: int64

In [17]:
#Create numerical values for model
categorical_col = ['item_id']
for col in categorical_col:
    data_spain[col] = l.fit_transform(data_spain[col])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_spain[col] = l.fit_transform(data_spain[col])


In [18]:
#Create numerical values for model
categorical_col = ['brand_model']
for col in categorical_col:
    data_spain[col] = l.fit_transform(data_spain[col])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_spain[col] = l.fit_transform(data_spain[col])


In [19]:
#Create numerical values for model
categorical_col = ['product_group']
for col in categorical_col:
    data_spain[col] = l.fit_transform(data_spain[col])
data_spain.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_spain[col] = l.fit_transform(data_spain[col])


Unnamed: 0,item_id,main_text,country_name,retailer_pg,mdm_brand_text,mdm_model_text,make,brand_model,product_group
0,84,ID1.60LIFESTL3IURBAN(RX),1,CRISTALES,HOYA,HOYALUX ID LIFESTYLE 3-I HVLL,HOYALUX,9,1
1,84,ID1.60LIFESTL3IURBAN(RX),1,CRISTALES,HOYA,HOYALUX ID LIFESTYLE 3-I HVLL,HOYALUX,9,1
2,84,ID1.60LIFESTL3IURBAN(RX),1,CRISTALES,HOYA,HOYALUX ID LIFESTYLE 3-I HVLL,HOYALUX,9,1
3,84,ID1.60LIFESTL3IURBAN(RX),1,CRISTALES,HOYA,HOYALUX ID LIFESTYLE 3-I HVLL,HOYALUX,9,1
4,84,ID1.60LIFESTL3IURBAN(RX),1,CRISTALES,HOYA,HOYALUX ID LIFESTYLE 3-I HVLL,HOYALUX,9,1


In [20]:
#Delete columns not needed for model
data_spain.drop(['main_text', 'country_name', 'retailer_pg', 'mdm_brand_text', 'mdm_model_text', 'make'], axis = 'columns', inplace=True)
data_spain

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


Unnamed: 0,item_id,brand_model,product_group
0,84,9,1
1,84,9,1
2,84,9,1
3,84,9,1
4,84,9,1
...,...,...,...
3544,6,9,1
3545,61,7,1
3546,58,18,1
3547,84,9,1


In [21]:
#Define X and y values for model
X = data_spain.drop('item_id', axis =1)
y = data_spain['item_id']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

In [22]:
# Standarize features
scaler = StandardScaler()
X_std = scaler.fit_transform(X)

# Create one-vs-rest logistic regression object
clf = LogisticRegression(random_state=0, multi_class='ovr')

# Train model
model = clf.fit(X_std, y)

# Fitting the model with training data
model.fit(X_train, y_train)

# Making a prediction on the test set
prediction = model.predict(X_test)

In [23]:
precision = precision_score(y_test, prediction, average='micro', zero_division=0)
print('Precision score: {0:0.2f}'.format(precision))

recall = recall_score(y_test, prediction, average='micro', zero_division=0)
print('Recall score: {0:0.2f}'.format(recall))

f1 = f1_score(y_test, prediction, average='micro', zero_division=0)
print('f1 score: {0:0.2f}'.format(recall))

Precision score: 0.27
Recall score: 0.27
f1 score: 0.27
