In [1]:
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegressionCV
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

In [2]:
from sklearn.datasets import load_svmlight_file
from sklearn.metrics import average_precision_score
import sklearn.metrics as sk
import numpy as np

In [5]:
def get_data():
    data = load_svmlight_file("cv5-1.libsvm")
    return data[0], data[1]

In [6]:
X, y = get_data()

In [7]:
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

In [None]:
# logistic = LogisticRegression()
# penalty = ['l2', 'l1']
# C = [0.1, 1, 0.01, 10.0]
# dual = [True, False]
# class_weight = ['balanced', None]
# solver = ['liblinear', 'lbfgs', 'sag']
# max_iter = [2000]
# hyperparameters = dict(C=C, penalty=penalty, max_iter=max_iter)
# find = GridSearchCV(logistic, hyperparameters, cv=10, verbose=0)
# best_model = find.fit(X, y)

In [9]:
model = LogisticRegression(max_iter=2000, class_weight="balanced", C=0.1)

In [10]:
%%time
model.fit(x_train, y_train)

Wall time: 37 s


LogisticRegression(C=0.1, class_weight='balanced', dual=False,
                   fit_intercept=True, intercept_scaling=1, l1_ratio=None,
                   max_iter=2000, multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

In [11]:
predictions = model.predict(x_test)

In [12]:
prob_pos_clf = model.predict_proba(x_test)[:, 1]

In [13]:
prob_pos_clf

array([0.52673099, 0.52952282, 0.61738208, ..., 0.45415205, 0.32904168,
       0.01913975])

In [14]:
print(predictions)
print( classification_report(y_test, predictions) )
print( accuracy_score(y_test, predictions))

[1. 1. 1. ... 0. 0. 0.]
              precision    recall  f1-score   support

         0.0       1.00      0.67      0.80     62238
         1.0       0.01      0.52      0.01       250

    accuracy                           0.67     62488
   macro avg       0.50      0.60      0.41     62488
weighted avg       0.99      0.67      0.80     62488

0.6709608244782999


In [15]:
average_precision = average_precision_score(y_test, predictions)
average_precision

0.005206548283978025

In [16]:
from sklearn.metrics import recall_score

In [17]:
recall_score(y_test, predictions, average='weighted')

0.6709608244782999

In [18]:
#pandas time
import pandas as pd

In [19]:
name_to_save = "batch.txt"

In [20]:
df = pd.DataFrame({"truth": y_test, "predictions": prob_pos_clf  })
df = df.astype({"truth": int})
df.to_csv(name_to_save, sep=" ", header=None, index=False)

In [21]:
df

Unnamed: 0,truth,predictions
0,0,0.526731
1,0,0.529523
2,0,0.617382
3,0,0.486996
4,0,0.385700
...,...,...
62483,0,0.261733
62484,0,0.486578
62485,0,0.454152
62486,0,0.329042


In [22]:
from skompiler import skompile
import yaml

In [23]:
pfa_json = skompile(model.predict).to('pfa/json')
pfa_just = skompile(model.predict).to('pfa')
pfa_yaml = skompile(model.predict).to('pfa/yaml')



In [24]:
pfa_just

{'input': {'type': 'record',
  'name': 'Input',
  'fields': [{'name': 'x', 'type': {'type': 'array', 'items': 'double'}}]},
 'output': {'type': 'double'},
 'action': {'u.step': {'+': [{'u.vdot': [{'type': {'type': 'array',
        'items': 'double'},
       'value': [-0.5129286673571601,
        -0.27739522706564007,
        -0.5003502949294379,
        -0.8429405676374484,
        0.38134794245883624,
        0.12797433065071004,
        -0.3017365032060635,
        -0.06637870505999018,
        -0.0839166102283698,
        0.6413147839740865,
        0.6893631039701519,
        -0.13624031428253264,
        0.36186398224133254,
        -0.3802457130851197,
        0.05789932260950891,
        -0.28878817110703636,
        0.09725576456059976,
        0.06685899628179312,
        0.3567115901021604,
        0.47286161065956106,
        0.05619207141873824,
        0.3019419092925552,
        -0.1112422249459121,
        -0.11600499314559523,
        -0.09608959821983615,
        -0.25

In [25]:
import json
with open('to_deploy', 'w') as outfile:
    json.dump(pfa_just, outfile)