In [1]:
import pickle
import pandas as pd
save_folder = "models"

def load_pipeline(label):
    """Load the pickled pipeline that was saved for ``label``.

    The file is looked up as ``<save_folder>/<get_filename(label)>``.

    Args:
        label: Topic name used to build the pickle filename.

    Returns:
        The object unpickled from the file.

    Raises:
        FileNotFoundError: If no pickle file exists for ``label``.
    """
    import os  # local import: the cell defining this function does not import os

    filepath = os.path.join(save_folder, get_filename(label))
    with open(filepath, 'rb') as file:
        # SECURITY: pickle.load can execute arbitrary code while deserialising;
        # only load files that were written by this project.
        return pickle.load(file)


def get_filename(topic):
    """Return the pickle filename used to store the pipeline for ``topic``."""
    suffix = "_pipeline.pkl"
    return topic + suffix

In [2]:
import os

# Defaults for the S3 endpoint and credentials read from the environment.
# setdefault() keeps any values already exported in the environment instead of
# overwriting them with the literals below.
# NOTE(review): these literals look like local development credentials —
# confirm they are not real secrets before committing or sharing this notebook.
os.environ.setdefault("AWS_ACCESS_KEY_ID", "imroot")
os.environ.setdefault("AWS_SECRET_ACCESS_KEY", "beaconpass")
os.environ.setdefault("MLFLOW_S3_ENDPOINT_URL", "http://localhost:9000")

In [3]:
from data import load_data, array_to_df
from sklearn.model_selection import train_test_split
# Load the labelled tweets from the JSON export.
df = load_data("labelled-tweets-20-09-2021.json")
# Raw tweet text; it is vectorised later by the preprocessing pipeline.
x_raw = df["text"]
# presumably one boolean column per topic (e.g. "crypto") — confirm in data.array_to_df
y = array_to_df(df["topics"])


In [4]:
from sklearn.pipeline import Pipeline
from text_analysis import TextCleaner
from sklearn.feature_extraction.text import TfidfTransformer, CountVectorizer
from nltk.corpus import stopwords

# Text preprocessing: project TextCleaner (URL removal enabled), token counts
# with English stop words removed, then TF-IDF weighting.
prep_pipeline = Pipeline([
    ('text_cleaner', TextCleaner(remove_urls=True)),
    ('vectorizer', CountVectorizer(stop_words=stopwords.words("english"))),
    ('tfidf', TfidfTransformer()),
])

# NOTE(review): the pipeline is fitted on the full corpus before any train/test
# split, so the vocabulary and IDF weights also see rows that later land in the
# test set — consider fitting on the training split only.
x = prep_pipeline.fit_transform(x_raw)

In [5]:
# Hold out 25% of the rows for evaluation. A fixed random_state makes the split
# (and every result derived from it) reproducible across kernel restarts.
x_train, x_test, y_train, y_test = train_test_split(
    x, y["crypto"], test_size=0.25, random_state=42
)

In [6]:
y_train


123     False
637     False
531      True
1726     True
800      True
        ...  
238     False
1781     True
1959    False
899      True
929     False
Name: crypto, Length: 1592, dtype: bool

In [7]:
# Load the previously pickled "crypto" pipeline; this needs
# models/crypto_pipeline.pkl to exist relative to the working directory.
model = load_pipeline("crypto")

FileNotFoundError: [Errno 2] No such file or directory: 'models/crypto_pipeline.pkl'

In [None]:
import mlflow
import mlflow.sklearn

with mlflow.start_run() as run:
    print(run.info.run_id)
    # Log the model under the "crypto" artifact path of this run.
    mlflow.sklearn.log_model(model, "crypto")
    # Register the artifact that was just logged: the URI is built from the
    # current run id and the same artifact path, so it always points at
    # something this cell actually created.
    # NOTE(review): the registered-model name is kept unchanged; confirm it is
    # the intended name for this classifier.
    result = mlflow.register_model(
        f"runs:/{run.info.run_id}/crypto",
        "sk-learn-random-forest-reg",
    )
# The context manager ends the run on exit; no explicit end_run() is needed.

In [None]:
model.predict(pd.Series(["wassup"]))

In [None]:
from modAL.models import ActiveLearner
from modAL.uncertainty import classifier_uncertainty

In [None]:
x

In [None]:
from sklearn.svm import SVC, LinearSVC
import mlflow
import mlflow.sklearn
from mlflow import log_metric, log_param, log_artifacts

# Toy classifier on two 3-feature samples, used only to try out model logging.
model = SVC(probability=True)
model.fit([[1,1,1], [0,1,0]], [1,0])

# Select the tracking server and experiment before the run is started.
mlflow.set_tracking_uri('http://localhost:5000')
mlflow.set_experiment("my-experiment")
with mlflow.start_run() as run:
    # The run is ended by the context manager when this block exits.
    mlflow.sklearn.log_model(model, "mlflow-active-learner")

In [None]:
model.predict_proba([[1,1,1]])

In [None]:
model.predict([[1,1,0]])

In [None]:
from sklearn.svm import SVC, LinearSVC
from sklearn.ensemble import RandomForestClassifier
# Wrap the current `model` in a modAL ActiveLearner seeded with the crypto labels.
# NOTE(review): X_training is the raw tweet text, so `model` must be able to
# consume strings (e.g. a full text pipeline) — confirm which `model` is live here.
learner = ActiveLearner(
    estimator=model,
    X_training=pd.Series(x_raw), y_training=pd.Series(y["crypto"])
)

In [None]:
# Log the active learner as a model artifact of a fresh MLflow run.
# The `with` block ends the run on exit, so no explicit end_run() call is needed.
with mlflow.start_run() as run:
    mlflow.sklearn.log_model(learner, "learner-model")

In [None]:
import mlflow
from mlflow import log_metric, log_param, log_artifacts

# Toy active learner: an SVC with probability estimates, seeded with two
# 3-feature samples, used to try out the predict / teach / uncertainty calls.
learner = ActiveLearner(
    estimator=SVC(probability=True),
    X_training=[[1,1,1], [0,1,0]], y_training=[1,0]
)

In [None]:
learner.predict([[1,1,0]])

In [None]:
classifier_uncertainty(learner,[[1,1,0]])

In [None]:
learner.teach([[1,1,0]], [0])

In [None]:
learner.predict([[1,1,0]])

In [None]:
classifier_uncertainty(learner,[[1,1,0]])

In [None]:
learner.query(pd.Series(["Hi does this need to be labbeled"]))

In [None]:
# Score of `learner` on the full TF-IDF matrix and the crypto labels.
# NOTE(review): this only works if `learner` was trained on features produced by
# `prep_pipeline`; confirm which `learner` definition was live when this ran.
unqueried_score = learner.score(x, y["crypto"])
unqueried_score

In [None]:
classifier_uncertainty(learner, prep_pipeline.transform(pd.Series(["hi, bitcoin", "not about the said subject", "he is uncertain that this giveaway $$"])))

In [None]:
import mlflow.sklearn
from mlflow.models.signature import infer_signature

In [None]:
import os
from random import random, randint

import mlflow
from mlflow import log_metric, log_param, log_artifacts


# Configure the tracking server and experiment BEFORE starting the run, so the
# run is created on the intended server and under the intended experiment.
mlflow.set_tracking_uri('http://localhost:5000')
mlflow.set_experiment("/my-experiment")

with mlflow.start_run() as run:
    log_param("param1", randint(0, 100))

    # The same metric key is logged three times with different values.
    log_metric("foo", random())
    log_metric("foo", random() + 1)
    log_metric("foo", random() + 2)

    # Write a small file, then upload the whole directory as run artifacts.
    os.makedirs("outputs", exist_ok=True)
    with open("outputs/test.txt", "w") as f:
        f.write("hello world!")

    log_artifacts("outputs")
# The run is ended automatically when the `with` block exits.

In [None]:
import mlflow.sklearn
from mlflow.models.signature import infer_signature
#signature = infer_signature(pd.Series(["Hi, this is a dumb data"]), model.predict(pd.Series(["Hi, this is a dumb data"])))
#mlflow.sklearn.log_model(model, "model", signature=signature)

In [None]:
# Fit an SVC (probability estimates enabled) on the training split.
test_model = SVC(probability=True)
test_model.fit(x_train, y_train)

In [None]:
test_model.predict(x_test)

In [None]:
#signature = infer_signature(x_test, test_model.predict(x_test))
# Log `test_model` (the SVC fitted on the training split) so the logged object
# matches the "test_model" artifact path.
# The `with` block ends the run on exit, so no explicit end_run() call is needed.
with mlflow.start_run() as run:
    mlflow.sklearn.log_model(test_model, "test_model")

In [None]:
# Start an empty run just to display its id; the `with` block ends the run on exit.
with mlflow.start_run() as run:
    print(run.info.run_id)

In [None]:
# Scratch check of zip(): walk two lists in lockstep and print each pair.
firsts = [1, 2]
seconds = [3, 4]
for f, s in zip(firsts, seconds):
    print(f, s)

In [None]:
# Serialise a small labelled frame as a JSON array of row objects.
# With orient="records" the index labels ("d", "e") are not part of the output.
sample_frame = pd.DataFrame(
    data=[[1, 2, 3], [2, 3, 4]],
    columns=["a", "b", "c"],
    index=["d", "e"],
)
json_records = sample_frame.to_json(orient="records")
json_records

In [None]:
import json

# Parse the JSON text back into Python objects and rebuild a DataFrame from them.
records = json.loads(json_records)
pd.DataFrame(data=records)

In [None]:
# Same round-trip experiment as a JSON string: build the frame first, then
# serialise it row by row (orient="records").
rows = [[1, 2, 3], [2, 3, 4]]
labelled = pd.DataFrame(data=rows, columns=["a", "b", "c"], index=["d", "e"])
df_json = labelled.to_json(orient="records")
df_json

In [None]:
from io import StringIO

# Wrap the JSON text in a file-like object: passing a literal JSON string
# directly to pd.read_json is deprecated in recent pandas releases.
pd.read_json(StringIO(df_json), orient="records")

In [None]:
# orient="index" nests by row: {row_label: {column: value}}.
pd.DataFrame(data=[[1,2,3], [2,3,4]], columns=["a", "b", "v"]).to_dict(orient="index")

In [None]:
# Default orient ("dict") nests by column: {column: {row_label: value}}.
pd.DataFrame(data=[[1,2,3], [2,3,4]], columns=["a", "b", "v"]).to_dict()