In [1]:
import os
import logging
import yaml
import json
import requests
import numpy as np
from googleapiclient.discovery import build
from nltk.corpus import stopwords
import pandas as pd
import re
from pymystem3 import Mystem
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.cluster import SpectralClustering
from sklearn.metrics import f1_score, silhouette_score
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
import mlflow
from mlflow.tracking import MlflowClient

In [2]:
# –ò–Ω–∏—Ü–∏–∞–ª–∏–∑–∞—Ü–∏—è –∫–ª–∏–µ–Ω—Ç–∞ YouTube API
def initialize_youtube(YOUTUBE_API_KEY):
    return build('youtube', 'v3', developerKey=YOUTUBE_API_KEY)

# –§—É–Ω–∫—Ü–∏—è –¥–ª—è –ø–æ–ª—É—á–µ–Ω–∏—è ID –≤–∏–¥–µ–æ –ø–æ –∫–ª—é—á–µ–≤—ã–º —Å–ª–æ–≤–∞–º
def get_video_ids(youtube, query, count_video=10):
    """
    –ü–æ–∏—Å–∫ –≤–∏–¥–µ–æ –ø–æ –∫–ª—é—á–µ–≤—ã–º —Å–ª–æ–≤–∞–º
    """
    search_response = youtube.search().list(
        q=query,
        part='id',
        maxResults=count_video,
        type='video'
    ).execute()
    
    video_ids = [item['id']['videoId'] for item in search_response.get('items', [])]
    return video_ids

# –ü–æ–ª—É—á–µ–Ω–∏–µ –¥–∞–Ω–Ω—ã—Ö –æ –∫–æ–º–º–µ–Ω—Ç–∞—Ä–∏—è—Ö
def get_data(YOUTUBE_API_KEY, videoId, maxResults, nextPageToken):
    """
    –ü–æ–ª—É—á–µ–Ω–∏–µ –∏–Ω—Ñ–æ—Ä–º–∞—Ü–∏–∏ —Å–æ —Å—Ç—Ä–∞–Ω–∏—Ü—ã —Å –≤–∏–¥–µ–æ
    """
    YOUTUBE_URI = 'https://www.googleapis.com/youtube/v3/commentThreads?key={KEY}&textFormat=plainText&' + \
        'part=snippet&videoId={videoId}&maxResults={maxResults}&pageToken={nextPageToken}'
    format_youtube_uri = YOUTUBE_URI.format(KEY=YOUTUBE_API_KEY,
                                            videoId=videoId,
                                            maxResults=maxResults,
                                            nextPageToken=nextPageToken)
    content = requests.get(format_youtube_uri).text
    data = json.loads(content)
    return data


def get_text_of_comment(data):
    """
    –ü–æ–ª—É—á–µ–Ω–∏–µ –∫–æ–º–º–µ–Ω—Ç–∞—Ä–∏–µ–≤ –∏–∑ –ø–æ–ª—É—á–µ–Ω–Ω—ã—Ö –¥–∞–Ω–Ω—ã—Ö –ø–æ–¥ –æ–¥–Ω–∏–º –≤–∏–¥–µ–æ
    """
    comms = set()
    for item in data['items']:
        comm = item['snippet']['topLevelComment']['snippet']['textDisplay']
        comms.add(comm)
    return comms


# –û—Å–Ω–æ–≤–Ω–∞—è —Ñ—É–Ω–∫—Ü–∏—è –¥–ª—è –ø–æ–ª—É—á–µ–Ω–∏—è –≤—Å–µ—Ö –∫–æ–º–º–µ–Ω—Ç–∞—Ä–∏–µ–≤
def get_all_comments(YOUTUBE_API_KEY, query, count_video=10, limit=30, maxResults=10, nextPageToken=''):
    """
    –í—ã–≥—Ä—É–∑–∫–∞ maxResults –∫–æ–º–º–µ–Ω—Ç–∞—Ä–∏–µ–≤
    """
    youtube = initialize_youtube(YOUTUBE_API_KEY)
    videoIds = get_video_ids(youtube, query, count_video)

    comments_all = []
    for id_video in videoIds:
        try:
            data = get_data(YOUTUBE_API_KEY, id_video, maxResults=maxResults, nextPageToken=nextPageToken)
            comment = list(get_text_of_comment(data))
            comments_all.append(comment)
        except Exception as e:
            logging.error(f"Error fetching comments for video ID {id_video}: {e}")
            continue
    comments = sum(comments_all, [])
    return comments

In [30]:
config_path = os.path.join('/Users/forcemajor01/data_science/work_place/other/airflow-mlflow-tutorial/configs/params_all.yaml')
config = yaml.safe_load(open(config_path))['train']
SEED = config['SEED']


In [31]:
config

{'SEED': 10,
 'clustering': {'affinity': 'cosine',
  'count_max_clusters': 15,
  'silhouette_metric': 'euclidean'},
 'comments': {'YOUTUBE_API_KEY': 'AIzaSyBLU5mFczWyGRHq4HLpm9OzENB05l7RP3w',
  'count_video': 50,
  'limit': 30,
  'maxResults': 250,
  'nextPageToken': '',
  'query': '–¥–∞—Ç–∞ —Å–∞–π–µ–Ω—Å'},
 'cross_val': {'test_size': 0.3},
 'dir_folder': '/Users/forcemajor01/data_science/work_place/other/airflow-mlflow-tutorial',
 'model': {'class_weight': 'balanced'},
 'model_lr': 'LogisticRegression',
 'model_vec': 'vector_tfidf',
 'name_experiment': 'my_first',
 'stopwords': 'russian',
 'tf_model': {'max_features': 300}}

In [10]:
comments = get_all_comments(**config['comments'])

In [11]:
comments[:10]

['–¢–∞–∫ –Ω–∞ —Å–∞–º–æ–º –¥–µ–ª–µ',
 '–ù–∏–∫–∞–∫–æ–≥–æ –¥–µ—Ñ–∏—Ü–∏—Ç–∞ –≤ DS –Ω–∞ —Å–∞–º–æ–º –¥–µ–ª–µ –Ω–µ—Ç.\n\n–ß—Ç–æ –∫–∞—Å–∞–µ—Ç—Å—è entry-level –ø–æ–∑–∏—Ü–∏–π, —Ç–æ –æ–Ω–∏ –∑–∞–∫—Ä—ã–≤–∞—é—Ç—Å—è –ª–∏–±–æ –ø–æ –∑–Ω–∞–∫–æ–º—Å—Ç–≤–∞–º, –ª–∏–±–æ —Å –¥–∏—á–∞–π—à–∏–º –∫–æ–Ω–∫—É—Ä—Å–æ–º –≤ –ø–æ–ª—å–∑—É —Ä–µ–±—è—Ç —Å —Ç–æ–ø–æ–≤—ã—Ö –∫–∞—Ñ–µ–¥—Ä –ø–æ –º–∞—Ç–µ—à–µ/–ø—Ä–æ–≥–µ.\n\n –ü—Ä–æ–±–ª–µ–º–∞ —É —Ä–∞–±–æ—Ç–æ–¥–∞—Ç–µ–ª–µ–π —Ä–∞–∑–≤–µ —á—Ç–æ –º–æ–∂–µ—Ç –±—ã—Ç—å, –∫–∞–∫ –∏ –≤ –°–®–ê, –≤ –ø–æ–∏—Å–∫–µ —Å–ø–µ—Ü–æ–≤ –Ω–∞ research –ø–æ–∑–∏—Ü–∏–∏, —Ö–æ—Ç—è –¥–µ–Ω—å–≥–∏ —Ç–∞–º –±–æ–ª—å—à–∏–µ',
 '–†–∞–±–æ—Ç–∞—é –≤ —Å—Ñ–µ—Ä–µ –Ω–µ —Å–≤—è–∑–∞–Ω–Ω–æ–π —Å IT –∏ —Å–æ–≤–µ—Ä—à–µ–Ω–Ω–æ –¥–∞–ª–µ–∫–æ–π –æ—Ç –º–∞—Ç–µ–º–∞—Ç–∏–∫–∏, –∑–∞—Ä–ø–ª–∞—Ç–∞ –º–æ–∏ –ø–æ—Ç—Ä–µ–±–Ω–æ—Å—Ç–∏ –ø–æ–∫—Ä—ã–≤–∞–µ—Ç, –Ω–æ –∑–∞—Ö–æ—Ç–µ–ª–æ—Å—å —á–µ–≥–æ-—Ç–æ –¥—Ä—É–≥–æ–≥–æ - –ø–æ –∏—Ç–æ–≥—É –ø–æ—à–µ–ª –Ω–∞ –∫—É—Ä—Å—ã –ø–æ DS –≤ –æ–¥–Ω—É –∏–∑–≤–µ—Å—Ç–Ω—É—é –∫–æ–Ω—Ç–æ—Ä—É. \n–ó–∞–∫–∞–Ω—á–∏–≤–∞—é 2-–π –≥–æ–¥ –ø–æ CV. –ü—Ä–∏—à–µ–ª –∫ 

In [12]:
def remove_emoji(string):
    """
    –£–¥–∞–ª–µ–Ω–∏–µ —ç–º–æ–¥–∂–∏ –∏–∑ —Ç–µ–∫—Å—Ç–∞
    """
    emoji_pattern = re.compile("["u"\U0001F600-\U0001F64F"  # emoticons
                               u"\U0001F300-\U0001F5FF"  # symbols & pictographs
                               u"\U0001F680-\U0001F6FF"  # transport & map symbols
                               u"\U0001F1E0-\U0001F1FF"  # flags (iOS)
                               u"\U00002702-\U000027B0"
                               u"\U000024C2-\U0001F251"
                               u"\U0001f926-\U0001f937"
                               u'\U00010000-\U0010ffff'
                               u"\u200d"
                               u"\u2640-\u2642"
                               u"\u2600-\u2B55"
                               u"\u23cf"
                               u"\u23e9"
                               u"\u231a"
                               u"\u3030"
                               u"\ufe0f"
                               "]+", flags=re.UNICODE)
    return emoji_pattern.sub(r'', string)


def remove_links(string):
    """
    –£–¥–∞–ª–µ–Ω–∏–µ —Å—Å—ã–ª–æ–∫
    """
    string = re.sub(r'http\S+', '', string)  # remove http links
    string = re.sub(r'bit.ly/\S+', '', string)  # rempve bitly links
    string = re.sub(r'www\S+', '', string)  # rempve bitly links
    string = string.strip('[link]')  # remove [links]
    return string


def preprocessing(string, stopwords, stem):
    """
    –ü—Ä–æ—Å—Ç–æ–π –ø—Ä–µ–ø—Ä–æ—Ü–µ—Å—Å–∏–Ω–≥ —Ç–µ–∫—Å—Ç–∞, –æ—á–∏—Å—Ç–∫–∞, –ª–µ–º–∞—Ç–∏–∑–∞—Ü–∏—è, —É–¥–∞–ª–µ–Ω–∏–µ –∫–æ—Ä–æ—Ç–∫–∏—Ö —Å–ª–æ–≤
    """
    string = remove_emoji(string)
    string = remove_links(string)

    # —É–¥–∞–ª–µ–Ω–∏–µ —Å–∏–º–≤–æ–ª–æ–≤ "\r\n"
    str_pattern = re.compile("\r\n")
    string = str_pattern.sub(r'', string)

    # –æ—á–∏—Å—Ç–∫–∞ —Ç–µ–∫—Å—Ç–∞ –æ—Ç —Å–∏–º–≤–æ–ª–æ–≤
    string = re.sub('(((?![–∞-—è–ê-–Ø ]).)+)', ' ', string)
    # –ª–µ–º–∞—Ç–∏–∑–∞—Ü–∏—è
    string = ' '.join([
        re.sub('\\n', '', ' '.join(stem.lemmatize(s))).strip()
        for s in string.split()
    ])
    # —É–¥–∞–ª—è–µ–º —Å–ª–æ–≤–∞ –∫–æ—Ä–æ—á–µ 3 —Å–∏–º–≤–æ–ª–æ–≤
    string = ' '.join([s for s in string.split() if len(s) > 3])
    # —É–¥–∞–ª—è–µ–º —Å—Ç–æ–ø-—Å–ª–æ–≤–∞
    string = ' '.join([s for s in string.split() if s not in stopwords])
    return string


def get_clean_text(data, stopwords):
    """
    –ü–æ–ª—É—á–µ–Ω–∏–µ —Ç–µ–∫—Å—Ç–∞ –≤ –ø—Ä–µ–æ–±—Ä–∞–∑–æ–≤–∞–Ω–Ω–æ–π –ø–æ—Å–ª–µ –æ—á–∏—Å—Ç–∫–∏
    –º–∞—Ç—Ä–∏—á–Ω–æ–º –≤–∏–¥–µ, –∞ —Ç–∞–∫–∂–µ –º–æ–¥–µ–ª—å –≤–µ–∫—Ç–æ—Ä–∏–∑–∞—Ü–∏–∏
    """
    # –ü—Ä–æ—Å—Ç–æ–π –ø—Ä–µ–ø—Ä–æ—Ü–µ—Å—Å–∏–Ω–≥ —Ç–µ–∫—Å—Ç–∞
    stem = Mystem()
    comments = [preprocessing(x, stopwords, stem) for x in data]
    # –£–¥–∞–ª–µ–Ω–∏–µ –∫–æ–º–º–µ–Ω—Ç–æ–≤, –∫–æ—Ç–æ—Ä—ã–µ –∏–º–µ—é—Ç –º–µ–Ω—å—à–µ, —á–µ–º 5 —Å–ª–æ–≤
    comments = [y for y in comments if len(y.split()) > 5]
    #common_texts = [i.split(' ') for i in comments]
    return comments


def vectorize_text(data, tfidf):
    """
    –ü–æ–ª—É—á–µ–Ω–∏–µ –º–∞—Ç—Ä–∏—Ü—ã –∫–æ–ª-–≤–∞ —Å–ª–æ–≤ –≤ –∫–æ–º–º–µ–Ω–∞—Ä–∏—è—Ö
    –û—á–∏—Å—Ç–∫–∞ –æ—Ç –ø—É—Å—Ç—ã—Ö —Å—Ç—Ä–æ–∫
    """
    # –í–µ–∫—Ç–æ—Ä–∏–∑–∞—Ü–∏—è
    X_matrix = tfidf.transform(data).toarray()
    # –£–¥–∞–ª—è–µ–º —Å—Ç—Ä–æ–∫–∏ –≤ –º–∞—Ç—Ä–∏—Ü–µ —Å –ø—É—Å—Ç—ã–º–∏ –∑–Ω–∞—á–µ–Ω–∏—è–º–∏
    mask = (np.nan_to_num(X_matrix) != 0).any(axis=1)
    return X_matrix[mask]

In [13]:
comments_clean = get_clean_text(comments, stopwords.words(config['stopwords']))
tfidf = TfidfVectorizer(**config['tf_model']).fit(comments_clean)

In [15]:
comments_clean[:10]

['–Ω–∏–∫–∞–∫–æ–π –¥–µ—Ñ–∏—Ü–∏—Ç —Å–∞–º—ã–π –¥–µ–ª–æ –∫–∞—Å–∞—Ç—å—Å—è –ø–æ–∑–∏—Ü–∏—è –∑–∞–∫—Ä—ã–≤–∞—Ç—å—Å—è –ª–∏–±–æ –∑–Ω–∞–∫–æ–º—Å—Ç–≤–æ –ª–∏–±–æ –¥–∏–∫–∏–π –∫–æ–Ω–∫—É—Ä—Å –ø–æ–ª—å–∑–∞ —Ä–µ–±—è—Ç–∞ —Ç–æ–ø–æ–≤—ã–π –∫–∞—Ñ–µ–¥—Ä–∞ –º–∞—Ç–µ—à –ø—Ä–æ–≥–∞ –ø—Ä–æ–±–ª–µ–º–∞ —Ä–∞–±–æ—Ç–æ–¥–∞—Ç–µ–ª—å –ø–æ–∏—Å–∫ —Å–ø–µ—Ü –ø–æ–∑–∏—Ü–∏—è —Ö–æ—Ç—è –¥–µ–Ω—å–≥–∏ –±–æ–ª—å—à–æ–π',
 '—Ä–∞–±–æ—Ç–∞—Ç—å —Å—Ñ–µ—Ä–∞ —Å–≤—è–∑—ã–≤–∞—Ç—å —Å–æ–≤–µ—Ä—à–µ–Ω–Ω–æ –¥–∞–ª–µ–∫–∏–π –º–∞—Ç–µ–º–∞—Ç–∏–∫ –∑–∞—Ä–ø–ª–∞—Ç–∞ –ø–æ—Ç—Ä–µ–±–Ω–æ—Å—Ç—å –ø–æ–∫—Ä—ã–≤–∞—Ç—å –∑–∞—Ö–æ—Ç–µ—Ç—å—Å—è –∏—Ç–æ–≥ –ø–æ–π—Ç–∏ –∫—É—Ä—Å—ã –∏–∑–≤–µ—Å—Ç–Ω—ã–π –∫–æ–Ω—Ç–æ—Ä–∞ –∑–∞–∫–∞–Ω—á–∏–≤–∞—Ç—å –ø—Ä–∏—Ö–æ–¥–∏—Ç—å –≤—ã–≤–æ–¥ –Ω—É–∂–Ω—ã–π –≤—ã—à–∫–∞ –∏–Ω–∞—á–µ –ø—Ä–æ–±–ª–µ–º–∞ –ø–æ–∏—Å–∫ –Ω–æ–≤—ã–π —Ä–∞–±–æ—Ç–∞ —Å–¥–∞–≤–∞—Ç—å —ç–∫–∑–∞–º–µ–Ω –ø–æ—Å—Ç—É–ø–∞—Ç—å –º–∞–≥–∏—Å—Ç—Ä–∞—Ç—É—Ä–∞ –æ—á–µ–Ω—å –∏–∑–≤–µ—Å—Ç–Ω—ã–π —Ä–µ–∑—É–ª—å—Ç–∞—Ç —ç–∫–∑–∞–º–µ–Ω –≤–µ—Ä—Ö–Ω–∏–π –∞–±–∏—Ç—É—Ä–∏–µ–Ω—Ç —Å–µ–Ω—Ç—è–±—Ä—å –Ω–∞—á–∏–Ω–∞—Ç—å—Å—è —É—á–µ–±–∞ –Ω–∞–ø—Ä–∞–≤–ª–µ–Ω–∏–µ

In [17]:
X_matrix = vectorize_text(comments_clean, tfidf)

In [18]:
X_matrix.shape

(1132, 300)

In [20]:
tfidf.get_feature_names_out()[:10]

array(['–∞–≤—Ç–æ—Ä', '–∞–ª–≥–æ—Ä–∏—Ç–º', '–∞–Ω–∞–ª–∏–∑', '–∞–Ω–∞–ª–∏—Ç–∏–∫', '–∞–Ω–∞–ª–∏—Ç–∏–∫–∞',
       '–∞–Ω–∞—Å—Ç–∞—Å–∏—è', '–∞–Ω–≥–ª–∏–π—Å–∫–∏–π', '–±–∞–±—É—à–∫–∏–Ω', '–±–∞–∑–∞', '–±–∞–∑–æ–≤—ã–π'],
      dtype=object)

In [23]:
def get_clusters(data, count_max_clusters, random_state, affinity,
                 silhouette_metric):
    """
    –ü–æ–¥–±–æ—Ä –Ω–∞–∏–ª—É—á—à–µ–≥–æ —á–∏—Å–ª–∞ –∫–ª–∞—Å—Ç–µ—Ä–æ–≤, –≤–æ–∑–≤—Ä–∞—â–∞–µ—Ç –ø–æ–ª—É—á–µ–Ω–Ω—ã–µ –∫–ª–∞—Å—Ç–µ—Ä–∞ —Ç–µ–º–∞—Ç–∏–∫
    """
    cluster_labels = {}
    silhouette_mean = []

    for i in range(2, count_max_clusters, 1):
        clf = SpectralClustering(n_clusters=i,
                                 affinity=affinity,
                                 random_state=random_state)
        #clf = KMeans(n_clusters=n, max_iter=1000, n_init=1)
        clf.fit(data)
        labels = clf.labels_
        cluster_labels[i] = labels
        silhouette_mean.append(
            silhouette_score(data, labels, metric=silhouette_metric))
    n_clusters = silhouette_mean.index(max(silhouette_mean)) + 2
    return cluster_labels[n_clusters]


def get_f1_score(y_test, y_pred, unique_cluster_labels):
    """
    –í–æ–∑—Ä–∞—â–∞–µ—Ç —Ä–µ–∑—É–ª—å—Ç–∞—Ç –æ–±—É—á–µ–Ω–∏—è –∫–ª–∞—Å—Å–∏—Ñ–∏–∫–∞—Ç–æ—Ä–∞ –ø–æ —Ç–µ–º–∞—Ç–∏–∫–∞–º
    """
    return f1_score(
        y_test, y_pred,
        average='macro') \
        if len(unique_cluster_labels) > 2 \
        else f1_score(y_test, y_pred)

In [24]:
cluster_labels = get_clusters(X_matrix,
                                 random_state=SEED,
                                 **config['clustering'])

In [32]:
config

{'SEED': 10,
 'clustering': {'affinity': 'cosine',
  'count_max_clusters': 15,
  'silhouette_metric': 'euclidean'},
 'comments': {'YOUTUBE_API_KEY': 'AIzaSyBLU5mFczWyGRHq4HLpm9OzENB05l7RP3w',
  'count_video': 50,
  'limit': 30,
  'maxResults': 250,
  'nextPageToken': '',
  'query': '–¥–∞—Ç–∞ —Å–∞–π–µ–Ω—Å'},
 'cross_val': {'test_size': 0.3},
 'dir_folder': '/Users/forcemajor01/data_science/work_place/other/airflow-mlflow-tutorial',
 'model': {'class_weight': 'balanced'},
 'model_lr': 'LogisticRegression',
 'model_vec': 'vector_tfidf',
 'name_experiment': 'my_first',
 'stopwords': 'russian',
 'tf_model': {'max_features': 300}}

In [26]:
cluster_labels[:10]

array([ 6,  2,  2,  3,  9, 10,  2,  2,  2,  2], dtype=int32)

In [27]:
X_train, X_test, y_train, y_test = train_test_split(X_matrix,
                                                    cluster_labels,
                                                    **config['cross_val'],
                                                    random_state=SEED)

In [28]:
clf_lr = LogisticRegression(**config['model'])

In [29]:
%%bash
export MLFLOW_REGISTRY_URI=../mlflow

In [33]:
mlflow.set_tracking_uri("http://localhost:5001")
mlflow.set_experiment(config['name_experiment'])
with mlflow.start_run():
    clf_lr.fit(X_train, y_train)
    print(clf_lr.predict_proba(X_test))

    # –õ–æ–≥–∏—Ä–æ–≤–∞–Ω–∏–µ –º–æ–¥–µ–ª–∏ –∏ –ø–∞—Ä–∞–º–µ—Ç—Ä–æ–≤
    mlflow.log_param(
        'f1', get_f1_score(y_test, clf_lr.predict(X_test),
                           set(cluster_labels)))
    mlflow.sklearn.log_model(
        tfidf,
        artifact_path="vector",
        registered_model_name=f"{config['model_vec']}")
    mlflow.sklearn.log_model(
        clf_lr,
        artifact_path='model_lr',
        registered_model_name=f"{config['model_lr']}")
    mlflow.end_run()

2024/10/14 13:27:33 INFO mlflow.tracking.fluent: Experiment with name 'my_first' does not exist. Creating a new experiment.


[[0.1019415  0.03471452 0.07495948 ... 0.03793634 0.20496989 0.05223601]
 [0.01806405 0.00804515 0.02716471 ... 0.00960577 0.28788865 0.01547912]
 [0.06885996 0.02905023 0.07846428 ... 0.0304531  0.04241483 0.04172083]
 ...
 [0.04285026 0.1252693  0.07856961 ... 0.04306601 0.04882859 0.04683564]
 [0.03794967 0.01268974 0.12699254 ... 0.02364957 0.02531088 0.06697331]
 [0.00598721 0.00329804 0.01601326 ... 0.00441986 0.02246577 0.00737974]]


Successfully registered model 'vector_tfidf'.
2024/10/14 13:27:36 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: vector_tfidf, version 1
Created version '1' of model 'vector_tfidf'.
Successfully registered model 'LogisticRegression'.
2024/10/14 13:27:37 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: LogisticRegression, version 1
Created version '1' of model 'LogisticRegression'.
2024/10/14 13:27:37 INFO mlflow.tracking._tracking_service.client: üèÉ View run indecisive-sloth-676 at: http://localhost:5001/#/experiments/1/runs/eade7c2e56c440469bb41cf18f4b75cc.
2024/10/14 13:27:37 INFO mlflow.tracking._tracking_service.client: üß™ View experiment at: http://localhost:5001/#/experiments/1.


In [34]:
mlflow.get_artifact_uri()

'/Users/forcemajor01/data_science/work_place/other/airflow-mlflow-tutorial/mlflow/1/96911c12d2284b2bbffff034b26b8e93/artifacts'

In [35]:
def get_version_model(config_name, client):
    """
    –ü–æ–ª—É—á–µ–Ω–∏–µ –ø–æ—Å–ª–µ–¥–Ω–µ–π –≤–µ—Ä—Å–∏–∏ –º–æ–¥–µ–ª–∏ –∏–∑ MLFlow
    """
    dict_push = {}
    for count, value in enumerate(
        client.search_model_versions(f"name='{config_name}'")):
        # client.list_registered_models()):
        # –í—Å–µ –≤–µ—Ä—Å–∏–∏ –º–æ–¥–µ–ª–∏
        dict_push[count] = value
    return dict(list(dict_push.items())[-1][1])['version']

In [36]:
client = MlflowClient()
last_version_lr = get_version_model(config['model_lr'], client)
last_version_vec = get_version_model(config['model_vec'], client)

In [37]:
last_version_lr

'1'

In [38]:
last_version_vec

'1'