In [5]:
import random
import numpy as np
import pandas as pd
from deap import base, creator, tools, algorithms
from app.models import *
import joblib
import warnings
import matplotlib.pyplot as plt
from tqdm import tqdm
import concurrent.futures

warnings.filterwarnings('ignore')

In [6]:
# Load the persisted model object from disk.
# NOTE(review): joblib.load unpickles arbitrary Python objects, so only load
# model files that come from a trusted source.
new_model = joblib.load("saved_models/MODEL.pkl")

# The loaded object exposes `best_estimator_`; presumably it is a fitted
# hyper-parameter search wrapper (e.g. GridSearchCV) — TODO confirm.
model = new_model.best_estimator_
# Show the concrete estimator class that was selected.
print(type(model))

<class 'sklearn.linear_model._logistic.LogisticRegression'>


In [7]:
def rank_teams(season, matches, list_team, model_file, num_divisions, min_teams_per_division=3, num_generations=100, population_size=300):
    """Redistribute teams into divisions with a genetic algorithm and save the result.

    For every ordered pair of distinct teams the function calls
    ``get_team_win_probability(model, matches, team, opponent)``, passes the
    two directional values through ``normalize_probabilities`` and stores them
    (scaled by 100) in a pairwise table. A DEAP genetic algorithm then searches
    for a permutation of the teams that, once dealt round-robin into
    ``num_divisions`` divisions, minimises the mean (over divisions) of the
    average per-match highest percentage inside each division.

    Side effects: writes ``data/interim/matches_rangirov.csv`` (pairwise table)
    and ``data/ranking_seasons/team_rangirov_for_{season}.csv`` (team -> division),
    and prints the number of ranked teams.

    :param season: season identifier, used only in the output file name.
    :param matches: match data forwarded unchanged to ``get_team_win_probability``.
    :param list_team: DataFrame with an ``'ID team'`` column.
    :param model_file: already-loaded model forwarded to ``get_team_win_probability``.
    :param num_divisions: number of divisions to create.
    :param min_teams_per_division: minimum number of teams required per division.
    :param num_generations: number of GA generations.
    :param population_size: number of individuals in the GA population.
    :raises ValueError: if there are fewer teams than
        ``num_divisions * min_teams_per_division``.
    """
    model = model_file  # use the already-loaded model
    unique_teams = list_team['ID team'].unique()
    num_teams = len(unique_teams)

    if num_teams < num_divisions * min_teams_per_division:
        raise ValueError("Недостаточно команд для распределения по заданному количеству дивизионов")

    # Compute every directional probability exactly once, in the thread pool.
    # Pairs are keyed by position in `unique_teams`, so the reverse direction
    # is a dictionary lookup instead of a second synchronous model call.
    ordered_pairs = [(i, j) for i in range(num_teams) for j in range(num_teams) if i != j]
    win_probs = {}
    with concurrent.futures.ThreadPoolExecutor() as executor:
        future_to_pair = {
            executor.submit(get_team_win_probability, model, matches, unique_teams[i], unique_teams[j]): (i, j)
            for i, j in ordered_pairs
        }
        for future in tqdm(concurrent.futures.as_completed(future_to_pair), total=len(future_to_pair), desc="Генерация матчей"):
            win_probs[future_to_pair[future]] = future.result()

    # Build the table in a deterministic order (not in completion order).
    all_matches = []
    for i, j in ordered_pairs:
        probabilities = normalize_probabilities([win_probs[(i, j)], win_probs[(j, i)]])
        all_matches.append([unique_teams[i], unique_teams[j], probabilities[0] * 100, probabilities[1] * 100])

    matches_df = pd.DataFrame(all_matches, columns=['ID team', 'ID opponent', '%T', '%O'])
    # Loop-invariant for the fitness function: highest percentage of each row.
    # Kept as a separate Series so the CSV written below is unchanged.
    pair_max = matches_df[['%T', '%O']].max(axis=1)

    # Genetic algorithm: an individual is a permutation of team positions.
    creator.create("FitnessMin", base.Fitness, weights=(-1.0,))
    creator.create("Individual", list, fitness=creator.FitnessMin)

    toolbox = base.Toolbox()
    toolbox.register("indices", random.sample, range(num_teams), num_teams)
    toolbox.register("individual", tools.initIterate, creator.Individual, toolbox.indices)
    toolbox.register("population", tools.initRepeat, list, toolbox.individual)

    def evaluate(individual):
        # Deal the permutation round-robin into divisions.
        divisions = [individual[i::num_divisions] for i in range(num_divisions)]
        if any(len(div) < min_teams_per_division for div in divisions):
            return float('inf'),
        # Safety net: a valid individual must contain every team exactly once.
        if len(individual) != len(set(individual)):
            return float('inf'),

        score = 0
        for div in divisions:
            div_teams = unique_teams[div]
            in_division = matches_df['ID team'].isin(div_teams) & matches_df['ID opponent'].isin(div_teams)
            score += pair_max[in_division].mean()
        return score / num_divisions,

    # The number of fitness evaluations is not known in advance (eaSimple only
    # re-evaluates modified individuals), so the bar has no fixed total.
    pbar = tqdm(desc="Эволюция")

    def eval_with_progress(individual):
        result = evaluate(individual)
        pbar.update(1)
        return result

    # cxOrdered keeps offspring valid permutations; cxTwoPoint would duplicate
    # and drop team indices, making most offspring infeasible.
    toolbox.register("mate", tools.cxOrdered)
    toolbox.register("mutate", tools.mutShuffleIndexes, indpb=0.05)
    toolbox.register("select", tools.selTournament, tournsize=3)
    toolbox.register("evaluate", eval_with_progress)

    population = toolbox.population(n=population_size)
    # eaSimple has no elitism, so remember the best individual ever evaluated.
    hall_of_fame = tools.HallOfFame(1)
    try:
        algorithms.eaSimple(population, toolbox, cxpb=0.5, mutpb=0.2, ngen=num_generations,
                            halloffame=hall_of_fame, verbose=False)
    finally:
        pbar.close()

    best_ind = hall_of_fame[0]
    divisions = [best_ind[i::num_divisions] for i in range(num_divisions)]
    final_divisions = [[unique_teams[idx], f'Division {i+1}'] for i, div in enumerate(divisions) for idx in div]
    final_df = pd.DataFrame(final_divisions, columns=['ID team', 'division'])

    matches_df.to_csv('data/interim/matches_rangirov.csv', index=False)
    final_df.to_csv(f'data/ranking_seasons/team_rangirov_for_{season}.csv', index=False)
    print(f"Количество уникальных команд: {len(final_df)}")



In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

def evaluate_league_distribution(season, new_model):
    """Evaluate the existing division layout of a season.

    Loads the season via ``process_season_data``, replaces NaN/inf with 0,
    runs ``scale_and_select_features``, maps the ``result`` column
    (``'W'`` -> 1, ``'L'`` -> -1, ``'D'`` -> 0) and drops rows with missing
    values. It then simulates matches inside each existing division with
    ``simulate_all_matches`` and summarises them with
    ``calculate_average_highest_probabilities``.

    Side effects: writes ``data/interim/simulated_matches.csv``, prints
    per-division counts and the summary, and calls
    ``named_team_id('llhn', ...)`` to export team names.

    :param season: season identifier passed to ``process_season_data``.
    :param new_model: model passed to ``simulate_all_matches``.
    :return: tuple ``(matches, unique_teams, average_highest_probabilities)``.
    :raises Exception: the original error is re-raised when it happened before
        the three return values were computed. Errors in the later
        export/print steps are only printed (best effort).
    """
    matches = unique_teams = average_highest_probabilities = None
    results_ready = False  # becomes True once all three return values exist
    try:
        game_before_season, unique_teams = process_season_data(season)
        if game_before_season is None or unique_teams is None:
            raise ValueError("Ошибка: `process_season_data` вернул None.")

        game_before_season.fillna(0, inplace=True)
        game_before_season.replace([np.inf, -np.inf], 0, inplace=True)

        matches = scale_and_select_features(game_before_season)

        if "result" in matches.columns:
            matches["result"] = matches["result"].map({'W': 1, 'L': -1, 'D': 0})
            matches.dropna(inplace=True)
        else:
            raise ValueError("Ошибка: Колонка 'result' отсутствует в данных.")

        division_teams = unique_teams.groupby('division')['ID team'].apply(list).to_dict()

        match_results_df = simulate_all_matches(new_model, division_teams, matches)
        average_highest_probabilities = calculate_average_highest_probabilities(match_results_df)
        results_ready = True

        match_results_df.to_csv('data/interim/simulated_matches.csv', index=False)
        #unique_teams.to_csv(f"rank_teams_LLHN_{season}.csv", index=False)

        for division, teams in division_teams.items():
            num_teams = len(teams)
            num_matches = num_teams * (num_teams - 1) // 2  # unordered pairs
            print(f"Дивизион: {division}, Количество команд: {num_teams}, Количество матчей: {num_matches}")

        print("Среднее значение наибольших вероятностей для каждого дивизиона:")
        print(average_highest_probabilities)

        named_team_id('llhn', unique_teams, season)

    except Exception as ex:
        print(f"Ошибка: {ex}")
        if not results_ready:
            # Nothing usable to return: surface the real error instead of
            # failing later with an unrelated UnboundLocalError.
            raise

    return matches, unique_teams, average_highest_probabilities

def named_team_id(name, unique_df_id, season):
    """Attach team names to a set of team IDs and save the result as CSV.

    Reads ``data/raw/teams_name_utf8.csv``, left-joins it onto the
    ``'ID team'`` column of ``unique_df_id`` and writes the merged table to
    ``data/ranking_seasons/ranking_teams_{name}_{season}.csv``.

    :param name: label embedded in the output file name.
    :param unique_df_id: DataFrame with an ``'ID team'`` column; only that
        column is carried into the output.
    :param season: season identifier embedded in the output file name.
    """
    names_table = pd.read_csv("data/raw/teams_name_utf8.csv")
    team_ids = unique_df_id["ID team"]

    # Keep only name rows for the requested teams before joining.
    is_requested = names_table["ID team"].isin(team_ids)
    requested_names = names_table[is_requested]

    output_path = f"data/ranking_seasons/ranking_teams_{name}_{season}.csv"
    pd.merge(team_ids, requested_names, how='left', on='ID team').to_csv(output_path, index=False)

def rank_and_evaluate_teams(season, new_model, num_divisions=4, min_teams_per_division=5):
    """Run the full pipeline: evaluate the current layout, re-rank, re-evaluate, plot.

    Steps, in order:
    1. ``evaluate_league_distribution`` for the existing divisions.
    2. ``rank_teams`` to produce a new team -> division assignment on disk.
    3. Reload that assignment, simulate matches inside the new divisions and
       summarise them with ``calculate_average_highest_probabilities``.
    4. Export team names via ``named_team_id('model', ...)`` and draw the
       comparison chart with ``visualize_division_distribution``.

    :param season: season identifier.
    :param new_model: model forwarded to the evaluation, ranking and simulation steps.
    :param num_divisions: number of divisions passed to ``rank_teams``.
    :param min_teams_per_division: minimum division size passed to ``rank_teams``.
    """
    matches, unique_teams, old_probs = evaluate_league_distribution(season, new_model)

    # Recompute the division assignment; the result is written to disk by rank_teams.
    rank_teams(season, matches, unique_teams, new_model, num_divisions, min_teams_per_division)

    ranking_path = f'data/ranking_seasons/team_rangirov_for_{season}.csv'
    team_rangirov = pd.read_csv(ranking_path)
    teams_by_division = team_rangirov.groupby('division')['ID team'].apply(list).to_dict()

    simulated = simulate_all_matches(new_model, teams_by_division, matches)
    new_probs = calculate_average_highest_probabilities(simulated)

    simulated.to_csv('data/interim/simulated_matches.csv', index=False)

    for division, members in teams_by_division.items():
        team_count = len(members)
        pair_count = team_count * (team_count - 1) // 2  # unordered pairs
        print(f"Дивизион: {division}, Количество команд: {team_count}, Количество матчей: {pair_count}")

    print("Среднее значение наибольших вероятностей для каждого дивизиона (новая модель):")
    print(new_probs)

    named_team_id('model', team_rangirov, season)

    # Visual comparison of the old and new layouts.
    visualize_division_distribution(old_probs, new_probs, season)


def visualize_division_distribution(original_probs, new_probs, season):
    """Plot a grouped bar chart comparing per-division values before and after redistribution.

    The chart is saved to
    ``data/ranking_seasons/division_comparison_for_{season}.png`` and shown.

    Alignment of the two inputs:
    - ``np.ndarray`` inputs are used positionally.
    - Mapping-like inputs (anything with ``keys()`` and ``[]``, e.g. ``dict``
      or ``pandas.Series``) are ordered by the sorted labels of
      ``original_probs``; labels missing from ``new_probs`` are plotted as 0.
    - If ``new_probs`` shares NO label with ``original_probs`` but has the same
      number of entries (e.g. ``0..3`` versus ``'Division 1'..'Division 4'``),
      the values are paired by position, using the sorted labels of each side.
      Sorting is plain ``sorted()``, so string labels order lexicographically.

    :param original_probs: dict | pandas.Series | np.ndarray — values for the original layout.
    :param new_probs: dict | pandas.Series | np.ndarray — values after redistribution.
    :param season: season identifier embedded in the output file name.
    """

    if isinstance(original_probs, np.ndarray):
        divisions = np.arange(len(original_probs))  # 0, 1, 2, 3...
        original_probs = original_probs.tolist()
    else:
        divisions = sorted(original_probs.keys())
        original_probs = [original_probs[div] for div in divisions]

    if isinstance(new_probs, np.ndarray):
        new_probs = new_probs.tolist()
    else:
        new_keys = list(new_probs.keys())
        known_keys = set(new_keys)
        if any(div in known_keys for div in divisions):
            # Labels overlap: look up by label, filling gaps with 0.
            new_probs = [new_probs[div] if div in known_keys else 0 for div in divisions]
        elif len(new_keys) == len(divisions):
            # Different labelling schemes with the same number of divisions:
            # pair by position instead of silently plotting zeros.
            new_probs = [new_probs[key] for key in sorted(new_keys)]
        else:
            new_probs = [0 for _ in divisions]

    width = 0.4
    x = np.arange(len(divisions))

    plt.figure(figsize=(10, 5))
    plt.bar(x - width / 2, original_probs, width, color='blue', label='Исходное распределение')
    plt.bar(x + width / 2, new_probs, width, color='red', label='Распределение по модели')

    plt.xlabel("Дивизионы")
    plt.ylabel("Средняя вероятность победы (%)")
    plt.title("Сравнение распределения команд по дивизионам")
    plt.xticks(x, divisions)
    plt.legend()

    # Save before show(): some backends clear the figure once it is shown.
    plt.savefig(f"data/ranking_seasons/division_comparison_for_{season}.png")
    plt.show()


# Run the full pipeline for the selected season.
season = 73
rank_and_evaluate_teams(
    season=season,
    new_model=new_model,
    num_divisions=4,
    min_teams_per_division=5,
)


Дивизион: 0, Количество команд: 5, Количество матчей: 10
Дивизион: 1, Количество команд: 8, Количество матчей: 28
Дивизион: 2, Количество команд: 6, Количество матчей: 15
Дивизион: 3, Количество команд: 8, Количество матчей: 28
Среднее значение наибольших вероятностей для каждого дивизиона:
division
0    80.746392
1    65.316276
2    69.791499
3    68.388226
dtype: float64


Генерация матчей: 100%|██████████| 702/702 [00:02<00:00, 248.55it/s]
Эволюция: 15286it [00:29, 434.41it/s]                      

In [None]:
# # Загрузка данных из файла game_stats_one_r.csv
# game_stats = pd.read_csv("game_stats_one_r.csv")

# # Получение уникальных значений в столбце ID season
# unique_season_ids = game_stats['ID season'].unique()

# # Вывод уникальных значений
# print("Уникальные значения в столбце ID season:")
# print(unique_season_ids)

In [None]:
# #БЛОК 13
# # тут идет оценка распределения ЛИГИ
# season = 73
# try:
#     game_before_season, unique_teams = process_season_data(season)

#     game_before_season.fillna(0, inplace=True)  # Изменение данных на месте
#     game_before_season.replace([np.inf, -np.inf], 0, inplace=True)  # Изменение данных на месте

#     matches = scale_and_select_features(game_before_season)
#     matches["result"] = matches["result"].map({'W': 1, 'L': -1, 'D': 0})
#     matches = matches.dropna()

#     # Преобразование данных в структуру division_teams
#     division_teams = unique_teams.groupby('division')['ID team'].apply(list).to_dict()

#     #match_results_df = simulate_all_matches(division_teams)
#     match_results_df = simulate_all_matches(model, division_teams, matches)
#     average_highest_probabilities = calculate_average_highest_probabilities(match_results_df)

#     # Сохранение результатов в CSV файл
#     match_results_df.to_csv('data/interim/simulated_matches.csv', index=False)
#     unique_teams.to_csv(f"rank_teams_LLHN_{season}.csv", index = False)

#     # Вывод количества команд и количества матчей для каждого дивизиона
#     for division, teams in division_teams.items():
#         num_teams = len(teams)
#         num_matches = num_teams * (num_teams - 1) // 2  # Количество уникальных матчей
#         print(f"Дивизион: {division}, Количество команд: {num_teams}, Количество матчей: {num_matches}")

#     # Вывод среднего значения наибольших вероятностей для каждого дивизиона
#     print("Среднее значение наибольших вероятностей для каждого дивизиона:")
#     print(average_highest_probabilities)
# except Exception as ex:
#     print(ex)


In [None]:
# rank_teams(matches, unique_teams, model, num_divisions=4, min_teams_per_division=5)


In [None]:
# #БЛОК 15
# # Загрузка данных из team_rangirov.csv
# team_rangirov = pd.read_csv('team_rangirov.csv')

# # Преобразование данных в структуру division_teams
# division_teams = team_rangirov.groupby('division')['ID team'].apply(list).to_dict()

# # Выполнение симуляции матчей и вычисление средних наибольших вероятностей
# match_results_df = simulate_all_matches(new_model, division_teams, matches)
# average_highest_probabilities = calculate_average_highest_probabilities(match_results_df)

# # Сохранение результатов
# match_results_df.to_csv('data/interim/simulated_matches.csv', index=False)

# # Вывод кол-ва команд и количества матчей для каждого дивизиона
# for division, teams in division_teams.items():
#     num_teams = len(teams)
#     num_matches = num_teams * (num_teams - 1) // 2  # Количество уникальных матчей
#     print(f"Дивизион: {division}, Количество команд: {num_teams}, Количество матчей: {num_matches}")

# # Вывод среднего значения наибольших вероятностей для каждого дивизиона
# print("Среднее значение наибольших вероятностей для каждого дивизиона:")
# print(average_highest_probabilities)


In [None]:
# #Присваиваем названия командам
# df_team_name = pd.read_csv("data/raw/teams_name_utf8.csv")

# unique_teams = team_rangirov["ID team"]

# filtred_name_df = df_team_name[df_team_name["ID team"].isin(unique_teams)]

# merged_df = pd.merge(team_rangirov, filtred_name_df, on='ID team', how='left')

# merged_df.to_csv(f"ranking_teams_{season}.csv", index=False)
