In [419]:
# Generate a schedule for CXP!

# imports 
import csv
import os
import numpy as np
import pandas as pd
import math

In [420]:
EXPERIMENTAL = 'experimental'
OFFICIAL = 'official'

SIGNUP_CSV_FILE = '2023-2024_signups/CXP_2023-2024_Season_Signups.csv'

VARSITY_FILE = '2023-2024_signups/CXP_2023_24_Varsity.csv'
OPEN_FILE = '2023-2024_signups/CXP_2023_24_Open.csv'
CLUB_FILE = '2023-2024_signups/CXP_2023_24_Club.csv'

SCHEDULE_FOLDER = '2023-2024_schedules'
ORDERED_SCHEDULE_FOLDER = '2023-2024_ordered_schedules'

varsity_dict = {"team": [], "division": []}
open_dict = {"team": [], "division": []}
club_dict = {"team": [], "division": []}

## Read ins
# Varsity 
with open(VARSITY_FILE, 'r', encoding='utf8') as f_varsity:
    reader_varsity = csv.reader(f_varsity)
    
    for idx, line in enumerate(reader_varsity):
        if idx == 0:
            continue
        varsity_dict["team"].append(line[0])
        varsity_dict["division"].append(line[2])

# Open 
with open(OPEN_FILE, 'r', encoding='utf8') as f_open:
    reader_open = csv.reader(f_open)
    
    for idx, line in enumerate(reader_open):
        if idx == 0:
            continue
        open_dict["team"].append(line[0])
        open_dict["division"].append(line[2])

# Club 
with open(CLUB_FILE, 'r', encoding='utf8') as f_club:
    reader_club = csv.reader(f_club)
    
    for idx, line in enumerate(reader_club):
        if idx == 0:
            continue
        club_dict["team"].append(line[0])
        club_dict["division"].append(line[2])


print('Total Number of Teams:')
print(len(varsity_dict["team"]) + len(open_dict["team"]) + len(club_dict["team"]))
print('---')
print('Number of Varsity Teams:')
print(len(varsity_dict["team"]))
print('Number of Open Teams:')
print(len(open_dict["team"]))
print('Number of Club Teams:')
print(len(club_dict["team"]))

Total Number of Teams:
135
---
Number of Varsity Teams:
25
Number of Open Teams:
31
Number of Club Teams:
79


In [421]:
varsity_df = pd.DataFrame(varsity_dict)
open_df = pd.DataFrame(open_dict)
club_df = pd.DataFrame(club_dict)


In [422]:
varsity_divisions = varsity_df["division"].unique()
open_divisions = open_df["division"].unique()
club_divisions = club_df["division"].unique()

varsity_divisions = varsity_divisions[0:len(varsity_divisions) - 1]
open_divisions = open_divisions[0:len(open_divisions) - 1]
club_divisions = club_divisions[0:len(club_divisions) - 1]

print(varsity_divisions)
print(open_divisions)
print(club_divisions)

['South/West' 'East 2' 'East 1']
['Central/West' 'East 1' 'East 2']
['East 4' 'East 3' 'Central 1' 'Central 2' 'West 1' 'West 2' 'East 1'
 'East 5' 'East 2']


In [423]:
# drop empty values in data frames 
varsity_df['team'].replace('', np.nan, inplace=True)
varsity_df['division'].replace('', np.nan, inplace=True)
varsity_df.dropna(inplace=True)

open_df['team'].replace('', np.nan, inplace=True)
open_df['division'].replace('', np.nan, inplace=True)
open_df.dropna(inplace=True)

club_df['team'].replace('', np.nan, inplace=True)
club_df['division'].replace('', np.nan, inplace=True)
club_df.dropna(inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  varsity_df['team'].replace('', np.nan, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  varsity_df['division'].replace('', np.nan, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we ar

In [424]:
# get max number of teams for each league (varsity, open, club)
varsity_max_teams_division = np.max(varsity_df['division'].value_counts())
open_max_teams_division = np.max(open_df['division'].value_counts())
club_max_teams_division = np.max(club_df['division'].value_counts())

print('Largest number of teams in a division for varsity: ')
print(varsity_max_teams_division)
print('Largest number of teams in a division for open: ')
print(open_max_teams_division)
print('Largest number of teams in a division for club: ')
print(club_max_teams_division)

Largest number of teams in a division for varsity: 
6
Largest number of teams in a division for open: 
8
Largest number of teams in a division for club: 
6


In [425]:
# get min number of teams for each league (varsity, open, club)
varsity_min_teams_division = np.min(varsity_df['division'].value_counts())
open_min_teams_division = np.min(open_df['division'].value_counts())
club_min_teams_division = np.min(club_df['division'].value_counts())

print('Smallest number of teams in a division for varsity: ')
print(varsity_min_teams_division)
print('Smallest number of teams in a division for open: ')
print(open_min_teams_division)
print('Smallest number of teams in a division for club: ')
print(club_min_teams_division)

Smallest number of teams in a division for varsity: 
6
Smallest number of teams in a division for open: 
7
Smallest number of teams in a division for club: 
5


In [426]:
SETS_OF_MATCHES = 12  # 6 weeks of matches, 2 sets of matches per week 
WEEKS_OF_MATCHES = 6

# use this if we are going to have each division have the same number of total matches 
VARSITY_TOTAL_MATCHES = WEEKS_OF_MATCHES * varsity_max_teams_division
OPEN_TOTAL_MATCHES = WEEKS_OF_MATCHES * open_max_teams_division
CLUB_TOTAL_MATCHES = WEEKS_OF_MATCHES * club_max_teams_division

print(VARSITY_TOTAL_MATCHES)
print(OPEN_TOTAL_MATCHES)
print(CLUB_TOTAL_MATCHES)

GOAL_MATCHES_PER_TEAM = 12

36
48
36


In [31]:
# gets all the permutations of matches for a division
def get_all_combos(df, division):
    division_df = df[df["division"] == division]
    num_teams_in_division = len(division_df)
    combos = {'away_team': [], 'home_team': []}
    combos_named = []
    for ii in range(num_teams_in_division):
        for jj in range(num_teams_in_division):
            # if we are on the same team, just continue 
            if division_df['team'].iloc[ii] == division_df['team'].iloc[jj]:
                continue 

            potential_home_team = division_df['team'].iloc[ii]
            potential_away_team = division_df['team'].iloc[jj]

            potential_match_name = potential_away_team + ' vs ' + potential_home_team

            # this means its unique so this is a legit match
            if potential_match_name not in combos:
                combos_named.append(potential_match_name)
                combos['away_team'].append(potential_away_team)
                combos['home_team'].append(potential_home_team)
    
    return combos, combos_named

# test1 = get_all_combos(varsity_df, varsity_divisions[0])
# test2 = np.array(test1)
# print(len(test1))
# print(len(np.unique(test2)))

# def make_schedule(total_sets, df, division):
# option param: can either be "team_based" (this means that total number of matches in division is based on number of teams in division) OR
#               "equal" (this means that total number of matches in division is the same between all divisions.  It is based on the division w the largest num of teams)
def make_schedule_combos(total_weeks, df, division, option="team_based", total_matches=None):
    division_df = df[df["division"] == division]
    num_teams_in_division = len(division_df)
    matches = {'away_team': [], 'home_team': []}
    matches_worded = []

    non_named_combos, all_combos = get_all_combos(df, division)
    uniques_reached = 0
    

    if option == "team_based":
        matches_for_the_week = num_teams_in_division
        for week_num in range(total_weeks):
            # number of potential matches that are legit matches 
            matches_made = 0

            while matches_made < matches_for_the_week:
                print('here3')
                print('matches made here 3')
                print(matches_made)
                print('matches for week here 3 ')
                print(matches_for_the_week)
                print('combos length here 3 ')
                print(len(all_combos))
                print('matches worded here 3 ')
                print(len(matches_worded))
                print(num_teams_in_division)
                print('all_combos')
                print(all_combos)
                print('division_df')
                print(division_df)
                print('division')
                print(division)
                if uniques_reached:
                    for kk, combo in enumerate(all_combos):
                        combo_split = combo.split(' vs ')
                        away_team = combo_split[0]
                        home_team = combo_split[1]

                        matches_worded.append(combo)
                        matches['away_team'].append(away_team)
                        matches['home_team'].append(home_team)
                        matches_made += 1

                        print('matches made')
                        print(matches_made)
                        print('matches needed for the week')
                        print(matches_for_the_week)

                        if matches_made >= matches_for_the_week:
                            print('here2')
                            break


                else:
                    # loop thru the teams 
                    for ii in range(num_teams_in_division):
                        for jj in range(num_teams_in_division):
                            # if we are on the same team, just continue 
                            if division_df['team'].iloc[ii] == division_df['team'].iloc[jj]:
                                continue 

                            potential_home_team = division_df['team'].iloc[ii]
                            potential_away_team = division_df['team'].iloc[jj]

                            potential_match_name = potential_away_team + ' vs ' + potential_home_team

                            # print(len(matches_worded))
                            # print(len(all_combos))
                            print('all combos')
                            print(len(all_combos))
                            print('so far combos')
                            print(len(matches_worded))

                            # this means its unique so this is a legit match
                            if potential_match_name not in matches_worded:
                                matches_worded.append(potential_match_name)
                                matches['away_team'].append(potential_away_team)
                                matches['home_team'].append(potential_home_team)
                                matches_made += 1

                                # print(matches_made)
                                # print(matches_for_the_week)

                                if matches_made >= matches_for_the_week:
                                    break
                            # if we have not yet reached our matches for the week count and all unique combos have been reached, we need to just start looping over and adding
                            # the already unique matches in order until we reach our matches for the week count 
                            elif len(all_combos) == len(matches_worded):  # risky for now, but lets just compare sizes instead of elements 
                                print('here1')
                                uniques_reached = 1
                                break
                            

                        if matches_made >= matches_for_the_week:
                            break
                        elif uniques_reached == 1:
                            break
    elif option == 'equal':
        matches_for_the_week = total_matches
        # number of potential matches that are legit matches 
        matches_made = 0
        for match_num in range(total_matches):

            while matches_made < total_matches:
                print('here3')
                print('matches made here 3')
                print(matches_made)
                print('total matches here 3 ')
                print(total_matches)
                print('combos length here 3 ')
                print(len(all_combos))
                print('matches worded here 3 ')
                print(len(matches_worded))
                print(num_teams_in_division)
                print('all_combos')
                print(all_combos)
                print('division_df')
                print(division_df)
                print('division')
                print(division)
                if uniques_reached:
                    for kk, combo in enumerate(all_combos):
                        combo_split = combo.split(' vs ')
                        away_team = combo_split[0]
                        home_team = combo_split[1]

                        matches_worded.append(combo)
                        matches['away_team'].append(away_team)
                        matches['home_team'].append(home_team)
                        matches_made += 1

                        print('matches made')
                        print(matches_made)
                        print('total matches')
                        print(total_matches)

                        if matches_made >= total_matches:
                            print('here2')
                            break


                else:
                    # loop thru the teams 
                    for ii in range(num_teams_in_division):
                        for jj in range(num_teams_in_division):
                            # if we are on the same team, just continue 
                            if division_df['team'].iloc[ii] == division_df['team'].iloc[jj]:
                                continue 

                            potential_home_team = division_df['team'].iloc[ii]
                            potential_away_team = division_df['team'].iloc[jj]

                            potential_match_name = potential_away_team + ' vs ' + potential_home_team

                            # print(len(matches_worded))
                            # print(len(all_combos))
                            print('all combos')
                            print(len(all_combos))
                            print('so far combos')
                            print(len(matches_worded))

                            # this means its unique so this is a legit match
                            if potential_match_name not in matches_worded:
                                matches_worded.append(potential_match_name)
                                matches['away_team'].append(potential_away_team)
                                matches['home_team'].append(potential_home_team)
                                matches_made += 1

                                # print(matches_made)
                                # print(total_matches)

                                if matches_made >= total_matches:
                                    break
                            # if we have not yet reached our matches for the week count and all unique combos have been reached, we need to just start looping over and adding
                            # the already unique matches in order until we reach our matches for the week count 
                            elif len(all_combos) == len(matches_worded):  # risky for now, but lets just compare sizes instead of elements 
                                print('here1')
                                uniques_reached = 1
                                break
                            

                        if matches_made >= total_matches:
                            break
                        elif uniques_reached == 1:
                            break

    return matches, matches_worded

# equal option debugging
all_matches, all_matches_worded = make_schedule_combos(WEEKS_OF_MATCHES, varsity_df, varsity_divisions[0], option="equal", total_matches=VARSITY_TOTAL_MATCHES)

print(all_matches_worded)
print(len(all_matches_worded))

# team_based option debugging 
# # OPTION 1
# # DO THIS IF WE ARE JUST DOING A SET AMOUNT OF MATCHES PER WEEK
# # all_matches, all_matches_worded = make_schedule(SETS_OF_MATCHES, varsity_df, varsity_divisions[0])
# all_matches, all_matches_worded = make_schedule_combos(WEEKS_OF_MATCHES, varsity_df, varsity_divisions[0])
# 
# print(all_matches_worded)
# print(len(all_matches_worded))
# 
# 
# # OPTION 2
# # # DO THIS IF WE ARE HAVING EACH TEAM PLAY EACH OTHER TWICE --> will this lead to too many matches per week?
# # all_matches, all_matches_named = get_all_combos(varsity_df, varsity_divisions[0])
# # print(all_matches_named)
# # print(len(all_matches_named))
# # print('Matches needed per week: ')
# # print(len(all_matches_named) / 6)
# # print('Teams in division')
# # print(len(varsity_df[varsity_df['division'] == varsity_divisions[0]]))

here3
matches made here 3
0
total matches here 3 
48
combos length here 3 
56
matches worded here 3 
0
8
all_combos
['CSU Fullerton Blue vs Oklahoma Christian University', 'Cumberland University vs Oklahoma Christian University', "St. Edward's University  vs Oklahoma Christian University", 'Ole Miss vs Oklahoma Christian University', 'Independence Community College vs Oklahoma Christian University', 'Midway University vs Oklahoma Christian University', 'Utah Valley Esports vs Oklahoma Christian University', 'Oklahoma Christian University vs CSU Fullerton Blue', 'Cumberland University vs CSU Fullerton Blue', "St. Edward's University  vs CSU Fullerton Blue", 'Ole Miss vs CSU Fullerton Blue', 'Independence Community College vs CSU Fullerton Blue', 'Midway University vs CSU Fullerton Blue', 'Utah Valley Esports vs CSU Fullerton Blue', 'Oklahoma Christian University vs Cumberland University', 'CSU Fullerton Blue vs Cumberland University', "St. Edward's University  vs Cumberland University",

In [34]:
# This is for OPTION 2 - this is the route we are going with.  Our goal is 12 matches played per team for the season 
# NOTE: 
# Permutation formula P(n,r) where n is objects and r is subset of the objects picked
# Number of matches per team if they play each other twice = (P(n,r) / n) * 2
# get values if teams just play each other twice
varsity_combo_info = {'num_teams': [], 'team_value_counts': []} 
for varsity_division in varsity_divisions:
    tmp_combos, tmp_combos_named = get_all_combos(varsity_df, varsity_division)
    tmp_df = pd.DataFrame(tmp_combos)
    tmp_num_teams = len(np.unique(tmp_df['home_team']))
    tmp_combined = pd.concat([tmp_df['away_team'], tmp_df['home_team']], ignore_index=True)
    tmp_value_counts = tmp_combined.value_counts()
    varsity_combo_info['num_teams'].append(tmp_num_teams)
    varsity_combo_info['team_value_counts'].append(tmp_value_counts)

open_combo_info = {'num_teams': [], 'team_value_counts': []} 
for open_division in open_divisions:
    tmp_combos, tmp_combos_named = get_all_combos(open_df, open_division)
    tmp_df = pd.DataFrame(tmp_combos)
    tmp_num_teams = len(np.unique(tmp_df['home_team']))
    tmp_combined = pd.concat([tmp_df['away_team'], tmp_df['home_team']], ignore_index=True)
    tmp_value_counts = tmp_combined.value_counts()
    open_combo_info['num_teams'].append(tmp_num_teams)
    open_combo_info['team_value_counts'].append(tmp_value_counts)

club_combo_info = {'num_teams': [], 'team_value_counts': []} 
for club_division in club_divisions:
    tmp_combos, tmp_combos_named = get_all_combos(club_df, club_division)
    tmp_df = pd.DataFrame(tmp_combos)
    tmp_num_teams = len(np.unique(tmp_df['home_team']))
    tmp_combined = pd.concat([tmp_df['away_team'], tmp_df['home_team']], ignore_index=True)
    tmp_value_counts = tmp_combined.value_counts()
    club_combo_info['num_teams'].append(tmp_num_teams)
    club_combo_info['team_value_counts'].append(tmp_value_counts)

print(varsity_combo_info)
print(open_combo_info)
print(club_combo_info)


{'num_teams': [8, 5, 6], 'team_value_counts': [CSU Fullerton Blue                14
Cumberland University             14
St. Edward's University           14
Ole Miss                          14
Independence Community College    14
Midway University                 14
Utah Valley Esports               14
Oklahoma Christian University     14
Name: count, dtype: int64, UNCW                             8
Newberry College                 8
High Point University            8
Florida Gulf Coast University    8
Mars Hill University             8
Name: count, dtype: int64, Conestoga College                                10
Rochester Institute of Technology (RIT Black)    10
University of New Haven                          10
Farmingdale State College                        10
St. Clair College                                10
Utica University                                 10
Name: count, dtype: int64]}
{'num_teams': [7, 7, 7], 'team_value_counts': [The University of Tennessee at Chattanoog

In [10]:
# equal option
# VARSITY
varsity_matches_dict = {}
for varsity_division in varsity_divisions:
    tmp_matches, tmp_matches_worded = make_schedule_combos(WEEKS_OF_MATCHES, varsity_df, varsity_division, option='equal', total_matches=VARSITY_TOTAL_MATCHES)
    varsity_matches_dict[varsity_division] = tmp_matches

# OPEN
open_matches_dict = {}
for open_division in open_divisions:
    tmp_matches, tmp_matches_worded = make_schedule_combos(WEEKS_OF_MATCHES, open_df, open_division, option='equal', total_matches=OPEN_TOTAL_MATCHES)
    open_matches_dict[open_division] = tmp_matches

# CLUB
club_matches_dict = {}
for club_division in club_divisions:
    tmp_matches, tmp_matches_worded = make_schedule_combos(WEEKS_OF_MATCHES, club_df, club_division, option='equal', total_matches=CLUB_TOTAL_MATCHES)
    club_matches_dict[club_division] = tmp_matches

here3
matches made here 3
0
total matches here 3 
48
combos length here 3 
56
matches worded here 3 
0
8
all_combos
['CSU Fullerton Blue vs Oklahoma Christian University', 'Cumberland University vs Oklahoma Christian University', "St. Edward's University  vs Oklahoma Christian University", 'Ole Miss vs Oklahoma Christian University', 'Independence Community College vs Oklahoma Christian University', 'Midway University vs Oklahoma Christian University', 'Utah Valley Esports vs Oklahoma Christian University', 'Oklahoma Christian University vs CSU Fullerton Blue', 'Cumberland University vs CSU Fullerton Blue', "St. Edward's University  vs CSU Fullerton Blue", 'Ole Miss vs CSU Fullerton Blue', 'Independence Community College vs CSU Fullerton Blue', 'Midway University vs CSU Fullerton Blue', 'Utah Valley Esports vs CSU Fullerton Blue', 'Oklahoma Christian University vs Cumberland University', 'CSU Fullerton Blue vs Cumberland University', "St. Edward's University  vs Cumberland University",

In [13]:
# team_based option
# OPTION 1
# VARSITY
varsity_matches_dict = {}
for varsity_division in varsity_divisions:
    tmp_matches, tmp_matches_worded = make_schedule_combos(WEEKS_OF_MATCHES, varsity_df, varsity_division)
    varsity_matches_dict[varsity_division] = tmp_matches

# OPEN
open_matches_dict = {}
for open_division in open_divisions:
    tmp_matches, tmp_matches_worded = make_schedule_combos(WEEKS_OF_MATCHES, open_df, open_division)
    open_matches_dict[open_division] = tmp_matches

# CLUB
club_matches_dict = {}
for club_division in club_divisions:
    tmp_matches, tmp_matches_worded = make_schedule_combos(WEEKS_OF_MATCHES, club_df, club_division)
    club_matches_dict[club_division] = tmp_matches

here3
matches made here 3
0
matches for week here 3 
8
combos length here 3 
56
matches worded here 3 
0
8
all_combos
['CSU Fullerton Blue vs Oklahoma Christian University', 'Cumberland University vs Oklahoma Christian University', "St. Edward's University  vs Oklahoma Christian University", 'Ole Miss vs Oklahoma Christian University', 'Independence Community College vs Oklahoma Christian University', 'Midway University vs Oklahoma Christian University', 'Utah Valley Esports vs Oklahoma Christian University', 'Oklahoma Christian University vs CSU Fullerton Blue', 'Cumberland University vs CSU Fullerton Blue', "St. Edward's University  vs CSU Fullerton Blue", 'Ole Miss vs CSU Fullerton Blue', 'Independence Community College vs CSU Fullerton Blue', 'Midway University vs CSU Fullerton Blue', 'Utah Valley Esports vs CSU Fullerton Blue', 'Oklahoma Christian University vs Cumberland University', 'CSU Fullerton Blue vs Cumberland University', "St. Edward's University  vs Cumberland University

In [35]:
# team_based option
# OPTION 2
# VARSITY
varsity_matches_dict = {}
for varsity_division in varsity_divisions:
    tmp_matches, tmp_matches_named = get_all_combos(varsity_df, varsity_division)
    varsity_matches_dict[varsity_division] = tmp_matches

# OPEN
open_matches_dict = {}
for open_division in open_divisions:
    tmp_matches, tmp_matches_named = get_all_combos(open_df, open_division)
    open_matches_dict[open_division] = tmp_matches

# CLUB
club_matches_dict = {}
for club_division in club_divisions:
    tmp_matches, tmp_matches_named = get_all_combos(club_df, club_division)
    club_matches_dict[club_division] = tmp_matches


In [427]:
# Permutation formula P(n,r) where n is objects and r is sample of the objects picked
def permutation_calc(n, r):
    return (math.factorial(n)) / (math.factorial(n - r))

# Number of matches per team if they play each other twice = (P(n,r) / n) * 2
# variable for our match per team goal is GOAL_MATCHES_PER_TEAM

# therefore, we can solve for this and see if our result is an integer.  if it is that makes things easier for us 
# GOAL_MATCHES_PER_TEAM / (P(n,r) / n) = number of times each team plays each other 
# GOAL_MATCHES_PER_TEAM / (P(# of teams in division, 2) / # of teams in division) = number of times each team plays each other 

varsity_division_freq = {} # keys: each division, values: how many times each team plays each other 
open_division_freq = {}
club_division_freq = {}

for division in varsity_divisions:
    num_teams_in_division = len(varsity_df[varsity_df["division"] == division])
    P_nr = permutation_calc(num_teams_in_division, 2)

    num_times_play_each_other = GOAL_MATCHES_PER_TEAM / (P_nr / num_teams_in_division)

    # print(num_times_play_each_other)
    # print(num_times_play_each_other.is_integer())

    varsity_division_freq[division] = num_times_play_each_other

for division in open_divisions:
    num_teams_in_division = len(open_df[open_df["division"] == division])
    P_nr = permutation_calc(num_teams_in_division, 2)

    num_times_play_each_other = GOAL_MATCHES_PER_TEAM / (P_nr / num_teams_in_division)

    # print(num_times_play_each_other)
    # print(num_times_play_each_other.is_integer())

    open_division_freq[division] = num_times_play_each_other

for division in club_divisions:
    num_teams_in_division = len(club_df[club_df["division"] == division])
    P_nr = permutation_calc(num_teams_in_division, 2)

    num_times_play_each_other = GOAL_MATCHES_PER_TEAM / (P_nr / num_teams_in_division)

    # print(num_times_play_each_other)
    # print(num_times_play_each_other.is_integer())

    club_division_freq[division] = num_times_play_each_other



In [428]:
# this function will get us all of the matches we need 
# inputs: league data frame, list of divisions for the league, dictionary of how many times a team should play each other in each league's division
# output: matches dictionary with away teams and home teams 
def get_matches(df, division, freq_dict):
    division_df = df[df["division"] == division]
    freq_value = freq_dict[division]
    num_teams_in_division = len(division_df)

    permutation_matches = {'away_team': [], 'home_team': []}
    permutation_matches_named = []
    
    combination_matches = {'away_team': [], 'home_team': []}
    combination_matches_named = []

    # gets permutations (teams play each other twice) and combinations (teams play each other once)
    for ii in range(num_teams_in_division):
        for jj in range(num_teams_in_division):
            # if we are on the same team, just continue 
            if division_df['team'].iloc[ii] == division_df['team'].iloc[jj]:
                continue 

            potential_home_team = division_df['team'].iloc[ii]
            potential_away_team = division_df['team'].iloc[jj]

            potential_match_name = potential_away_team + ' vs ' + potential_home_team
            reversed_potential_match_name = potential_home_team + ' vs ' + potential_away_team

            if potential_match_name not in permutation_matches:
                permutation_matches_named.append(potential_match_name)
                permutation_matches['away_team'].append(potential_away_team)
                permutation_matches['home_team'].append(potential_home_team)

            if potential_match_name not in combination_matches and reversed_potential_match_name not in combination_matches_named:
                combination_matches_named.append(potential_match_name)
                combination_matches['away_team'].append(potential_away_team)
                combination_matches['home_team'].append(potential_home_team)
    
    permutation_matches_df = pd.DataFrame(permutation_matches)
    combination_matches_df = pd.DataFrame(combination_matches)
    
    # in the fuzzy cases where we need to add or remove from the matches, we should be returing this 
    total_matches = permutation_matches.copy()
    
    # now that we have the permutations figure out if we need to remove any matches or add more matches 
    if freq_value == 2.0:
        return permutation_matches 
    else:
        # if it is an integer value, then we can just have teams play each other (freq_value - 2) more times 
        if freq_value.is_integer():
            play_delta = freq_value - 2.0

            print('integer freq value')

            # if play detla is negative, just use combinations (because play delta needs to be 1 in that case)
            if play_delta < 0.0:
                print('play_delta negative')
                return combination_matches
            elif play_delta % 2 == 0:
                # if play delta is even, we can divide it by two and see how many times we need to re-add the permutations 
                print('play_delta is even')
                times_to_readd_permutation = play_delta / 2

                for ii in range(times_to_readd_permutation):
                    permute_away_teams = permutation_matches['away_team']
                    permute_home_teams = permutation_matches['home_team']

                    total_matches['away_team'] = total_matches['away_team'] + permute_away_teams
                    total_matches['home_team'] = total_matches['home_team'] + permute_home_teams
                
                return total_matches
            
            elif play_delta % 2 != 0:
                # if play delta is odd, divide it by two and take the floor
                #   if that value is zero then just add the combinations.  If that value is one or greater, then add the permutations the same number of times as that value and then add the combinations once after 
                print('play_delta is odd')
                times_to_readd_permutation = math.floor(play_delta / 2)

                if times_to_readd_permutation == 0:
                    combo_away_teams = combination_matches['away_team']
                    combo_home_teams = combination_matches['home_team']

                    total_matches['away_team'] = total_matches['away_team'] + combo_away_teams
                    total_matches['home_team'] = total_matches['home_team'] + combo_home_teams
                elif times_to_readd_permutation >= 1:
                    for ii in range(times_to_readd_permutation):
                        permute_away_teams = permutation_matches['away_team']
                        permute_home_teams = permutation_matches['home_team']

                        total_matches['away_team'] = total_matches['away_team'] + permute_away_teams
                        total_matches['home_team'] = total_matches['home_team'] + permute_home_teams

                    combo_away_teams = combination_matches['away_team']
                    combo_home_teams = combination_matches['home_team']

                    total_matches['away_team'] = total_matches['away_team'] + combo_away_teams
                    total_matches['home_team'] = total_matches['home_team'] + combo_home_teams
                    
                return total_matches


        else:
            # find out if we need to add more matches or subtract more matches 
            if freq_value > 2.0:    # in this case we need to add more matches 
                print('add more matches')

                home_away_combined_df = pd.concat([permutation_matches_df['away_team'], permutation_matches_df['home_team']], ignore_index=True)
                current_matches_per_team_series = home_away_combined_df.value_counts()

                # print(current_matches_per_team_series.loc['University of New Haven'])

                # permutations loop 
                for ii in range(len(permutation_matches_df)):
                    tmp_away_team = permutation_matches_df.iloc[ii, 0] 
                    tmp_home_team = permutation_matches_df.iloc[ii, 1] 
                    # print(tmp_away_team)
                    # print(tmp_home_team)

                    # check to see if the away team is already at their limit of matches, if so skip
                    if current_matches_per_team_series.loc[tmp_away_team] == GOAL_MATCHES_PER_TEAM:
                        continue

                    # check to see if the home team is already at their limit of matches, if so skip
                    if current_matches_per_team_series.loc[tmp_home_team] == GOAL_MATCHES_PER_TEAM:
                        continue 

                    # at this point we know that neither of these teams are at their limits.  so we should add the match and increment the value in the current matches per team series
                    total_matches['away_team'].append(tmp_away_team)
                    current_matches_per_team_series.loc[tmp_away_team] += 1
                    total_matches['home_team'].append(tmp_home_team)
                    current_matches_per_team_series.loc[tmp_home_team] += 1

                return total_matches


            else:                   # in this case we need to subtract matches 
                print('subtract matches')
                # need to figure this out still (might need to do this manually)

                home_away_combined_df = pd.concat([permutation_matches_df['away_team'], permutation_matches_df['home_team']], ignore_index=True)
                current_matches_per_team_series = home_away_combined_df.value_counts()

                total_matches_df = pd.DataFrame(total_matches)
                inds_to_drop = []

                for ii in range(len(permutation_matches_df)):
                    tmp_away_team = permutation_matches_df.iloc[ii, 0] 
                    tmp_home_team = permutation_matches_df.iloc[ii, 1] 

                    # check to see if the away team is already at their limit of matches, if so skip
                    if current_matches_per_team_series.loc[tmp_away_team] == GOAL_MATCHES_PER_TEAM:
                        continue

                    # check to see if the home team is already at their limit of matches, if so skip
                    if current_matches_per_team_series.loc[tmp_home_team] == GOAL_MATCHES_PER_TEAM:
                        continue 

                    # at this point we know that neither of these teams are at their limits.  so we should capture this match index and decrement the value in the current matches per team series
                    inds_to_drop.append(ii)
                    current_matches_per_team_series.loc[tmp_away_team] -= 1
                    current_matches_per_team_series.loc[tmp_home_team] -= 1
                
                total_matches_df = total_matches_df.drop(inds_to_drop)

                # convert totals_matches_df back into a dict of lists 
                total_matches = total_matches_df.reset_index().to_dict(orient='list')

                return total_matches



    # tmp_combined = pd.concat([tmp_df['away_team'], tmp_df['home_team']], ignore_index=True)
    # tmp_value_counts = tmp_combined.value_counts()


        return
    
# print(varsity_division_freq)
# print(open_division_freq)
# print(club_division_freq)


# test = get_matches(varsity_df, varsity_divisions[0], varsity_division_freq) # South/West  --> WORKS
# test = get_matches(varsity_df, varsity_divisions[2], varsity_division_freq) # East 1  --> WORKS
# test = get_matches(varsity_df, varsity_divisions[1], varsity_division_freq) # East 2  --> WORKS
# test = get_matches(open_df, open_divisions[1], open_division_freq) # East 1 --> WORKS
# test = get_matches(club_df, club_divisions[0], club_division_freq) # East 4 --> WORKS
# test = get_matches(club_df, club_divisions[2], club_division_freq) # Central 1 --> WORKS



# print(len(test['away_team']))
# test_df = pd.DataFrame(test)
# test_combined_df = pd.concat([test_df['away_team'], test_df['home_team']], ignore_index=True)
# print(test_combined_df.value_counts())

    

In [429]:
# VARSITY
varsity_matches_dict = {}
for varsity_division in varsity_divisions:
    tmp_matches = get_matches(varsity_df, varsity_division, varsity_division_freq)
    varsity_matches_dict[varsity_division] = tmp_matches

# OPEN
open_matches_dict = {}
for open_division in open_divisions:
    tmp_matches = get_matches(open_df, open_division, open_division_freq)
    open_matches_dict[open_division] = tmp_matches

# CLUB
club_matches_dict = {}
for club_division in club_divisions:
    tmp_matches = get_matches(club_df, club_division, club_division_freq)
    club_matches_dict[club_division] = tmp_matches


add more matches
add more matches
add more matches
subtract matches
add more matches
integer freq value
play_delta is odd
add more matches
add more matches
integer freq value
play_delta is odd
add more matches
integer freq value
play_delta is odd
add more matches
add more matches


In [197]:
def order_matches(division_matches_dict, division, WEEKS_OF_MATCHES, hard_codings=None):
    ordered_dict = {}
    # ordered_dict[division] = {}

    # initialize what we want to return 
    for ii in range(WEEKS_OF_MATCHES):
        dict_key = 'week' + str(ii)
        # ordered_dict[division][dict_key] = {'away_team': [], 'home_team': []}   # this will hold a list of the matches for this week
        ordered_dict[dict_key] = {'away_team': [], 'home_team': []}   # this will hold a list of the matches for this week
    
    # for jj in range(WEEKS_OF_MATCHES):

    # get number of teams in the division and find out how many matches are needed per week 
    division_matches_df = pd.DataFrame(division_matches_dict)
    total_division_matches = len(division_matches_df)
    num_teams_in_division = len(division_matches_df['away_team'])
    matches_needed_per_week = total_division_matches / WEEKS_OF_MATCHES

    # # lets try this 
    # # get a teams home and away games and add them to each week.  One home and one away game per week 
    # for jj in range(len(division_matches_dict['home_team'])):
    #     # tmp_away_team = division_matches_dict['away_team'][jj]
    #     tmp_home_team = division_matches_dict['home_team'][jj]
    #     # data frame with the teams home and away matches 
    #     team_df = pd.concat([division_matches_df[division_matches_df['home_team'] == tmp_home_team], division_matches_df['away_team'] == tmp_home_team], ignore_index=True)
    #     
    #     curr_week = 0
    #     for kk in range(len(team_df)):
    #         away_team_to_add = team_df['away_team'].iloc[kk]
    #         home_team_to_add = team_df['home_team'].iloc[kk]
    #         curr_week = curr_week % 6
    #         dict_key_name = 'week' + str(curr_week)

    #         tmp_df = pd.DataFrame(ordered_dict[dict_key_name])
    #         freq_away_teams_for_week = tmp_df['away_team'].value_counts() # for a team this will show how many times they are away in the week  
    #         freq_home_teams_for_week = tmp_df['home_team'].value_counts() # for a team this will show how many times they are home in the week 

    #         if (away_team_to_add == tmp_home_team):
    #             if away_team_to_add in freq_away_teams_for_week:
    #                 if freq_away_teams_for_week[away_team_to_add] == 1:
    #                     continue
    #         if (home_team_to_add == tmp_home_team):
    #             if home_team_to_add in freq_home_teams_for_week:
    #                 if freq_home_teams_for_week[home_team_to_add] == 1:
    #                     continue


    #         ordered_dict[dict_key_name]['away_team'].append(away_team_to_add)
    #         ordered_dict[dict_key_name]['home_team'].append(home_team_to_add)
    #         curr_week = curr_week + 1


    # # trying this out 
    # # filter by home team, sprinkle those out to each week and overlap if needed
    # # then fill in the weeks until theyre full and make sure teams are playing no more than 2 matches per week 
    # for jj in range(len(division_matches_dict['home_team'])):
    #     tmp_away_team = division_matches_dict['away_team'][jj]
    #     tmp_home_team = division_matches_dict['home_team'][jj]
    #     # filter df 
    #     home_team_df = division_matches_df[division_matches_df['home_team'] == tmp_home_team]
    #     # go thru home_team_df and add to each week
    #     curr_week = 0
    #     for kk in range(len(home_team_df)):
    #         away_team_to_add = home_team_df['away_team'].iloc[kk]
    #         home_team_to_add = home_team_df['home_team'].iloc[kk]
    #         curr_week = curr_week % 6

    #         dict_key_name = 'week' + str(curr_week)
    #         ordered_dict[dict_key_name]['away_team'].append(away_team_to_add)
    #         ordered_dict[dict_key_name]['home_team'].append(home_team_to_add)

    #         curr_week = curr_week + 1
    
    # for aa in range(len(division_matches_dict['home_team'])):
    #     tmp_away_team = division_matches_dict['away_team'][aa]
    #     tmp_home_team = division_matches_dict['home_team'][aa]


    # # maybe use a while loop until there are X matches per week
    # matches_per_week = np.zeros(WEEKS_OF_MATCHES)  # 6 weeks 
    # sum_matches_per_week = np.sum(matches_per_week)

    # while(sum_matches_per_week < total_division_matches):

    # maybe instead we want to go thru the list of teams and for each team we just put them in for two matches per week 
    list_of_teams = pd.concat([division_matches_df['away_team'], division_matches_df['home_team']], ignore_index=True).unique()
    tracking_df = division_matches_df.copy()
    for team in list_of_teams:
        this_teams_home_df = division_matches_df[division_matches_df['home_team'] == team]
        this_teams_away_df = division_matches_df[division_matches_df['away_team'] == team]
        if len(this_teams_home_df) > len(this_teams_away_df):
            first_df = this_teams_home_df.copy()
            second_df = this_teams_away_df.copy()
        else: 
            first_df = this_teams_away_df.copy()
            second_df = this_teams_home_df.copy()
        
        
        # print(len(first_df))
        # print(len(second_df))

        # this_team_df = pd.concat([division_matches_df[division_matches_df['away_team'] == team], division_matches_df[division_matches_df['home_team'] == team]], ignore_index=True)
        # first_df loop
        # kk = 0 # week counter  --> LEAVING OFF NOTE: did this because we need to add in all the home matches first (if home is greater than away) and it needs to be added to the weeks until we run out of the home matches.
        for jj in range(len(first_df)):
            # kk = kk % WEEKS_OF_MATCHES
            tmp_away_team = first_df['away_team'].iloc[jj]
            tmp_home_team = first_df['home_team'].iloc[jj]
            # loop thru the weeks
            # redo = 1
            # while redo == 1:
            for kk in range(WEEKS_OF_MATCHES):
                # add the match to the week if a team has less than 2 matches in the week
                dict_key_name = 'week' + str(kk)
                # debugging 
                # ordered_dict[dict_key_name]['away_team'].append(team + "_start_with_loop_1")
                # ordered_dict[dict_key_name]['home_team'].append("start_with_loop_1")
                # tmp_df = pd.DataFrame(ordered_dict[division][dict_key_name])
                tmp_df = pd.DataFrame(ordered_dict[dict_key_name])
                tmp_combined_df = pd.concat([tmp_df['away_team'], tmp_df['home_team']], ignore_index=True)
                # freq_away_teams_for_week = tmp_df['away_team'].value_counts() # for a team this will show how many times they are away in the week  
                # freq_home_teams_for_week = tmp_df['home_team'].value_counts() # for a team this will show how many times they are home in the week 
                freq_teams_for_week = tmp_combined_df.value_counts() # for a team this will show how many times they are away in the week  

                # if the away team or home team we are on already has two matches for this week, skip this week and put them in another week
                if tmp_away_team in freq_teams_for_week:
                    if freq_teams_for_week[tmp_away_team] == 2:
                        # redo = 1
                        continue
                if tmp_home_team in freq_teams_for_week:
                    if freq_teams_for_week[tmp_home_team] == 2:
                        # redo = 1
                        continue

                # there is a problem with this because there are not an even amount of home and away games for each team 
                # maybe we dont care if they have two home games or two away games.  we just dont want them to have more than two games in a week 
                # might not need this check anymore 
                # if (tmp_away_team in freq_teams_for_week): 
                #     if (freq_teams_for_week[tmp_away_team] == 2):
                #         continue
                # if (tmp_home_team in freq_teams_for_week):
                #     if (freq_teams_for_week[tmp_home_team] == 2):
                #         continue

                # if tmp_home_team in freq_home_teams_for_week:
                #     if freq_teams_for_week[tmp_home_team] == 1:
                #         continue

                # at this point we should be good to put this match in this week.  after this we need to break because we dont want to double add a match into a week
                # ordered_dict[division][dict_key_name]['away_team'].append(tmp_away_team)
                # ordered_dict[division][dict_key_name]['home_team'].append(tmp_home_team)
                ordered_dict[dict_key_name]['away_team'].append(tmp_away_team)
                ordered_dict[dict_key_name]['home_team'].append(tmp_home_team)
                # kk = kk + 1
                # matches_per_week[kk] += 1
                # redo = 0
                break

        # debugging 
        # ordered_dict[dict_key_name]['away_team'].append(team + "_done_with_loop_1")
        # ordered_dict[dict_key_name]['home_team'].append("done_with_loop_1")

        # second_df loop
        # kk = 0 # week counter
        for jj in range(len(second_df)):
            # kk = kk % WEEKS_OF_MATCHES
            # debugging 
            # ordered_dict[dict_key_name]['away_team'].append(team + "_start_with_loop_2")
            # ordered_dict[dict_key_name]['home_team'].append("start_with_loop_2")
            tmp_away_team = second_df['away_team'].iloc[jj]
            tmp_home_team = second_df['home_team'].iloc[jj]
            # loop thru the weeks
            for kk in range(WEEKS_OF_MATCHES):
                # add the match to the week if a team has less than 2 matches in the week
                dict_key_name = 'week' + str(kk)
                # tmp_df = pd.DataFrame(ordered_dict[division][dict_key_name])
                tmp_df = pd.DataFrame(ordered_dict[dict_key_name])
                tmp_combined_df = pd.concat([tmp_df['away_team'], tmp_df['home_team']], ignore_index=True)
                # freq_away_teams_for_week = tmp_df['away_team'].value_counts() # for a team this will show how many times they are away in the week  
                # freq_home_teams_for_week = tmp_df['home_team'].value_counts() # for a team this will show how many times they are home in the week 
                freq_teams_for_week = tmp_combined_df.value_counts() # for a team this will show how many times they are away in the week  

                # if the away team or home team we are on already has two matches for this week, skip this week and put them in another week
                # if tmp_away_team in freq_teams_for_week:
                #     if freq_teams_for_week[tmp_away_team] == 2:
                #         continue
                # if tmp_home_team in freq_teams_for_week:
                #     if freq_teams_for_week[tmp_home_team] == 2:
                #         continue

                # there is a problem with this because there are not an even amount of home and away games for each team 
                # maybe we dont care if they have two home games or two away games.  we just dont want them to have more than two games in a week 
                # might not need this check anymore 
                if (tmp_away_team in freq_teams_for_week): 
                    if (freq_teams_for_week[tmp_away_team] == 2):
                        continue
                if (tmp_home_team in freq_teams_for_week):
                    if (freq_teams_for_week[tmp_home_team] == 2):
                        continue
                # if tmp_home_team in freq_home_teams_for_week:
                #     if freq_teams_for_week[tmp_home_team] == 1:
                #         continue

                # at this point we should be good to put this match in this week.  after this we need to break because we dont want to double add a match into a week
                # ordered_dict[division][dict_key_name]['away_team'].append(tmp_away_team)
                # ordered_dict[division][dict_key_name]['home_team'].append(tmp_home_team)
                ordered_dict[dict_key_name]['away_team'].append(tmp_away_team)
                ordered_dict[dict_key_name]['home_team'].append(tmp_home_team)
                # matches_per_week[kk] += 1
                break
                # kk = kk + 1
    
    # get rid of matches we have already ordered so we can see what is left
    for week_idx, week_key in enumerate(ordered_dict.keys()):
        for aa in range(len(ordered_dict[week_key]['away_team'])):
            away_team_df = tracking_df[tracking_df['away_team'] == ordered_dict[week_key]['away_team'][aa]]
            # test = away_team_df[away_team_df['home_team'] == ordered_dict[week_key]['home_team'][aa]]
            absolute_ind_locations = away_team_df[away_team_df['home_team'] == ordered_dict[week_key]['home_team'][aa]].index.tolist()
            # print('test')
            # print(test)
            # print('abs_inds')
            # print(absolute_ind_locations)
            # # print('abs_inds')
            # # print(absolute_ind_locations)
            # # print(test)
            # ind_locations = tracking_df.iloc[absolute_ind_locations]['index'].tolist()
            # print('reg_inds')
            # print(ind_locations)
            # # print(tracking_df.iloc[absolute_ind_locations]['index'].tolist())
            # ((week_idx * len(ordered_dict[week_key]['away_team'])) + aa) should correspond to the overall df 
            if len(absolute_ind_locations) > 0:
                # print('inds')
                # print(ind_locations)
                # print('df')
                # print(tracking_df)
                tracking_df = tracking_df.drop(absolute_ind_locations[0])
            # print(tracking_df)
    
    # print(tracking_df)
    # tracking_df now has the missing matches 
    away_uniques_missing = tracking_df['away_team'].unique()
    home_uniques_missing = tracking_df['home_team'].unique()
    print('away uniques missing')
    print(away_uniques_missing)
    print('home uniques missing')
    print(home_uniques_missing)

    # find the lacking weeks 
    week_lengths = []
    week_names = [] 
    lacking_weeks = []
    for week_idx, week_key in enumerate(ordered_dict.keys()):
        week_length = len(ordered_dict[week_key]['away_team'])
        week_lengths.append(week_length)
        week_names.append(week_key)
    
    for idx, length in enumerate(week_lengths):
        if length != matches_needed_per_week:
            lacking_weeks.append(week_names[idx])
    
    print('lacking weeks')
    print(lacking_weeks)
    print('tracking_df')
    print(tracking_df)

    # # for now, lets just go thru the lacking weeks and add our missing matches to them 
    # while len(tracking_df) > 0:
    #     for lacking_week in lacking_weeks:
    #         if len(tracking_df) == 0:
    #             break
    #         ordered_dict[lacking_week]['away_team'].append(tracking_df['away_team'].iloc[0])
    #         ordered_dict[lacking_week]['home_team'].append(tracking_df['home_team'].iloc[0])
    #         # print()
    #         tracking_df = tracking_df.drop([tracking_df.index[0]])

    # ALMOST WORKED
    # # loop thru the division matches
    # for jj in range(len(division_matches_dict['away_team'])):
    #     tmp_away_team = division_matches_dict['away_team'][jj]
    #     tmp_home_team = division_matches_dict['home_team'][jj]
    #     # loop thru the weeks
    #     for kk in range(WEEKS_OF_MATCHES):
    #         # add the match to the week if a team has less than 2 matches in the week
    #         dict_key_name = 'week' + str(kk)
    #         # tmp_df = pd.DataFrame(ordered_dict[division][dict_key_name])
    #         tmp_df = pd.DataFrame(ordered_dict[dict_key_name])
    #         tmp_combined_df = pd.concat([tmp_df['away_team'], tmp_df['home_team']], ignore_index=True)
    #         # freq_away_teams_for_week = tmp_df['away_team'].value_counts() # for a team this will show how many times they are away in the week  
    #         # freq_home_teams_for_week = tmp_df['home_team'].value_counts() # for a team this will show how many times they are home in the week 
    #         freq_teams_for_week = tmp_combined_df.value_counts() # for a team this will show how many times they are away in the week  

    #         # if the away team or home team we are on already has two matches for this week, skip this week and put them in another week
    #         # if tmp_away_team in freq_teams_for_week:
    #         #     if freq_teams_for_week[tmp_away_team] == 2:
    #         #         continue
    #         # if tmp_home_team in freq_teams_for_week:
    #         #     if freq_teams_for_week[tmp_home_team] == 2:
    #         #         continue

    #         # there is a problem with this because there are not an even amount of home and away games for each team 
    #         # maybe we dont care if they have two home games or two away games.  we just dont want them to have more than two games in a week 
    #         if (tmp_away_team in freq_teams_for_week): 
    #             if (freq_teams_for_week[tmp_away_team] == 2):
    #                 continue
    #         if (tmp_home_team in freq_teams_for_week):
    #             if (freq_teams_for_week[tmp_home_team] == 2):
    #                 continue
    #         # if tmp_home_team in freq_home_teams_for_week:
    #         #     if freq_teams_for_week[tmp_home_team] == 1:
    #         #         continue

    #         # at this point we should be good to put this match in this week.  after this we need to break because we dont want to double add a match into a week
    #         # ordered_dict[division][dict_key_name]['away_team'].append(tmp_away_team)
    #         # ordered_dict[division][dict_key_name]['home_team'].append(tmp_home_team)
    #         ordered_dict[dict_key_name]['away_team'].append(tmp_away_team)
    #         ordered_dict[dict_key_name]['home_team'].append(tmp_home_team)
    #         # matches_per_week[kk] += 1
    #         break

    # division_matches_df_len = len(division_matches_df)
    # ind_counter = 0
    # while(division_matches_df_len > 0):
    #     division_matches_df_len = len(division_matches_df)
    #     # print(division_matches_df_len)
    #     print(division_matches_df)

    #     ind_counter = ind_counter % division_matches_df_len

    #     # loop thru the division matches
    #     # for jj in range(division_matches_df_len):
    #     tmp_away_team = division_matches_df['away_team'].iloc[ind_counter]
    #     tmp_home_team = division_matches_df['home_team'].iloc[ind_counter]
    #     # loop thru the weeks
    #     for kk in range(WEEKS_OF_MATCHES):
    #         # add the match to the week if a team has less than 2 matches in the week
    #         dict_key_name = 'week' + str(kk)
    #         # tmp_df = pd.DataFrame(ordered_dict[division][dict_key_name])
    #         tmp_df = pd.DataFrame(ordered_dict[dict_key_name])
    #         tmp_combined_df = pd.concat([tmp_df['away_team'], tmp_df['home_team']], ignore_index=True)
    #         # freq_away_teams_for_week = tmp_df['away_team'].value_counts() # for a team this will show how many times they are away in the week  
    #         # freq_home_teams_for_week = tmp_df['home_team'].value_counts() # for a team this will show how many times they are home in the week 
    #         freq_teams_for_week = tmp_combined_df.value_counts() # for a team this will show how many times they are away in the week  

    #         # if the away team or home team we are on already has two matches for this week, skip this week and put them in another week
    #         # if tmp_away_team in freq_teams_for_week:
    #         #     if freq_teams_for_week[tmp_away_team] == 2:
    #         #         continue
    #         # if tmp_home_team in freq_teams_for_week:
    #         #     if freq_teams_for_week[tmp_home_team] == 2:
    #         #         continue

    #         # there is a problem with this because there are not an even amount of home and away games for each team 
    #         # maybe we dont care if they have two home games or two away games.  we just dont want them to have more than two games in a week 
    #         if (tmp_away_team in freq_teams_for_week): 
    #             if (freq_teams_for_week[tmp_away_team] == 2):
    #                 continue
    #         if (tmp_home_team in freq_teams_for_week):
    #             if (freq_teams_for_week[tmp_home_team] == 2):
    #                 continue
    #         # if tmp_home_team in freq_home_teams_for_week:
    #         #     if freq_teams_for_week[tmp_home_team] == 1:
    #         #         continue

    #         # at this point we should be good to put this match in this week.  after this we need to break because we dont want to double add a match into a week
    #         # ordered_dict[division][dict_key_name]['away_team'].append(tmp_away_team)
    #         # ordered_dict[division][dict_key_name]['home_team'].append(tmp_home_team)
    #         ordered_dict[dict_key_name]['away_team'].append(tmp_away_team)
    #         ordered_dict[dict_key_name]['home_team'].append(tmp_home_team)
    #         # matches_per_week[kk] += 1
    #         division_matches_df = division_matches_df.reset_index()
    #         division_matches_df = division_matches_df.drop([ind_counter])
    #         break
    #     
    #     ind_counter += 1

    #     division_matches_df_len = len(division_matches_df)




            # curr_teams_playing_this_week = ordered_dict[division][dict_key_name]['away_team'] + ordered_dict[division][dict_key_name]['home_team']
                # home_away_combined_df = pd.concat([permutation_matches_df['away_team'], permutation_matches_df['home_team']], ignore_index=True)
                # current_matches_per_team_series = home_away_combined_df.value_counts()

            
    return ordered_dict, tracking_df

# order_matches(varsity_matches_dict[varsity_divisions[0]], varsity_divisions[0], WEEKS_OF_MATCHES)
            

In [430]:
def find_weeks_with_one_match_left(ordered_dict, WEEKS_OF_MATCHES, TOTAL_DIVISION_MATCHES):
    weeks = []
    for ii in range(WEEKS_OF_MATCHES):
        dict_key = 'week' + str(ii)
        if len(ordered_dict[dict_key]['away_team']) == ((TOTAL_DIVISION_MATCHES / WEEKS_OF_MATCHES) - 1):
            weeks.append(dict_key)
    return weeks

test_nested_empty = {'away_team': [], 'home_team': []}
test_nested_one_off = {'away_team': ['Liberty University', 'Ferris State University', 'Adrian College', 'Southern Illinois University Edwardsville'], 'home_team': ['Illinois State University', 'Illinois State University', 'Southern Illinois University Edwardsville', 'Ferris State University']}
test_ordered_dict = {'week0': test_nested_empty, 'week1': test_nested_empty, 'week2': test_nested_one_off, 'week3': test_nested_empty, 'week4': test_nested_one_off, 'week5': test_nested_empty}

print(find_weeks_with_one_match_left(test_ordered_dict, WEEKS_OF_MATCHES=6, TOTAL_DIVISION_MATCHES=30))


['week2', 'week4']


In [431]:
def complete_weeks_with_one_match_left(ordered_dict, weeks_with_one_match_left, sorted_division_matches_df):
    # get all the matches so far 
    matches_so_far = {'away_team': [], 'home_team': []}
    # matches_left = {'away_team': [], 'home_team': []}
    matches_to_remove = {'away_team': [], 'home_team': []}
    for week_key in ordered_dict.keys():
        for ii in range(len(ordered_dict[week_key]['away_team'])):
            matches_so_far['away_team'].append(ordered_dict[week_key]['away_team'][ii])
            matches_so_far['home_team'].append(ordered_dict[week_key]['home_team'][ii])
    
    matches_so_far_df = pd.DataFrame(matches_so_far)

    matches_left = sorted_division_matches_df[sorted_division_matches_df['alive'] == 1]

    # # take them out of sorted_division_matches_df (call this matches_left)
    # # this does not work properly 
    # # matches_left = pd.concat([sorted_division_matches_df, matches_so_far_df, matches_so_far_df], ignore_index=True).drop_duplicates(keep=False, ignore_index=True)
    # # x = pd.MultiIndex.from_arrays([sorted_division_matches_df['away_team'], sorted_division_matches_df['home_team']])
    # # y = pd.MultiIndex.from_arrays([matches_so_far_df['away_team'], matches_so_far_df['home_team']])
    # # inter = x.difference(y)
    # # matches_left = sorted_division_matches_df.set_index(['away_team', 'home_team']).loc[inter].reset_index()
    # inds_to_drop = []
    # for jj in range(len(matches_so_far_df)):
    #     so_far_away_team = matches_so_far_df.iloc[jj, 0].strip()
    #     so_far_home_team = matches_so_far_df.iloc[jj, 1].strip()
    #     for ii in range(len(alive_matches_df)):
    #         sorted_away_team = alive_matches_df.iloc[ii, 0].strip()
    #         sorted_home_team = alive_matches_df.iloc[ii, 1].strip()

    #         if sorted_away_team == so_far_away_team and sorted_home_team == so_far_home_team:
    #             if not(ii in inds_to_drop):
    #                 inds_to_drop.append(ii)
    #                 break

    # sorted_division_matches_df = sorted_division_matches_df.reset_index(drop=True)

    # matches_left = sorted_division_matches_df.drop(inds_to_drop)
    # # strip all whitespace from within the df
    # matches_left = matches_left.applymap(lambda x: " ".join(x.split()) if isinstance(x, str) else x)


    # see how many teams only have 1 match (should only be two teams)
    # find the combination in matches_left that works and add it to ordered_dict
    # add that match to matches_to_remove
    for week in weeks_with_one_match_left:
        away_teams = ordered_dict[week]['away_team']
        home_teams = ordered_dict[week]['home_team']
        home_away_teams = away_teams + home_teams
        np_home_away_teams = np.array(home_away_teams)
        unique_teams, frequency_teams = np.unique(np_home_away_teams, return_counts=True)
        # print(np_home_away_teams)
        # print(unique_teams)
        # print(frequency_teams)
        teams_to_match = []
        for team_ind in range(len(unique_teams)):
            team = unique_teams[team_ind]
            team_freq = frequency_teams[team_ind]
            if team_freq == 1:
                teams_to_match.append(team)
        
        # print(week)
        
        # might be an issue here, i think we need to remove the item from matches_left after we use it 
        for ii in range(len(matches_left)):
            if matches_left.iloc[ii, 2] == 0:
                continue
            if matches_left.iloc[ii, 0].strip() == teams_to_match[0].strip() and matches_left.iloc[ii, 1].strip() == teams_to_match[1].strip():
                away_team_to_match = teams_to_match[0]
                home_team_to_match = teams_to_match[1]
                ordered_dict[week]['away_team'].append(away_team_to_match)
                ordered_dict[week]['home_team'].append(home_team_to_match)
                matches_to_remove['away_team'].append(away_team_to_match)
                matches_to_remove['home_team'].append(home_team_to_match)
                # remove the match we used from matches_left as well 
                matches_left.iloc[ii, 2] = 0  # sets alive field to 0
                # drop_match_ind = matches_left[matches_left['away_team'] == away_team_to_match][matches_left['home_team'] == home_team_to_match].index[0]
                # matches_left = matches_left.drop(drop_match_ind)
                break
            elif matches_left.iloc[ii, 0] == teams_to_match[1] and matches_left.iloc[ii, 1] == teams_to_match[0]:
                away_team_to_match = teams_to_match[1]
                home_team_to_match = teams_to_match[0]
                ordered_dict[week]['away_team'].append(away_team_to_match)
                ordered_dict[week]['home_team'].append(home_team_to_match)
                matches_to_remove['away_team'].append(away_team_to_match)
                matches_to_remove['home_team'].append(home_team_to_match)
                # remove the match we used from matches_left as well 
                matches_left.iloc[ii, 2] = 0  # sets alive field to 0
                # drop_match_ind = matches_left[matches_left['away_team'] == away_team_to_match][matches_left['home_team'] == home_team_to_match].index[0]
                # matches_left = matches_left.drop(drop_match_ind)
                break


    return ordered_dict, matches_to_remove

test_nested_empty = {'away_team': [], 'home_team': []}
test_nested_one_off = {'away_team': ['Liberty University', 'Ferris State University', 'Adrian College', 'Southern Illinois University Edwardsville'], 'home_team': ['Illinois State University', 'Illinois State University', 'Southern Illinois University Edwardsville', 'Ferris State University']}
test_nested_one_off_2 = {'away_team': ['Southern Illinois University Edwardsville', 'Southern Illinois University Edwardsville', 'Illinois State University', 'Illinois State University'], 'home_team': ['Ferris State University', 'Liberty University', 'Ferris State University', 'Adrian College']}
test_ordered_dict = {'week0': test_nested_empty, 'week1': test_nested_empty, 'week2': test_nested_one_off, 'week3': test_nested_empty, 'week4': test_nested_one_off_2, 'week5': test_nested_empty}

test_division_matches = club_matches_dict[club_divisions[1]].copy()
test_division_matches_df = pd.DataFrame(test_division_matches)
test_division_matches_df['alive'] = 1
test_one_match_left_weeks = find_weeks_with_one_match_left(test_ordered_dict, WEEKS_OF_MATCHES=6, TOTAL_DIVISION_MATCHES=30)

test_res1, test_res2 = complete_weeks_with_one_match_left(test_ordered_dict, test_one_match_left_weeks, test_division_matches_df)
print('ordered_dict')
print(test_res1)
print('to remove')
print(test_res2)

ordered_dict
{'week0': {'away_team': [], 'home_team': []}, 'week1': {'away_team': [], 'home_team': []}, 'week2': {'away_team': ['Liberty University', 'Ferris State University', 'Adrian College', 'Southern Illinois University Edwardsville', 'Adrian College'], 'home_team': ['Illinois State University', 'Illinois State University', 'Southern Illinois University Edwardsville', 'Ferris State University', 'Liberty University']}, 'week3': {'away_team': [], 'home_team': []}, 'week4': {'away_team': ['Southern Illinois University Edwardsville', 'Southern Illinois University Edwardsville', 'Illinois State University', 'Illinois State University', 'Liberty University'], 'home_team': ['Ferris State University', 'Liberty University', 'Ferris State University', 'Adrian College', 'Adrian College']}, 'week5': {'away_team': [], 'home_team': []}}
to remove
{'away_team': ['Adrian College', 'Liberty University'], 'home_team': ['Liberty University', 'Adrian College']}


In [432]:
def strip_whitespace_df(df):
    df = df.applymap(lambda x: " ".join(x.split()) if isinstance(x, str) else x)
    return df

In [433]:
# function to remove matches from DF 
def remove_matches(input_df, matches_to_remove):
    # strip white space from df 
    input_df = strip_whitespace_df(input_df)

    # loop thru matches_to_remove
    for ii in range(len(matches_to_remove['away_team'])):
        away_team = matches_to_remove['away_team'][ii]
        home_team = matches_to_remove['home_team'][ii]
        ind_of_interest = input_df[input_df['away_team'] == away_team][input_df['home_team'] == home_team].index[0]
        # print(matches_to_remove['away_team'][ii])
        # print(matches_to_remove['home_team'][ii])
        # input_df = input_df.drop(ind_of_interest)
        input_df.iloc[ind_of_interest, 2] = 0 # set the alive value to zero
    return input_df

test_division_matches_df['alive'] = 1
test_new_df = remove_matches(test_division_matches_df, test_res2)
print('before')
print(test_division_matches_df)
print('after')
print(test_new_df)

before
                                    away_team  \
0                          Liberty University   
1                     Ferris State University   
2                              Adrian College   
3   Southern Illinois University Edwardsville   
4                  Illinois State University    
5                     Ferris State University   
6                              Adrian College   
7   Southern Illinois University Edwardsville   
8                  Illinois State University    
9                          Liberty University   
10                             Adrian College   
11  Southern Illinois University Edwardsville   
12                 Illinois State University    
13                         Liberty University   
14                    Ferris State University   
15  Southern Illinois University Edwardsville   
16                 Illinois State University    
17                         Liberty University   
18                    Ferris State University   
19           

  df = df.applymap(lambda x: " ".join(x.split()) if isinstance(x, str) else x)
  ind_of_interest = input_df[input_df['away_team'] == away_team][input_df['home_team'] == home_team].index[0]
  ind_of_interest = input_df[input_df['away_team'] == away_team][input_df['home_team'] == home_team].index[0]


In [434]:
def check_eq_two(weeks_dict):
    combine_lists = weeks_dict['away_team'] + weeks_dict['home_team']
    if len(combine_lists) == 0:
        return []

    np_combine_lists = np.array(combine_lists)
    uniques, freqs = np.unique(np_combine_lists, return_counts=True)

    teams_at_two = []

    for idx, team in enumerate(uniques):
        if freqs[idx] == 2:
            teams_at_two.append(team)

    return teams_at_two

test_week_good = {'away_team': [], 'home_team': []}
test_week_both = {'away_team': ['Liberty University', 'Ferris State University', 'Adrian College', 'Southern Illinois University Edwardsville'], 'home_team': ['Illinois State University', 'Illinois State University', 'Southern Illinois University Edwardsville', 'Ferris State University']}

print(check_eq_two(test_week_good))
print(check_eq_two(test_week_both))

[]
['Ferris State University', 'Illinois State University', 'Southern Illinois University Edwardsville']


In [435]:

def order_matches(division_matches_dict, division, WEEKS_OF_MATCHES, hard_codings=None):
    ordered_dict = {}

    # initialize what we want to return 
    for ii in range(WEEKS_OF_MATCHES):
        dict_key = 'week' + str(ii)
        # ordered_dict[division][dict_key] = {'away_team': [], 'home_team': []}   # this will hold a list of the matches for this week
        ordered_dict[dict_key] = {'away_team': [], 'home_team': []}   # this will hold a list of the matches for this week
    
    # get number of teams in the division and find out how many matches are needed per week 
    division_matches_df = pd.DataFrame(division_matches_dict)
    division_matches_df = strip_whitespace_df(division_matches_df)
    total_division_matches = len(division_matches_df)
    num_teams_in_division = len(np.unique(division_matches_df['away_team']))
    matches_needed_per_week = total_division_matches / WEEKS_OF_MATCHES
    
    # sort home teams alphabetically
    sorted_division_matches_df = division_matches_df.sort_values('home_team', ignore_index=True)
    # add an alive column
    sorted_division_matches_df['alive'] = 1

    # get number of home games for each team 
    home_team_counts = sorted_division_matches_df['home_team'].value_counts(ascending=False)

    # find team with the max number of home games 
    max_home_team_counts = home_team_counts.keys()[0]

    # get the number of home games list for each team but exclude the max team
    home_team_counts_reduced = home_team_counts[len(home_team_counts) - 1:0:-1]
    home_team_names_reduced = home_team_counts_reduced.keys()

    # add the max_home_team_counts teams matches (all of them) to the ordered_dict
    max_home_team_home_matches = sorted_division_matches_df[sorted_division_matches_df['home_team'] == max_home_team_counts]
    max_home_team_away_matches = sorted_division_matches_df[sorted_division_matches_df['away_team'] == max_home_team_counts]
    max_home_team_all_matches = pd.concat([max_home_team_home_matches, max_home_team_away_matches], ignore_index=True)
    initial_inds_to_kill = []
    for match_ind in range(len(max_home_team_all_matches)):
        for week_ind in range(WEEKS_OF_MATCHES):
            dict_key = 'week' + str(week_ind)
            if (len(ordered_dict[dict_key]['away_team']) == 2) or (len(ordered_dict[dict_key]['home_team']) == 2):
                continue
            ordered_dict[dict_key]['away_team'].append(max_home_team_all_matches.iloc[match_ind, 0])
            ordered_dict[dict_key]['home_team'].append(max_home_team_all_matches.iloc[match_ind, 1])
            # get the same location of this match in the sorted total df 
            pot_ind_to_kill = sorted_division_matches_df[sorted_division_matches_df['away_team'] == max_home_team_all_matches.iloc[match_ind, 0]][sorted_division_matches_df['home_team'] == max_home_team_all_matches.iloc[match_ind, 1]][sorted_division_matches_df['alive'] == 1].index
            for ind in pot_ind_to_kill:
                if ind in initial_inds_to_kill:
                    continue
                else:
                    initial_inds_to_kill.append(ind)
                    break
            break
    
    # kill the inds of the stuff we just added 
    # print(initial_inds_to_kill)
    sorted_division_matches_df.iloc[initial_inds_to_kill, 2] = 0
    # sorted_division_matches_df.iloc[11, 2] = 0
    # print(sorted_division_matches_df)
    print('all match pairings')
    print(sorted_division_matches_df)

    print('ordered matches so far')
    print(ordered_dict)
    

    # ABSOLUTE RULES:
    # (1) If a week has one more match remaining, complete that week with the appropriate situation
    # (2) No more matches than two matches per week 

    # loop thru home_team_names_reduced
    for team_name in home_team_names_reduced:
        # get all the matches for the team
        team_away_matches = sorted_division_matches_df[sorted_division_matches_df['away_team'] == team_name][sorted_division_matches_df['alive'] == 1]
        team_home_matches = sorted_division_matches_df[sorted_division_matches_df['home_team'] == team_name][sorted_division_matches_df['alive'] == 1]
        team_all_matches = pd.concat([team_home_matches, team_away_matches], ignore_index=True)

        looping_team = team_name
        looping_team_match_counter = 0
        # print(team_all_matches)

        ########### LEFT OFF HERE ###############
        # need to add support for the new alive column i added.  if alive = 1 the match can still be used, if alive = 0 the match has already been used and cannot be used again
        # should add this support all over here and also in the complete_weeks_with_one_match_left (and maybe more)
        ########### LEFT OFF HERE ###############

        print('team')
        print(team_name)
        print('ordered matches when back at top of team loop')
        print(ordered_dict)

        # loop thru matches 
        for team_matches_ind in range(len(team_all_matches)):
            # make sure we are not on a dead match
            team_away_matches = sorted_division_matches_df[sorted_division_matches_df['away_team'] == team_name][sorted_division_matches_df['alive'] == 1]
            team_home_matches = sorted_division_matches_df[sorted_division_matches_df['home_team'] == team_name][sorted_division_matches_df['alive'] == 1]
            team_all_matches = pd.concat([team_home_matches, team_away_matches], ignore_index=True)
            # if we are on a dead match, we should continue 
            if team_all_matches.iloc[team_matches_ind, 2] == 0:
                continue

            # if any week has one match left, complete it 
            one_match_left_weeks = find_weeks_with_one_match_left(ordered_dict, WEEKS_OF_MATCHES, TOTAL_DIVISION_MATCHES=total_division_matches)
            if len(one_match_left_weeks) > 0:
                print('in situation where a week has one match left to be compelted')
                ordered_dict, matches_to_remove = complete_weeks_with_one_match_left(ordered_dict, one_match_left_weeks, sorted_division_matches_df)
                # comment out for now
                # # we need to remove the matches in sorted_division_matches_df (this might cause an issue)
                # sorted_division_matches_df = remove_matches(sorted_division_matches_df, matches_to_remove)
                # we need to remove the matches in sorted_division_matches_df
                sorted_division_matches_df = remove_matches(sorted_division_matches_df, matches_to_remove)
                # whenever we update sorted_division_matches_df we need to update team_all_matches
                team_away_matches = sorted_division_matches_df[sorted_division_matches_df['away_team'] == team_name][sorted_division_matches_df['alive'] == 1]
                team_home_matches = sorted_division_matches_df[sorted_division_matches_df['home_team'] == team_name][sorted_division_matches_df['alive'] == 1]
                team_all_matches = pd.concat([team_home_matches, team_away_matches], ignore_index=True)
                # if we are on a dead match, we should continue 
                if team_all_matches.iloc[team_matches_ind, 2] == 0:
                    continue

            # get the non looping team 
            away_team = team_all_matches.iloc[team_matches_ind, 0]
            home_team = team_all_matches.iloc[team_matches_ind, 1]

            # for now lets not consider this, it can get hairy 
            # # need to check if any of the matches to remove is one of the matches we are currently looping thru, if so, we should skip this one 
            # are_we_on_a_match_to_remove = 0
            # for ii in range(len(matches_to_remove)):
            #     away_remove_team = matches_to_remove['away_team'][ii]
            #     home_remove_team = matches_to_remove['home_team'][ii]

            #     if away_remove_team == away_team and home_remove_team == home_team:
            #         are_we_on_a_match_to_remove = 1



            if away_team == looping_team:
                non_looping_team = home_team
            else:
                non_looping_team = away_team
            
            # impossible counter
            impossible_count = 0

            # loop thru each week 
            for week_ind in range(WEEKS_OF_MATCHES):
                dict_key = 'week' + str(week_ind)

                impossible_count_incremented = 0

                teams_at_two = check_eq_two(ordered_dict[dict_key])

                # if any week has one match left, complete it 
                one_match_left_weeks = find_weeks_with_one_match_left(ordered_dict, WEEKS_OF_MATCHES, TOTAL_DIVISION_MATCHES=total_division_matches)
                if len(one_match_left_weeks) > 0:
                    print('in situation where a week has one match left to be compelted 2')
                    ordered_dict, matches_to_remove = complete_weeks_with_one_match_left(ordered_dict, one_match_left_weeks, sorted_division_matches_df)
                    # comment out for now
                    # # we need to remove the matches in sorted_division_matches_df (this might cause an issue)
                    # sorted_division_matches_df = remove_matches(sorted_division_matches_df, matches_to_remove)
                    sorted_division_matches_df = remove_matches(sorted_division_matches_df, matches_to_remove)
                    # whenever we update sorted_division_matches_df we need to update team_all_matches
                    team_away_matches = sorted_division_matches_df[sorted_division_matches_df['away_team'] == team_name][sorted_division_matches_df['alive'] == 1]
                    team_home_matches = sorted_division_matches_df[sorted_division_matches_df['home_team'] == team_name][sorted_division_matches_df['alive'] == 1]
                    team_all_matches = pd.concat([team_home_matches, team_away_matches], ignore_index=True)
                    # if we are on a dead match, we should check the next match 
                    if team_all_matches.iloc[team_matches_ind, 2] == 0:
                        break  # break here because we want to exit the weeks loop and iterate to another match 
                
                if impossible_count < WEEKS_OF_MATCHES:
                    # if the non_looping_team is within the weeks matches, increment impossible_count and skip
                    for ii in range(len(ordered_dict[dict_key]['away_team'])):
                        week_away_team = ordered_dict[dict_key]['away_team'][ii]
                        week_home_team = ordered_dict[dict_key]['home_team'][ii]

                        # print('week away team')
                        # print(week_away_team)
                        # print('week home team')
                        # print(week_home_team)
                        print('non looping team')
                        print(non_looping_team)
                        if week_away_team == non_looping_team or week_home_team == non_looping_team:
                            impossible_count += 1
                            impossible_count_incremented = 1
                            break
                    
                    if impossible_count_incremented:
                        print('impossible count incremented, skipping this week')
                        continue
                    else:
                        # before we add we need to make sure there are not more than 2 of each team in the week, if there are, we must skip
                        # teams_at_two = check_eq_two(ordered_dict[dict_key])
                        if away_team in teams_at_two:
                            print('team has 2 matches in this week already')
                            impossible_count += 1
                            continue
                        
                        if home_team in teams_at_two:
                            print('team has 2 matches in this week already')
                            impossible_count += 1
                            continue
                        # add the match 
                        # print('here1')
                        # ADDING
                        print('ADDING MATCH')
                        ordered_dict[dict_key]['away_team'].append(away_team)
                        ordered_dict[dict_key]['home_team'].append(home_team)
                        # remove this match now from the sorted df 
                        sorted_division_matches_df[sorted_division_matches_df['away_team'] == away_team][sorted_division_matches_df['home_team'] == home_team]['alive'] = 0
                        team_away_matches = sorted_division_matches_df[sorted_division_matches_df['away_team'] == team_name][sorted_division_matches_df['alive'] == 1]
                        team_home_matches = sorted_division_matches_df[sorted_division_matches_df['home_team'] == team_name][sorted_division_matches_df['alive'] == 1]
                        team_all_matches = pd.concat([team_home_matches, team_away_matches], ignore_index=True)


                        # comment out for now
                        # # find this match within the sorted_df and remove it 
                        # ind_to_remove = sorted_division_matches_df[sorted_division_matches_df['away_team'] == away_team][sorted_division_matches_df['home_team'] == home_team].index[0]
                        # sorted_division_matches_df.drop(ind_to_remove)
                        # break because now we want to move on to the next match (dont want to add the same match twice)
                        break
                else:
                    # before we add we need to make sure there are not more than 2 of each team in the week, if there are, we must skip
                    # teams_at_two = check_eq_two(ordered_dict[dict_key])
                    if away_team in teams_at_two:
                        print('team has 2 matches in this week already 2')
                        continue
                    
                    if home_team in teams_at_two:
                        print('team has 2 matches in this week already 2')
                        continue
                    # add the match 
                    # print('here2')
                    # ADDING
                    print('ADDING MATCH')
                    ordered_dict[dict_key]['away_team'].append(away_team)
                    ordered_dict[dict_key]['home_team'].append(home_team)
                    # remove this match now from the sorted df 
                    sorted_division_matches_df[sorted_division_matches_df['away_team'] == away_team][sorted_division_matches_df['home_team'] == home_team]['alive'] = 0
                    team_away_matches = sorted_division_matches_df[sorted_division_matches_df['away_team'] == team_name][sorted_division_matches_df['alive'] == 1]
                    team_home_matches = sorted_division_matches_df[sorted_division_matches_df['home_team'] == team_name][sorted_division_matches_df['alive'] == 1]
                    team_all_matches = pd.concat([team_home_matches, team_away_matches], ignore_index=True)


                    # comment out for now
                    # # find this match within the sorted_df and remove it 
                    # ind_to_remove = sorted_division_matches_df[sorted_division_matches_df['away_team'] == away_team][sorted_division_matches_df['home_team'] == home_team].index[0]
                    # sorted_division_matches_df.drop(ind_to_remove)
                    # break because now we want to move on to the next match (dont want to add the same match twice)
                    break


    return ordered_dict

# order_matches(varsity_matches_dict[varsity_divisions[0]], varsity_divisions[0], WEEKS_OF_MATCHES)
# print(club_divisions[1])
test = order_matches(club_matches_dict[club_divisions[1]], club_divisions[1], WEEKS_OF_MATCHES)
print(test)



all match pairings
                                    away_team  \
0                     Ferris State University   
1   Southern Illinois University Edwardsville   
2                          Liberty University   
3                   Illinois State University   
4   Southern Illinois University Edwardsville   
5                              Adrian College   
6                   Illinois State University   
7                          Liberty University   
8                              Adrian College   
9   Southern Illinois University Edwardsville   
10  Southern Illinois University Edwardsville   
11                         Liberty University   
12                             Adrian College   
13  Southern Illinois University Edwardsville   
14                    Ferris State University   
15                             Adrian College   
16  Southern Illinois University Edwardsville   
17                         Liberty University   
18                    Ferris State University   
1

  df = df.applymap(lambda x: " ".join(x.split()) if isinstance(x, str) else x)
  pot_ind_to_kill = sorted_division_matches_df[sorted_division_matches_df['away_team'] == max_home_team_all_matches.iloc[match_ind, 0]][sorted_division_matches_df['home_team'] == max_home_team_all_matches.iloc[match_ind, 1]][sorted_division_matches_df['alive'] == 1].index
  pot_ind_to_kill = sorted_division_matches_df[sorted_division_matches_df['away_team'] == max_home_team_all_matches.iloc[match_ind, 0]][sorted_division_matches_df['home_team'] == max_home_team_all_matches.iloc[match_ind, 1]][sorted_division_matches_df['alive'] == 1].index
  pot_ind_to_kill = sorted_division_matches_df[sorted_division_matches_df['away_team'] == max_home_team_all_matches.iloc[match_ind, 0]][sorted_division_matches_df['home_team'] == max_home_team_all_matches.iloc[match_ind, 1]][sorted_division_matches_df['alive'] == 1].index
  pot_ind_to_kill = sorted_division_matches_df[sorted_division_matches_df['away_team'] == max_home_tea

In [436]:
# write to CSVs WITHOUT ORDERING

# VARSITY
for division in varsity_matches_dict.keys():
    division_name = division.strip()
    if division_name == "Central/West":
        division_name = "CentralWest"
    elif division_name == "South/West":
        division_name = "SouthWest"

    dict_needed = varsity_matches_dict[division]
    df_needed = pd.DataFrame(dict_needed)
    # df_needed.to_csv(os.path.join(EXPERIMENTAL, SCHEDULE_FOLDER, 'varsity_' + division_name + '.csv'))
    df_needed.to_csv(os.path.join(OFFICIAL, SCHEDULE_FOLDER, 'varsity_' + division_name + '.csv'))

# OPEN
for division in open_matches_dict.keys():
    division_name = division.strip()
    if division_name == "Central/West":
        division_name = "CentralWest"
    elif division_name == "South/West":
        division_name = "SouthWest"

    dict_needed = open_matches_dict[division]
    df_needed = pd.DataFrame(dict_needed)
    # df_needed.to_csv(os.path.join(EXPERIMENTAL, SCHEDULE_FOLDER, 'open_' + division_name + '.csv'))
    df_needed.to_csv(os.path.join(OFFICIAL, SCHEDULE_FOLDER, 'open_' + division_name + '.csv'))

# CLUB
for division in club_matches_dict.keys():
    division_name = division.strip()
    if division_name == "Central/West":
        division_name = "CentralWest"
    elif division_name == "South/West":
        division_name = "SouthWest"

    dict_needed = club_matches_dict[division]
    df_needed = pd.DataFrame(dict_needed)
    # df_needed.to_csv(os.path.join(EXPERIMENTAL, SCHEDULE_FOLDER, 'club_' + division_name + '.csv'))
    df_needed.to_csv(os.path.join(OFFICIAL, SCHEDULE_FOLDER, 'club_' + division_name + '.csv'))

In [364]:
# STILL WORKING ON THIS 
# Write to CSVs WITH ORDERING
# needed unnesting function
def unnesting(df, explode):
    idx = df.index.repeat(df[explode[0]].str.len())
    df1 = pd.concat([
        pd.DataFrame({x: np.concatenate(df[x].values)}) for x in explode], axis=1)
    df1.index = idx

    return df1

    # return df1.join(df.drop(explode, 1), how='left')

# VARSITY
for division in varsity_matches_dict.keys():
    division_name = division.strip()
    if division_name == "Central/West":
        division_name = "CentralWest"
    elif division_name == "South/West":
        division_name = "SouthWest"

    dict_needed = varsity_matches_dict[division]
    # order the matches 
    ordered_matches_dict = order_matches(dict_needed, division, WEEKS_OF_MATCHES)
    # ordered_matches_dict, tracking_matches_df = order_matches(dict_needed, division, WEEKS_OF_MATCHES)
    df_part_one = pd.DataFrame.from_dict(ordered_matches_dict, orient="index")
    df_needed = unnesting(df_part_one, ['away_team', 'home_team'])

    df_needed.to_csv(os.path.join(ORDERED_SCHEDULE_FOLDER, 'ordered_varsity_' + division_name + '.csv'))
    # tracking_matches_df.to_csv(os.path.join(ORDERED_SCHEDULE_FOLDER, 'missing_' + division_name + '.csv'))

# OPEN
for division in open_matches_dict.keys():
    division_name = division.strip()
    if division_name == "Central/West":
        division_name = "CentralWest"
    elif division_name == "South/West":
        division_name = "SouthWest"

    dict_needed = open_matches_dict[division]
    # order the matches 
    ordered_matches_dict = order_matches(dict_needed, division, WEEKS_OF_MATCHES)
    # ordered_matches_dict, tracking_matches_df = order_matches(dict_needed, division, WEEKS_OF_MATCHES)
    df_part_one = pd.DataFrame.from_dict(ordered_matches_dict, orient="index")
    df_needed = unnesting(df_part_one, ['away_team', 'home_team'])

    df_needed.to_csv(os.path.join(ORDERED_SCHEDULE_FOLDER, 'ordered_open_' + division_name + '.csv'))
    # tracking_matches_df.to_csv(os.path.join(ORDERED_SCHEDULE_FOLDER, 'missing_' + division_name + '.csv'))

# CLUB
for division in club_matches_dict.keys():
    division_name = division.strip()
    if division_name == "Central/West":
        division_name = "CentralWest"
    elif division_name == "South/West":
        division_name = "SouthWest"

    dict_needed = club_matches_dict[division]
    # order the matches 
    ordered_matches_dict = order_matches(dict_needed, division, WEEKS_OF_MATCHES)
    # ordered_matches_dict, tracking_matches_df = order_matches(dict_needed, division, WEEKS_OF_MATCHES)
    df_part_one = pd.DataFrame.from_dict(ordered_matches_dict, orient="index")
    df_needed = unnesting(df_part_one, ['away_team', 'home_team'])

    df_needed.to_csv(os.path.join(ORDERED_SCHEDULE_FOLDER, 'ordered_club_' + division_name + '.csv'))
    # tracking_matches_df.to_csv(os.path.join(ORDERED_SCHEDULE_FOLDER, 'missing_' + division_name + '.csv'))

week away team
17
week home team
Ole Miss
non looping team
27
week away team
16
week home team
St. Edward's University
non looping team
27


  df = df.applymap(lambda x: " ".join(x.split()) if isinstance(x, str) else x)
  ind_to_remove = sorted_division_matches_df[sorted_division_matches_df['away_team'] == away_team][sorted_division_matches_df['home_team'] == home_team].index[0]


IndexError: index 0 is out of bounds for axis 0 with size 0