### Parse the district html

#### Goal: build a DataFrame that lists every team in the field along with its division and district assignment

In [1]:
from bs4 import BeautifulSoup
import pandas as pd
from fuzzywuzzy import fuzz, process
import re

# Read the saved districts page from disk
html_path = 'data/districts_2023.html'
with open(html_path, 'r') as f:
    html = f.read()

# Build the parse tree
soup = BeautifulSoup(html, 'html.parser')

# Every district tournament is rendered inside its own 'keep-together' div
divisions = soup.find_all('div', class_='keep-together')

# One record per (district, team) pair
data = []
for division in divisions:
    division_number = division.find('span', attrs={'data-bind': 'text:Division'}).text
    tournament_name = division.find('span', attrs={'data-bind': 'text:TournamentName'}).text
    host = division.find('span', attrs={'data-bind': 'text:Host'}).text
    # The first anchor in the div is skipped; the remaining anchors are read as team names
    teams = [anchor.text for anchor in division.find_all('a')[1:]]

    data.extend(
        {
            'team': team,
            'division': division_number,
            # The district number is whatever digits appear in the tournament name
            'district': int(re.sub(r'\D+', '', tournament_name)),
            'host': host,
        }
        for team in teams
    )

df_by_district = pd.DataFrame(data)

# Nickname / colour lookup table, with lower-cased column names
df_nickname = pd.read_csv('data/school_info/mhsaa_school_nickname_color_2020.csv')
df_nickname.columns = df_nickname.columns.str.lower()

# For each team, pick the closest school name in the nickname table
matches = df_by_district['team'].apply(
    lambda x: process.extractOne(x, df_nickname['school'], scorer=fuzz.ratio)
)

# Each match starts with (matched name, score)
df_by_district['match_name'] = [hit[0] for hit in matches]
df_by_district['score'] = [hit[1] for hit in matches]

# Join the district rows to the nickname rows through the fuzzy-matched school name
final_df = df_by_district.merge(df_nickname, left_on='match_name', right_on='school', how='inner')

# Keep only the columns of interest
keep_columns = ['team', 'division', 'district', 'host', 'nickname', 'color1', 'color2', 'color3', 'color4', 'score']
final_df = final_df[keep_columns]

# Display the final dataframe
# print(final_df)
final_df.head(20)

Unnamed: 0,team,division,district,host,nickname,color1,color2,color3,color4,score
0,Alpena,1,1,Marquette,Wildcats,Green,White,,,100
1,Marquette,1,1,Marquette,Redmen/Redettes,Red,White,,,100
2,Mount Pleasant,1,1,Marquette,Oilers,Blue,Gold,,,100
3,Traverse City Central,1,1,Marquette,Trojans,Black,Gold,,,100
4,Warren Fitzgerald,2,57,Hazel Park,Trojans,Black,Gold,,,53
5,Traverse City West,1,1,Marquette,Titans,Forest Green,Vegas Gold,,,100
6,Bay City Central,1,2,Midland Dow,Wolves,Purple,Gold,,,100
7,Bay City Western,1,2,Midland Dow,Warriors,Brown,Gold,,,100
8,Midland,1,2,Midland Dow,Chemics,Blue,Gold,,,100
9,Midland Dow,1,2,Midland Dow,Chargers,Green,Gold,,,100


In [2]:
## Write the 2023 team info to data/2023_team_info.csv

# The host column is left out of the team info export
teams_df = final_df.drop('host', axis=1)

teams_df.to_csv('data/2023_team_info.csv', index=False)

In [3]:
# Keep only the rows where the team name equals the host name
is_host = final_df['host'] == final_df['team']
matching_rows = final_df[is_host]

# Order by district number and renumber the rows from zero
sorted_rows = matching_rows.sort_values(by='district').reset_index(drop=True)

# Show the result
print(sorted_rows)

# Persist the host rows
sorted_rows.to_csv('data/2023_district_hosts.csv', index=False)


                                   team division  district  \
0                             Marquette        1         1   
1                           Midland Dow        1         2   
2                  Muskegon Mona Shores        1         3   
3    Grand Rapids Forest Hills Northern        1         4   
4                            Grandville        1         5   
..                                  ...      ...       ...   
122                             Genesee        4       124   
123  Sterling Heights Parkway Christian        4       125   
124     Waterford Our Lady of the Lakes        4       126   
125           Riverview Gabriel Richard        4       127   
126          Plymouth Christian Academy        4       128   

                                   host         nickname         color1  \
0                             Marquette  Redmen/Redettes           Red    
1                           Midland Dow         Chargers         Green    
2                  Muskegon Mo

In [None]:
final_df.head(20)

## Show the lowest scores in the dataframe

final_df.sort_values(by='score').head(20)

## Show the distro of scores

# final_df['score'].hist()

## Show numerical counts of scores in increments of 5

# final_df['score'].value_counts(bins=range(0, 101, 5))

# Number of match scores under 90

len(final_df[final_df['score'] < 90])

# Number of match scores under 80

len(final_df[final_df['score'] < 80])

# The single-block operation above replaces the functionality of the following blocks of beta code

## Create a table with School info (Name, division, district assignment - from the district_2023 html on MHSAA site)

### Then merge that into the info from the table I have with School Nickname and colors

In [None]:
## Load entire district tree from local html file

import pandas as pd
import numpy as np
import os
import re
import json
import requests
from bs4 import BeautifulSoup

# Forward slashes work on every OS and avoid backslash escape sequences
# inside the string literal ('\d' is an invalid escape).
html_path = 'data/districts_2023.html'

with open(html_path, 'r') as f:
    html = f.read()
    


In [None]:
#### ALL IN ONE TRY ####

from bs4 import BeautifulSoup
import pandas as pd

html_doc = html
soup = BeautifulSoup(html_doc, 'html.parser')

# Find all 'div' tags with class 'keep-together'
divisions = soup.find_all('div', class_='keep-together')

# Collect one dict per tournament and build the DataFrame once at the end.
# DataFrame.append was removed in pandas 2.0, and appending inside a loop
# copies the whole frame on every iteration.
rows = []
for division in divisions:
    division_number = division.find('span', {'data-bind': 'text:Division'}).text
    tournament_name = division.find('span', {'data-bind': 'text:TournamentName'}).text
    host = division.find('span', {'data-bind': 'text:Host'}).text
    location = division.find('a', {'data-bind': 'text: Title, attr: {href: LocationUrl}'}).text

    # Find all the team names, skipping the first 'a' tag which is the location
    teams = [a.text for a in division.find_all('a')[1:]]

    rows.append({
        'Division': division_number,
        'Tournament Name': tournament_name,
        'Host': host,
        'Location': location,
        'Teams': teams,
    })

# Passing columns= keeps the expected layout even when no tournaments were found
df = pd.DataFrame(rows, columns=['Division', 'Tournament Name', 'Host', 'Location', 'Teams'])

In [None]:
### Get a DF with a row for every team

df_temp = df.explode('Teams')

df_temp.info()
df_temp.head()

In [None]:
## Clean up the new team DF

## Drop the non-numerical characters from 'Tournament Name' and rename the column to 'District'

# regex=True is required: since pandas 2.0 Series.str.replace treats the
# pattern as a literal string by default, which would leave the text untouched
# and make the int conversion below fail.
df_temp['District'] = df_temp['Tournament Name'].str.replace(r'\D+', '', regex=True)

df_temp['District'] = df_temp['District'].astype(int)

# Drop the columns that are no longer needed
df_temp.drop(['Tournament Name', 'Host', 'Location'], axis=1, inplace=True)

# Rename Teams to Team
df_temp.rename(columns={'Teams': 'Team'}, inplace=True)

# Remove capitalization from column names
df_temp.columns = df_temp.columns.str.lower()


## Move team name to first column
# The previous last-to-first rotation moved 'district' (the most recently
# added column) to the front instead of 'team', so the order is spelled out.
cols = ['team', 'division', 'district']
df_temp = df_temp[cols]

df_temp.head()

In [None]:
### Rename df

df_by_district = df_temp

## Load the csv that contains nickname etc. info
# Forward slashes keep the path portable and free of backslash escapes
# ('\s' and '\m' are invalid escape sequences in a normal string literal).
df_nickname = pd.read_csv('data/school_info/mhsaa_school_nickname_color_2020.csv')

## Remove capitalization from column names
df_nickname.columns = df_nickname.columns.str.lower()

df_nickname.head()

In [None]:
### match and merge the dataframes based on Team name and School name

### Use fuzzy match to match team names

from fuzzywuzzy import fuzz
from fuzzywuzzy import process

def match_name(name, list_names, min_score=0):
    """Return the best fuzzy match for *name* among *list_names*.

    Every candidate is scored with fuzz.ratio. A candidate is accepted only
    when its score is strictly greater than both *min_score* and the best
    score seen so far, so the earliest candidate wins ties.

    Returns:
        A (matched_name, score) tuple, or ("", -1) when no candidate
        scores above *min_score*.
    """
    best_name, best_score = "", -1
    for candidate in list_names:
        candidate_score = fuzz.ratio(name, candidate)
        # Skip candidates at or below the threshold or the current best
        if candidate_score <= min_score or candidate_score <= best_score:
            continue
        best_name, best_score = candidate, candidate_score
    return (best_name, best_score)

# One record per district team: the team name, its best match in the
# nickname table and the match score. match_name only accepts candidates
# scoring above the threshold of 75.
dict_list = []
for name in df_by_district.team:
    best_name, best_score = match_name(name, df_nickname.school, 75)
    dict_list.append({"team_name": name, "match_name": best_name, "score": best_score})

merge_table = pd.DataFrame(dict_list)
# Display results
# print(merge_table)
# Display results
# print(merge_table)





In [None]:
## Do the table merges

df_by_district = df_by_district.merge(merge_table, left_on='team', right_on='team_name', how='left')
df_nickname = df_nickname.merge(merge_table, left_on='school', right_on='match_name', how='left')


In [None]:
# Merge df_by_district and df_nickname on the common columns generated by fuzzy matching
# The join keeps only rows where the district 'team' value equals the
# nickname-side 'match_name' value (inner join).
# NOTE(review): the preceding cell merges merge_table into BOTH frames, so both
# sides appear to carry 'team_name', 'match_name' and 'score'. pandas would then
# suffix the overlapping non-key columns (_x / _y), and the plain 'score'
# selected below may not exist — confirm before reusing this cell.
final_df = pd.merge(df_by_district, df_nickname, left_on='team', right_on='match_name', how='inner')

# Select only the columns you're interested in
final_df = final_df[['team', 'division', 'district', 'nickname', 'color1', 'color2', 'color3', 'color4', 'score']]


# Display the final dataframe
print(final_df)


In [None]:
# Save to csv to check.
# Forward slashes keep the path portable and free of backslash escapes.
df.to_csv('data/district_2023_team_and_host.csv', index=False)

### End 2023 Team info creation

In [None]:
### Create a dictionary of all the teams separated by division level

# Create an empty dictionary to store the data
divisions_dict = {}

## 

In [None]:
from bs4 import BeautifulSoup
import pandas as pd

html_doc = html
soup = BeautifulSoup(html_doc, 'html.parser')

# Find all 'div' tags with class 'keep-together'
divisions = soup.find_all('div', class_='keep-together')

# Collect one dict per tournament and build the DataFrame once at the end.
# DataFrame.append was removed in pandas 2.0, and appending inside a loop
# copies the whole frame on every iteration.
rows = []
for division in divisions:
    division_number = division.find('span', {'data-bind': 'text:Division'}).text
    tournament_name = division.find('span', {'data-bind': 'text:TournamentName'}).text
    host = division.find('span', {'data-bind': 'text:Host'}).text
    location = division.find('a', {'data-bind': 'text: Title, attr: {href: LocationUrl}'}).text
    # Team names are read from the spans bound to TeamName
    teams = [team.text for team in division.find_all('span', {'data-bind': 'highlightedText: { text: TeamName, highlight: $parents[1].Search, css: "highlight" }'})]

    rows.append({
        'Division': division_number,
        'Tournament Name': tournament_name,
        'Host': host,
        'Location': location,
        'Teams': teams,
    })

# Passing columns= keeps the expected layout even when no tournaments were found
df = pd.DataFrame(rows, columns=['Division', 'Tournament Name', 'Host', 'Location', 'Teams'])


In [None]:
df.head()

In [None]:
### Dependencies
import pandas as pd
import numpy as np


## File Paths
# Path to mhsaa tables to merge

# Forward slashes keep the paths portable and free of backslash escapes
# ('\s' and '\m' are invalid escape sequences in a normal string literal).
enrollment_path = 'data/school_info/mhsaa_enrolment_2022.csv'
name_color_path = 'data/school_info/mhsaa_school_nickname_color_2020.csv'

df_enrol = pd.read_csv(enrollment_path)
df_name_color = pd.read_csv(name_color_path)





In [None]:
### 5-9-23

## Code to scrape 2023 MHSAA Tourny Data




In [None]:
import pandas as pd

# Replace this with the plain text containing the tournament information
# read a text file into the variable text

# Read the raw districts text; the context manager closes the file handle,
# which the bare open(...).read() call never did.
with open('2023_districts_raw.txt', 'r') as raw_file:
    text = raw_file.read()

# Split the text into sections for each division
sections = text.split('Division ')

# Remove the first element (whatever precedes the first 'Division ' marker)
sections.pop(0)

# Initialize empty lists for each column in the dataframe
divisions = []
districts = []
hosts = []
locations = []

# Loop through the sections and extract the relevant information
for section in sections:
    lines = section.split('\n')
    division = 'Division ' + lines[0]
    for line in lines[1:]:
        if 'Baseball District' in line:
            district = line
        elif 'Host:' in line:
            host = line.split(': ')[1]
        elif 'Location:' in line:
            location = line.split(': ')[1]
        elif line != '':
            # Any other non-blank line adds a row with the most recently
            # seen district / host / location, so one district yields one
            # identical row per such line.
            districts.append(district)
            divisions.append(division)
            hosts.append(host)
            locations.append(location)

# Create a dataframe to store the extracted information
df = pd.DataFrame({'Division': divisions, 'District': districts, 'Host': hosts, 'Location': locations})

# Print the dataframe
print(df)


In [None]:
### DF came back as a ton of duplicates. I need to clean it up.

df = df.drop_duplicates()

# Reindex the dataframe
df = df.reset_index(drop=True)

# output the dataframe to a csv file
df.to_csv('data/2023_district_hosts.csv', index=False)


print(df)

In [None]:
df.head(20)

df.info()

## Regional Locations

In [None]:
### URLs of pages with regional data

urls = {'Division 1': 'https://my.mhsaa.com/Sports/MHSAA-Tournament-Brackets/BracketGroup/9/Classification/1/SportSeasonId/424201',
        'Division 2': 'https://my.mhsaa.com/Sports/MHSAA-Tournament-Brackets/BracketGroup/9/Classification/2/SportSeasonId/424201',
        'Division 3': 'https://my.mhsaa.com/Sports/MHSAA-Tournament-Brackets/BracketGroup/9/Classification/3/SportSeasonId/424201',
        'Division 4': 'https://my.mhsaa.com/Sports/MHSAA-Tournament-Brackets/BracketGroup/9/Classification/4/SportSeasonId/424201'}

In [None]:
import requests
from lxml import html
import pandas as pd

# Define the URLs for each division
urls = {'Division 1': 'https://my.mhsaa.com/Sports/MHSAA-Tournament-Brackets/BracketGroup/9/Classification/1/SportSeasonId/424201',
        'Division 2': 'https://my.mhsaa.com/Sports/MHSAA-Tournament-Brackets/BracketGroup/9/Classification/2/SportSeasonId/424201',
        'Division 3': 'https://my.mhsaa.com/Sports/MHSAA-Tournament-Brackets/BracketGroup/9/Classification/3/SportSeasonId/424201',
        'Division 4': 'https://my.mhsaa.com/Sports/MHSAA-Tournament-Brackets/BracketGroup/9/Classification/4/SportSeasonId/424201'}

# Initialize empty lists for each column in the dataframe
divisions = []
locations = []
links = []

# Loop through each division URL in the dictionary
for division, url in urls.items():
    try:
        # A timeout stops a stalled connection from hanging the notebook
        page = requests.get(url, timeout=30)
        # Treat HTTP error statuses as failures instead of parsing an error page
        page.raise_for_status()
        tree = html.fromstring(page.content)

        # Find all the contest location spans using XPath
        location_spans = tree.xpath('//span[@class="contestlocation"]')

        # Loop through the location spans and extract the relevant information
        for location_span in location_spans:
            # Extract the location and link from the contest location span
            location = location_span.xpath('text()')[0].strip()
            link = location_span.xpath('a/@href')[0]
            # Append only after both values were read so the lists stay aligned
            divisions.append(division)
            locations.append(location)
            links.append(link)
    except Exception as exc:
        # Best effort per division: report the cause and move on. Catching
        # Exception (not a bare except) lets Ctrl-C / SystemExit through.
        print(f'Error: Failed to retrieve data for {division}: {exc}')

# Create a dataframe to store the extracted information
df = pd.DataFrame({'Division': divisions, 'Location': locations, 'Link': links})

# Reset the index of the dataframe
df = df.reset_index(drop=True)

# Print the dataframe
print(df)


In [None]:
## Clean up the dataframe

## Drop duplicates
df = df.drop_duplicates()

df.head(30)
# df.info()

In [None]:
df.info()

### Output as a csv
## Might want to go back and adjust code to try to store which specific games are at each location
## regional has (semis and finals) then there is a quarterfinals round

df.to_csv('data/2023_regional_hosts.csv', index=False)

# Next Project

### Create a json with just the fields in michigan and try to integrate a column that marks the appropriate fields as host of districts and regionals

The location text in the playoff CSVs is not going to match the field names very well. It might be worth trying to identify the fields from their map locations instead — that will require going back to the districts page and extracting the map locations.

In [None]:
### Try to get all the google maps link from the districts page

import pandas as pd
from bs4 import BeautifulSoup

## Read local file
path = 'districts_2023.html'
# The context manager closes the file handle, which open(...).read() left open
with open(path, 'r') as html_file:
    html = html_file.read()


# Parse the HTML
soup = BeautifulSoup(html, 'html.parser')

# Find all the tournament divs
tournaments = soup.select('div.keep-together')

# Initialize lists to store data
district_numbers = []
hosts = []
locations = []

# Extract data for each tournament
for tournament in tournaments:
    # Despite the variable name, this reads the 'text:Division' span and is
    # stored in the 'Division' column below
    district_number = tournament.find('span', {'data-bind': 'text:Division'}).text
    host = tournament.find('span', {'data-bind': 'text:Host'}).text
    # href of the first link that opens in a new tab
    location = tournament.find('a', {'target': '_blank'}).get('href')

    district_numbers.append(district_number)
    hosts.append(host)
    locations.append(location)

# Create a DataFrame
data = {'Division': district_numbers, 'Host': hosts, 'Location': locations}
df = pd.DataFrame(data)

# Add another column that is the index number of the row + 1
df['District'] = df.index + 1

# Print the DataFrame
print(df)

df.to_csv('district_tournaments.csv', index=False)




# Try to match up locations from the scraped district and regional and link them to a field in my json data

## Strategy: The district CSV contains a field with a link to a Google Maps search. Loop through all of those links and return the latitude and longitude coordinates, then match the coordinates to the nearest home plate coordinate in the JSON

In [None]:
import pandas as pd
import googlemaps

## paths

import os

# Forward slashes keep the paths portable and free of backslash escapes
local_json = 'data/michigan_fields.json'

district_csv = 'district_tournaments.csv'

# The backslash form of this path contained '\202', which Python reads as an
# octal escape (a single control character), so the file name was wrong.
regional_csv = 'data/2023_regional_hosts.csv'

# Read the Google Maps API key from the environment instead of hard-coding a
# secret in the notebook. None when GOOGLE_MAPS_API_KEY is not set.
api_key = os.environ.get("GOOGLE_MAPS_API_KEY")



In [None]:
# load Data
df = pd.read_csv(district_csv)

# df.head()

df.info()





In [None]:
### Clean up the location column to remove first portion and just leave the address remaining



# Remove the unwanted portion of the string in the 'Location' column
prefix = "http://maps.google.com/maps?q="

# str.lstrip(prefix) would strip any leading run of the *characters* in prefix
# (and could eat the start of the address), so the prefix is sliced off only
# when the value really starts with it. Non-string values pass through.
df['Location'] = df['Location'].apply(
    lambda x: x[len(prefix):] if isinstance(x, str) and x.startswith(prefix) else x
)

# Print the cleaned DataFrame
print(df)

In [None]:
df.head()

In [None]:
## USES GOOGLE CODE
import pandas as pd
import googlemaps

import os

# Read the Google Maps API key from the GOOGLE_MAPS_API_KEY environment
# variable instead of hard-coding a secret in the notebook. A missing
# variable raises KeyError here rather than failing later inside the client.
api_key = os.environ["GOOGLE_MAPS_API_KEY"]
gmaps = googlemaps.Client(key=api_key)

# # Create a DataFrame from your data (use your actual DataFrame here)
# data = {
#     "Division": [1, 1, 1],
#     "Host": ["Marquette", "Midland Dow", "Muskegon Mona Shores"],
#     "Location": [
#         "North Marquette Fields, Marquette, MI",
#         "H H Dow High School - Baseball, 3901 N. Saginaw Rd. Midland, MI",
#         "Mona Shores Baseball Field, 1121 W. Seminole Rd. Muskegon, MI",
#     ],
#     "District": [1, 2, 3],
# }

# df = pd.DataFrame(data)

# Geocode an address through the gmaps client
def get_coordinates(address):
    """Return (lat, lng) of the first geocoding result for *address*.

    Returns None when the geocoder returns no results.
    """
    results = gmaps.geocode(address)
    if not results:
        return None
    point = results[0]["geometry"]["location"]
    return (point["lat"], point["lng"])

# Apply the function to the 'Location' column and store the coordinates in a new column
df["Coordinates"] = df["Location"].apply(get_coordinates)

# Print the updated DataFrame
print(df)


In [None]:
## Check Output

df.info()

## Google code worked OK - returned coords for 126 of 128

### Below I am going to try to match up those coordinates to the michigan fields jsons

In [None]:
#### set up paths and load data(copied from above)


import pandas as pd
import json
# import googlemaps

## paths

import os

# Forward slashes keep the paths portable and free of backslash escapes
local_json = 'data/michigan_fields.json'

district_csv = 'district_tournaments.csv'

# The backslash form of this path contained '\202', which Python reads as an
# octal escape (a single control character), so the file name was wrong.
regional_csv = 'data/2023_regional_hosts.csv'

# Read the Google Maps API key from the environment instead of hard-coding a
# secret in the notebook. None when GOOGLE_MAPS_API_KEY is not set.
api_key = os.environ.get("GOOGLE_MAPS_API_KEY")

# load Data
df = pd.read_csv(district_csv)

## Load MI fields data from json file


# Read the JSON file
with open(local_json) as f:
    data = json.load(f)

# Create a DataFrame from the JSON data
mi_df = pd.DataFrame(data)

mi_df.head()

# df.head()

In [None]:
### Try a different approach to match the fields
## Use the Host name to find 3 matches from the mi_df
import pandas as pd
from fuzzywuzzy import fuzz, process

# Assuming you have the two dataframes df and mi_df

def find_closest_park_names(host, n_closest=3):
    """Return up to *n_closest* park names from mi_df that best match *host*.

    Candidates come from mi_df["park_name"] and are ranked with
    fuzz.token_sort_ratio.
    """
    ranked = process.extract(
        host,
        mi_df["park_name"],
        limit=n_closest,
        scorer=fuzz.token_sort_ratio,
    )
    # Each result is unpacked as (name, score, index); only the name is kept
    names = []
    for park, _score, _index in ranked:
        names.append(park)
    return names

# Apply the function to the 'Host' column and store the results in new columns
df[["closest_park_1", "closest_park_2", "closest_park_3"]] = df["Host"].apply(find_closest_park_names).apply(pd.Series)

# Print the updated DataFrame
print(df)


In [None]:
### FUNCTION TO FIND NEAREST FIELD TO DISTRICT TOURNAMENT LOCATION

import math

## Define a function to calculate the Haversine distance between two points
def haversine_distance(coord1, coord2):
    """Return the great-circle distance in kilometres between two points.

    Args:
        coord1: (latitude, longitude) of the first point, in degrees.
        coord2: (latitude, longitude) of the second point, in degrees.

    Returns:
        The haversine distance on a sphere of radius 6371 km.
    """
    # Convert latitude and longitude to radians
    lat1, lon1 = map(math.radians, coord1)
    lat2, lon2 = map(math.radians, coord2)

    # Differences between latitudes and longitudes
    dlat = lat2 - lat1
    dlon = lon2 - lon1

    a = math.sin(dlat / 2) ** 2 + math.cos(lat1) * math.cos(lat2) * math.sin(dlon / 2) ** 2
    # Floating-point rounding can push `a` marginally outside [0, 1] for
    # (near-)antipodal points, which would make asin/sqrt raise ValueError.
    a = min(1.0, max(0.0, a))
    c = 2 * math.asin(math.sqrt(a))
    r = 6371  # Radius of the Earth in km

    return c * r

def find_closest_parks(coord, n_closest=3):
    """Return the names of the *n_closest* parks in mi_df nearest to *coord*.

    When *coord* is None a list of "Unknown" placeholders of the same length
    is returned. As a side effect the "distance" column of mi_df is
    overwritten with the distance from *coord* to every park.
    """
    if coord is None:
        return ["Unknown"] * n_closest

    def _distance_to(plate):
        # The two home_plate components are swapped before the comparison
        return haversine_distance(coord, (plate[1], plate[0]))

    mi_df["distance"] = mi_df["home_plate"].apply(_distance_to)
    nearest = mi_df["distance"].nsmallest(n_closest)
    return mi_df.loc[nearest.index, "park_name"].tolist()



# Convert each 'home_plate' entry to a 2-tuple of its first two elements.
# NOTE(review): this does not reorder the components; find_closest_parks swaps
# them (x[1], x[0]) before computing distances, which suggests the stored order
# is (lng, lat) — confirm against the JSON.
mi_df["home_plate"] = mi_df["home_plate"].apply(lambda x: (x[0], x[1]))

# Fill closest_park_1..3 in df with the park names nearest to each row's 'Coordinates'
df[["closest_park_1", "closest_park_2", "closest_park_3"]] = df["Coordinates"].apply(find_closest_parks).apply(pd.Series)


# Print the updated DataFrame
print(df)



In [None]:
df.head()

In [None]:
## Output this matching as a csv so I can manually check it

df.to_csv('district_fields_text_match.csv', index=False)

## End of 5-9-23 Work for now. output csv file with possible matches for the district fields


# Working with outlier fields 

## Start 5/9/23 Night


In [None]:
## Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import scipy.stats as st
import numpy as np

# Load files
out_df = pd.read_csv('outlier_fields.csv')

out_df.info()


In [None]:
# df_out = df_out.copy

In [None]:
import pandas as pd

# Assuming you already have the out_df DataFrame
# Creating an empty HP2 column
out_df['HP2'] = None

# Loop through the DataFrame rows and populate the HP2 column with the first
# element found to occur more than once in that row's 'fop' value.
# NOTE(review): out_df is loaded with pd.read_csv, so 'fop' is presumably a
# string; in that case set(fop_list) iterates single characters rather than
# coordinate points — confirm and parse the column first if so.
# NOTE(review): set iteration order is not defined, so which repeated element
# ends up at index 0 may vary when several repeat.
for idx, row in out_df.iterrows():
    fop_list = row['fop']
    repeated_points = [point for point in set(fop_list) if fop_list.count(point) > 1]
    if len(repeated_points) > 0:
        out_df.loc[idx, 'HP2'] = str(repeated_points[0])

# Display the updated DataFrame
out_df.head(20)









In [None]:
import pandas as pd
import requests
from lxml import html

url = 'https://my.mhsaa.com/Sports/Baseball/Districts'  # Replace this with the URL of the webpage you want to scrape
# A timeout stops a stalled connection from hanging the notebook
page = requests.get(url, timeout=30)
# Fail loudly on an HTTP error instead of parsing an error page
page.raise_for_status()
tree = html.fromstring(page.content)

# Find the game location using the XPath (first matching anchor)
location = tree.xpath('/html/body/form/div[5]/div[2]/div/div/div[2]/div[1]/div/div/div/div/div/div/div/div[2]/div[2]/div[1]/div[2]/div[1]/a')[0]

# Extract the relevant information
name = location.text.strip()
link = location.get('href')
# The address is the text between the first and second '=' of the link
address = link.split('=')[1].strip()

# Create a dataframe to store the scraped data
df = pd.DataFrame({'Field_name': [name], 'Location': [address], 'Link': [link]})

# Print the dataframe
print(df)


# Get latitude and longitude for each location.
# get_latitude_longitude is not defined anywhere in this notebook; the
# geocoding helper defined in the Google Maps cell is get_coordinates(address).
df["Coordinates"] = df["Location"].apply(get_coordinates)

# Print the DataFrame
print(df)




In [None]:
print(df_name_color.columns)

In [None]:
### Match the school name to School and merge the dataframes into a single object
from fuzzywuzzy import fuzz, process

def find_best_match(school_name, choices, score_cutoff=70):
    """Return the entry of *choices* that best matches *school_name*.

    Matching uses fuzz.token_sort_ratio with *score_cutoff* passed through to
    process.extractOne. Returns None when extractOne yields no match.
    """
    result = process.extractOne(
        school_name,
        choices,
        scorer=fuzz.token_sort_ratio,
        score_cutoff=score_cutoff,
    )
    return result[0] if result else None

# Get the list of school names from df_name_color
school_names = df_name_color['School'].tolist()

# Apply find_best_match function to create a new column 'best_match' in df_enrol
df_enrol['best_match'] = df_enrol['school_name'].apply(find_best_match, choices=school_names, score_cutoff=80)



In [None]:
# Rename the 'School' column in df_name_color to 'best_match'
df_name_color = df_name_color.rename(columns={'School': 'best_match'})

# Merge the dataframes on the 'best_match' column
df_merged = df_enrol.merge(df_name_color, on='best_match', how='left')


In [None]:
print(df_enrol.columns)

print(df_name_color.columns)

In [None]:
# - get icons sorted out
# -*** DONE*** get the level assigner sorted out in the etl
# - add filter based on level to map
# - implement the search box places from 