In [None]:
##### ETL NOTEBOOK FOR 2023 MHSAA TOURNEY SPECIFIC MAP

#### Adapted from ETL for JSON

## Dependencies and Setup
### Dependencies

from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
import os
import re
import time

## Start timer
start_time = time.time()


In [None]:
## LOAD BLOCK###
#### Load data from kml file exported from Google Earth

file_path = ('data/kml/MHSAA_2023.kml') # file path to kml file


# Read the KML file
with open(file_path) as file:
    xml_data = file.read()

# Initialize soup variables for parsing file
soup = BeautifulSoup(xml_data, 'xml')
folders = soup.Document.Folder
# Every nested <Folder> is treated as one field. The result is deliberately
# NOT called `list`, so the built-in stays usable in later cells.
field_folders = folders.find_all('Folder')

# Rows collected for the DataFrame, plus the names of folders that could not be parsed
rows = []
failed = []

# Loop through the folders and extract the data
for folder in field_folders:
    # Resolve the name once, outside the try, so the error handler itself cannot raise
    name_tag = folder.find('name')
    folder_name = name_tag.text if name_tag is not None else '<unnamed folder>'

    try:
        # First <coordinates> element is stored as 'foul', second as 'fop'
        coordinates = folder.find_all('coordinates')
        description = folder.find('description')

        rows.append({
            'field': name_tag.text,
            'foul': coordinates[0].text,
            'fop': coordinates[1].text,
            # The description tag is optional; notes stay None without it
            'notes': description.text if description is not None else None,
        })

    except Exception as e:
        # Record the folder and keep going so one bad folder does not abort the load
        failed.append(folder_name)
        print(f"Error processing folder: {folder_name}. Error message: {str(e)}")

# Convert the list of rows to a DataFrame
df = pd.DataFrame(rows, columns=['field', 'foul', 'fop', 'notes'])

# Summarise failures (silent when everything parsed)
if failed:
    print('Failed to parse:', failed)


In [None]:
df.head()

In [None]:
# Clean the new dataframe


# Create a copy of the original DataFrame
df_cleaned = df.copy()

# Remove newline and tab characters from every string value
df_cleaned = df_cleaned.replace(r'[\n\t]', '', regex=True)

# Drop any duplicate rows (same field name); the first occurrence wins
df_cleaned = df_cleaned.drop_duplicates(subset=['field'], keep='first')

# Drop rows whose name or coordinate strings are missing or blank.
# 'notes' is optional and is intentionally not checked. The previous
# `!= 0` comparison never matched a blank or missing string, so nothing was dropped.
required_columns = ['field', 'foul', 'fop']
has_required = pd.Series(True, index=df_cleaned.index)
for required_column in required_columns:
    values = df_cleaned[required_column]
    has_required &= values.notna() & values.astype(str).str.strip().ne('')

# .copy() so later cells can add columns without SettingWithCopyWarning
df_cleaned = df_cleaned[has_required].copy()

In [None]:
df_cleaned.info()
df_cleaned.head()

In [None]:
##### Clean up polygon data and create a new home_plate column

def parse_coordinates(coord_string):
    """Turn a whitespace-separated coordinate string into a list of float tuples.

    Each whitespace-delimited token is split on commas and only its first two
    components are kept (a third component, if present, is discarded).

    Parameters
    ----------
    coord_string : str
        Text such as ``"x1,y1,z1 x2,y2,z2"``.

    Returns
    -------
    list of tuple of float
        One tuple per token, in input order; an empty list for blank input.

    Raises
    ------
    ValueError
        If a kept component cannot be converted to ``float``.
    """
    points = []
    for token in coord_string.split():
        leading_pair = token.split(',')[:2]
        points.append(tuple(float(component) for component in leading_pair))
    return points

# Replace the raw coordinate strings in 'foul' and 'fop' with parsed lists of tuples
for coordinate_column in ('foul', 'fop'):
    df_cleaned[coordinate_column] = df_cleaned[coordinate_column].apply(parse_coordinates)

# home_plate is the first parsed vertex of the 'fop' polygon
df_cleaned['home_plate'] = df_cleaned['fop'].apply(lambda vertices: vertices[0])


In [None]:
############## AREA CALCULATION ##############


import pyproj
from shapely.geometry import Polygon
from shapely.ops import transform


def calculate_area(coords):
    """Return the area, in square feet, of the polygon described by ``coords``.

    The polygon is built from ``coords`` as given, re-projected from EPSG:4326
    into a Lambert azimuthal equal-area projection centred on the polygon's own
    centroid, and measured there in square metres before conversion.

    Parameters
    ----------
    coords : sequence of (x, y) tuples
        Vertices in EPSG:4326 with x = longitude and y = latitude (the
        transformer is created with ``always_xy=True``).

    Returns
    -------
    float
        Polygon area in square feet (square metres multiplied by 10.764).
    """
    sqft_per_sqm = 10.764

    shape = Polygon(coords)
    center = shape.centroid

    # Equal-area projection whose origin is the polygon's centroid
    laea_definition = f"+proj=laea +lat_0={center.y} +lon_0={center.x} +x_0=0 +y_0=0 +datum=WGS84 +units=m +no_defs"
    to_laea = pyproj.Transformer.from_crs(
        pyproj.CRS("EPSG:4326"),
        pyproj.CRS(laea_definition),
        always_xy=True
    )

    # Re-project every vertex, then measure in the projected (metre) plane
    projected_shape = transform(lambda x, y: to_laea.transform(x, y), shape)

    return projected_shape.area * sqft_per_sqm



### Compute the area of each polygon column and store it as '<column>_area_sqft'
for polygon_column in ('foul', 'fop'):
    df_cleaned[f'{polygon_column}_area_sqft'] = df_cleaned[polygon_column].apply(calculate_area)

## Total field area = foul area + fop area
foul_sqft = df_cleaned['foul_area_sqft']
fair_sqft = df_cleaned['fop_area_sqft']
df_cleaned['field_area_sqft'] = foul_sqft + fair_sqft

## Share of the total field area that is foul area (a fraction, not multiplied by 100)
df_cleaned['foul_area_per'] = foul_sqft / df_cleaned['field_area_sqft']

## Fair to Foul Ratio
df_cleaned['fair_to_foul'] = fair_sqft / foul_sqft


In [None]:
############# FENCE DISTANCE CALCULATION #############

from geopy.distance import great_circle
import numpy as np

def interpolate_points(start, end, length_ratio):
    """Linearly interpolate between two points.

    Parameters
    ----------
    start, end : sequence of numbers
        Coordinates of the two endpoints (same length).
    length_ratio : float
        Position along the segment: 0 gives ``start``, 1 gives ``end``.

    Returns
    -------
    tuple
        ``start + (end - start) * length_ratio``, component-wise.
    """
    origin = np.array(start)
    offset = np.array(end) - origin
    return tuple(origin + offset * length_ratio)

def calculate_distances(home_plate, outfield_coords, num_points=90):
    """Measure the distance from home plate to evenly spaced points along the fence.

    The fence line is the polyline through ``outfield_coords``, excluding every
    segment that starts or ends at home plate. ``num_points`` sample points are
    spaced equally (by great-circle length) along that line, and the
    great-circle distance from home plate to each one is returned.

    Parameters
    ----------
    home_plate : tuple
        ``(lon, lat)`` of home plate.
    outfield_coords : list of tuple
        ``(lon, lat)`` vertices of the outfield polygon, in drawing order.
    num_points : int, default 90
        Number of sample points; must be at least 2.

    Returns
    -------
    list of int
        Distances in feet, rounded to the nearest foot, one per sample point.

    Raises
    ------
    ValueError
        If ``num_points`` is less than 2, or if no segment with non-zero length
        remains once the segments touching home plate are removed.
    """
    if num_points < 2:
        raise ValueError("num_points must be at least 2")

    def is_same_point(point1, point2, tolerance=1e-6):
        return abs(point1[0] - point2[0]) < tolerance and abs(point1[1] - point2[1]) < tolerance

    # geopy expects (lat, lon); the coordinates here are stored as (lon, lat)
    home_plate_lat_lon = (home_plate[1], home_plate[0])

    # Collect the fence segments and the total fence length
    total_length = 0
    segments = []
    for start, end in zip(outfield_coords, outfield_coords[1:]):
        if is_same_point(home_plate, start) or is_same_point(home_plate, end):
            continue
        segment_length = great_circle((start[1], start[0]), (end[1], end[0])).feet
        if segment_length == 0:
            # Repeated vertex: adds no length and would divide by zero below
            continue
        segments.append((start, end, segment_length))
        total_length += segment_length

    if not segments:
        raise ValueError("outfield_coords contains no fence segments away from home plate")

    # Distance between consecutive sample points along the fence
    spacing = total_length / (num_points - 1)

    # Walk the fence, placing one sample every `spacing` feet
    distances = []
    current_length = 0
    segment_index = 0
    for _ in range(num_points):
        # Advance to the segment containing the current position
        while segment_index < len(segments) - 1 and current_length > segments[segment_index][2]:
            current_length -= segments[segment_index][2]
            segment_index += 1

        start, end, segment_length = segments[segment_index]
        # Clamp so accumulated float error cannot push the last sample past the final vertex
        length_ratio = min(current_length / segment_length, 1.0)
        point = interpolate_points(start, end, length_ratio)
        distance = round(great_circle(home_plate_lat_lon, (point[1], point[0])).feet)
        distances.append(distance)

        current_length += spacing

    return distances

# Sample the fence distances for every field (one list of distances per row)
df_cleaned['distances'] = df_cleaned.apply(
    lambda field_row: calculate_distances(field_row['home_plate'], field_row['fop']),
    axis=1,
)

# Summary statistics of each row's distance list, keyed by the column they are stored in
distance_stats = {
    'max_distance': max,
    'min_distance': min,
    'avg_distance': lambda distances: sum(distances) / len(distances),
    'median_distance': lambda distances: np.median(distances),
}
for stat_column, stat_function in distance_stats.items():
    df_cleaned[stat_column] = df_cleaned['distances'].apply(stat_function)


In [None]:
######## CHECK BLOCK ########
# Sanity check on the 'distances' column: count how many distance samples each
# row holds, then show how often each count occurs.

## Store the length of each row's distance list in a new 'num_distances' column
df_cleaned['num_distances'] = df_cleaned['distances'].apply(len)

## Display the value counts for the 'num_distances' column (last expression of the cell)
df_cleaned['num_distances'].value_counts()

In [None]:
########## NOT NECESSARY FOR THIS PROJECT ##########

# ### Get Geolocation of each field based on home plate coordinates and return state and country
# ### This block takes a long time to run - will need to revisit
# ## up to ten minutes

# from geopy.geocoders import Nominatim
# from geopy.exc import GeocoderTimedOut, GeocoderServiceError
# from tqdm import tqdm

# geolocator = Nominatim(user_agent="baseball_field_locator")

# # Function to get location information
# def get_location_info(lng, lat):
#     try:
#         location = geolocator.reverse((lat, lng), timeout=10)
#         state = location.raw['address'].get('state', None)
#         country = location.raw['address'].get('country', None)
#         return state, country
#     except GeocoderTimedOut:
#         print(f"GeocoderTimedOut error for coordinates: ({lng}, {lat})")
#         return None, None
#     except GeocoderServiceError:
#         print(f"GeocoderServiceError for coordinates: ({lng}, {lat})")
#         return None, None

# # Extract the first coordinate for each field
# df_cleaned['lng'], df_cleaned['lat'] = zip(*df_cleaned['home_plate'].apply(lambda x: x))

# # Wrap the DataFrame apply function with tqdm for progress indication
# tqdm.pandas(desc="Processing coordinates")

# # Get state and country information for each field
# df_cleaned[['state', 'country']] = df_cleaned.progress_apply(lambda row: get_location_info(row['lng'], row['lat']), axis=1, result_type='expand')


In [None]:
df_cleaned.head()

In [None]:
######## CHECK AND WHATNOT BLOCK



In [None]:
## Function to create ranks for each column

def rank_fields(df):
    """Add descending rank columns for the distance and area metrics.

    Every rank uses ``ascending=False`` (largest value gets rank 1) and
    ``method='min'`` (ties share the lowest rank of their group).

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the source columns named in the mapping below.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, modified in place with the rank columns added.
    """
    # rank column -> column it is computed from
    # NOTE: 'fop_area_per_rank' is computed from 'fop_area_sqft'.
    rank_sources = {
        'max_distance_rank': 'max_distance',
        'min_distance_rank': 'min_distance',
        'avg_distance_rank': 'avg_distance',
        'median_distance_rank': 'median_distance',
        'field_area_rank': 'field_area_sqft',
        'foul_area_rank': 'foul_area_sqft',
        'fop_area_per_rank': 'fop_area_sqft',
        'ratio_rank': 'fair_to_foul',
    }

    for rank_column, source_column in rank_sources.items():
        df[rank_column] = df[source_column].rank(ascending=False, method='min')

    return df



In [None]:
## Run Function

df_cleaned = rank_fields(df_cleaned)

In [None]:
#### Orienting the map to the home plate location ####

### Find the center of the field
def calculate_centroid(coords):
    """Return the arithmetic mean of a list of points.

    This is the plain average of the vertex coordinates, not an area-weighted
    polygon centroid.

    Parameters
    ----------
    coords : sequence of (x, y)
        Points to average; must not be empty.

    Returns
    -------
    tuple
        ``(mean_x, mean_y)``.

    Raises
    ------
    ZeroDivisionError
        If ``coords`` is empty.
    """
    point_count = len(coords)
    mean_x = sum(point[0] for point in coords) / point_count
    mean_y = sum(point[1] for point in coords) / point_count
    return (mean_x, mean_y)


## Find the bearing between the home plate and the center of the field
import math

def calculate_bearing(point1, point2):
    """Return the initial compass bearing from ``point1`` to ``point2``.

    Parameters
    ----------
    point1, point2 : tuple
        ``(lon, lat)`` in degrees.

    Returns
    -------
    float
        Bearing in degrees, normalized to the range [0, 360).
    """
    lon1, lat1 = math.radians(point1[0]), math.radians(point1[1])
    lon2, lat2 = math.radians(point2[0]), math.radians(point2[1])
    delta_lon = lon2 - lon1

    # The two arguments of atan2: first the sine-of-longitude term, then the latitude term
    east_component = math.cos(lat2) * math.sin(delta_lon)
    north_component = math.cos(lat1) * math.sin(lat2) - math.sin(lat1) * math.cos(lat2) * math.cos(delta_lon)

    raw_bearing = math.degrees(math.atan2(east_component, north_component))
    # atan2 yields (-180, 180]; shift into [0, 360)
    return (raw_bearing + 360) % 360

### Function to classify a bearing in layman's terms: North, South, East, West, etc.
def degrees_to_cardinal_direction(degrees):
    """Convert a bearing in degrees to the nearest 8-point compass direction.

    Parameters
    ----------
    degrees : float
        Bearing in degrees. Any real value is accepted; angles outside
        [0, 360) wrap around (``-45`` is ``'NW'``, ``405`` is ``'NE'``).

    Returns
    -------
    str
        One of ``'N'``, ``'NE'``, ``'E'``, ``'SE'``, ``'S'``, ``'SW'``, ``'W'``, ``'NW'``.
    """
    directions = ['N', 'NE', 'E', 'SE', 'S', 'SW', 'W', 'NW']
    # Each direction covers a 45-degree sector centred on its heading; the
    # modulo wraps the sector just below 360 back to 'N' and also handles
    # negative angles and angles beyond a full turn.
    sector = int(round(degrees / 45)) % len(directions)
    return directions[sector]


In [None]:
# Calculate the centroid of the outfield fence coordinates for each row.
# coords[1:] skips the first 'fop' vertex, which is the point stored as 'home_plate'.
df_cleaned['fop_centroid'] = df_cleaned['fop'].apply(lambda coords: calculate_centroid(coords[1:]))

# Calculate the bearing from home plate to that centroid for each row (degrees in [0, 360))
df_cleaned['field_orientation'] = df_cleaned.apply(lambda row: calculate_bearing(row['home_plate'], row['fop_centroid']), axis=1)

# Convert the bearing to a cardinal direction label ('N', 'NE', ...)
df_cleaned['field_cardinal_direction'] = df_cleaned['field_orientation'].apply(degrees_to_cardinal_direction)



### All the geo transformation should take place above this

## starting the process of matching in data from other sources

In [None]:
######### RENAME field to park_name #########
###### SHould move up the file ######

df_cleaned.rename(columns={'field': 'park_name'}, inplace=True)

In [None]:
######### NEED TO RENAME TO WORK WITH NEXT BLOCK #########

df_cleaned.info()

df_cleaned.head()

In [None]:


### Save the cleaned DataFrame to a prelim json

df_cleaned.to_json('data/mhsaa_step1.json', orient='records')

## WORKING HERE DOWN

In [1]:
import json
import pandas as pd
import numpy as np

### Load the host team info with nickname and team colors
path = 'data/2023_district_hosts.csv'
host_df = pd.read_csv(path)

## Load the prelim json
path = 'data/mhsaa_step1.json'
with open(path) as f:
    data = json.load(f)
parks_df = pd.DataFrame(data)

## open messy table to pull the notes column
## merge based on the district number
# Forward slash: works on every OS and avoids the invalid "\d" escape sequence
path = 'data/district_notes.csv'
messe_df = pd.read_csv(path)

# Clean up note column: a bare 'y' carries no note text, so treat it as missing
messe_df['Plot Note'] = messe_df['Plot Note'].replace('y', np.nan)

In [2]:
## Matching function to compare host names to park names

import pandas as pd
from fuzzywuzzy import fuzz
from fuzzywuzzy import process

def find_host_matches(host_df, parks_df, score_cutoff=80):
    """Fuzzy-match each tournament host to candidate park names.

    For every ``(host, district)`` pair in ``host_df`` the park names are
    searched with ``fuzz.token_set_ratio``; only candidates scoring at least
    ``score_cutoff`` are kept. A summary of matched and unmatched hosts is
    printed.

    Parameters
    ----------
    host_df : pandas.DataFrame
        Must contain ``'host'`` and ``'district'`` columns.
    parks_df : pandas.DataFrame
        Must contain a ``'park_name'`` column.
    score_cutoff : int, default 80
        Minimum similarity score for a candidate to be kept.

    Returns
    -------
    pandas.DataFrame
        Columns ``'host'``, ``'district'`` and ``'potential_matches'`` (the
        list returned by ``process.extractBests`` for that host, possibly
        empty).
    """
    matches = []
    unmatched_park_names = []

    for host, district in zip(host_df['host'], host_df['district']):
        # extractBests already discards candidates below score_cutoff, so
        # anything it returns counts as a strong match.
        potential_matches = process.extractBests(
            host,
            parks_df['park_name'],
            scorer=fuzz.token_set_ratio,
            score_cutoff=score_cutoff,
        )

        matches.append({'host': host, 'district': district, 'potential_matches': potential_matches})

        if not potential_matches:
            unmatched_park_names.append((host, district))

    # Explicit columns keep the frame well-formed even when host_df is empty
    matches_df = pd.DataFrame(matches, columns=['host', 'district', 'potential_matches'])

    # Total number of candidates kept across all hosts
    strong_matches_count = sum(len(candidates) for candidates in matches_df['potential_matches'])
    unmatched_count = len(unmatched_park_names)

    print("Number of strong matches:", strong_matches_count)
    print("Number of unmatched park names:", unmatched_count)
    print("Unmatched park names:", unmatched_park_names)

    return matches_df




In [3]:
# Fuzzy-match every host against the park names
result_df = find_host_matches(host_df, parks_df)

# Join the district notes onto the matches by district number, keeping only
# the columns needed for manual review
review_columns = ['host', 'district', 'potential_matches', 'Plot Note', 'Map Link MHSAA']
merged_df = result_df.merge(
    messe_df,
    left_on='district',
    right_on='district number',
)[review_columns]
merged_df.head()
# merged_df.info()

Number of strong matches: 118
Number of unmatched park names: 13
Unmatched park names: [('Allen Park', 18), ('Detroit Western', 19), ('St Clair Shores Lake Shore', 21), ('Detroit U of D Jesuit', 23), ('Bloomfield Hills Brother Rice', 24), ('Dearborn Divine Child', 53), ('Houghton Lake', 68), ('Detroit Communication Media Arts', 89), ('Detroit Osborn', 91), ('Painesdale Jeffers', 97), ('Brethren', 107), ('East Jackson', 118), ('Waterford Our Lady of the Lakes', 126)]


Unnamed: 0,host,district,potential_matches,Plot Note,Map Link MHSAA
0,Marquette,1,"[(Marquette HS - Hurley Field, 100, 126)]",,http://maps.google.com/maps?q=North Marquette ...
1,Midland Dow,2,"[(Midland HH Dow High School, 100, 21)]",,http://maps.google.com/maps?q=H H Dow High Sch...
2,Muskegon Mona Shores,3,"[(Muskegon Mona Shores HS, 100, 125)]",,http://maps.google.com/maps?q=Mona Shores Base...
3,Grand Rapids Forest Hills Northern,4,"[(Grand Rapids Forest Hills Northern HS, 100, ...",,http://maps.google.com/maps?q=FHN Stadium - Ba...
4,Grandville,5,"[(Grandville High School - high school, 100, 1...",,http://maps.google.com/maps?q=Grandville High ...


In [4]:


## Save the merged DataFrame to a csv

merged_df.to_csv('data/district_matches.csv', index=False)

In [None]:
# Inspect the parks table loaded from the prelim JSON.
# The frame is named `parks_df`; `park_df` was never defined.
parks_df.info()
parks_df.head()

# host_df.info()
# host_df.head()

In [None]:
### Merge the host team info with the park info



# WORK DOWN FROM HERE

### FILL IN THE REST OF JSON WITH THE DATA FOR THE 2023 TOURNEY

In [None]:
from matplotlib import pyplot as plt

In [None]:
# Histogram of the max distance, min distance, average distance, and median distance

fig, ax = plt.subplots(2, 2, figsize=(12, 8))

# (axes, column to plot, panel title) for each cell of the 2x2 grid
histogram_panels = [
    (ax[0, 0], 'max_distance', 'Max Distance'),
    (ax[0, 1], 'min_distance', 'Min Distance'),
    (ax[1, 0], 'avg_distance', 'Average Distance'),
    (ax[1, 1], 'median_distance', 'Median Distance'),
]

for panel_axes, distance_column, panel_title in histogram_panels:
    panel_axes.hist(df_cleaned[distance_column], bins=20)
    panel_axes.set_title(panel_title)

plt.show()


In [None]:
## Compile a list of fields that are outliers
# A field is kept when ANY of these holds: max, average or median distance
# above 400, or min distance below 200 (same units as the distance columns).

outlier_fields = df_cleaned[(df_cleaned['max_distance'] > 400) | (df_cleaned['min_distance'] < 200) | (df_cleaned['avg_distance'] > 400) | (df_cleaned['median_distance'] > 400)]

# NOTE(review): this value is not displayed because it is not the last
# expression of the cell; wrap it in print() if the count should be shown.
len(outlier_fields)

# Show the names of the outlier parks
print(outlier_fields['park_name'].values)



In [None]:
# Create list of the top and bottom ten from each category

def _ten_by(column, ascending):
    """Return the first ten rows of df_cleaned after sorting by `column`."""
    return df_cleaned.sort_values(by=column, ascending=ascending).head(10)


# NOTE(review): for 'min_distance' the "top" list sorts ascending and the
# "bottom" list descending, the reverse of every other metric. Confirm this
# is intended.
top_ten_max = _ten_by('max_distance', False)
top_ten_min = _ten_by('min_distance', True)
top_ten_avg = _ten_by('avg_distance', False)
top_ten_median = _ten_by('median_distance', False)
top_ten_field_area = _ten_by('field_area_sqft', False)
top_ten_foul_area = _ten_by('foul_area_sqft', False)
top_ten_fop_area = _ten_by('fop_area_sqft', False)
top_ten_ratio = _ten_by('fair_to_foul', False)

bottom_ten_ratio = _ten_by('fair_to_foul', True)
bottom_ten_max = _ten_by('max_distance', True)
bottom_ten_min = _ten_by('min_distance', False)
bottom_ten_avg = _ten_by('avg_distance', True)
bottom_ten_median = _ten_by('median_distance', True)


### Create and display a dataframe with columns for the top and bottom ten fields for each category
# Column label -> ranked frame; the dict order is the column order of the result
ranked_frames = {
    'top_ten_max': top_ten_max,
    'top_ten_min': top_ten_min,
    'top_ten_avg': top_ten_avg,
    'top_ten_median': top_ten_median,
    'top_ten_field_area': top_ten_field_area,
    'top_ten_foul_area': top_ten_foul_area,
    'top_ten_fop_area': top_ten_fop_area,
    'top_ten_ratio': top_ten_ratio,
    'bottom_ten_ratio': bottom_ten_ratio,
    'bottom_ten_max': bottom_ten_max,
    'bottom_ten_min': bottom_ten_min,
    'bottom_ten_avg': bottom_ten_avg,
    'bottom_ten_median': bottom_ten_median,
}

top_bottom_df = pd.DataFrame(
    {label: ranked['park_name'].values for label, ranked in ranked_frames.items()}
)

top_bottom_df.head(10)




In [None]:
import json

### Load prelim JSON as data

with open('data/mhsaa_step1.json') as f:

    data = json.load(f)



In [None]:
### NEW WITH AUTO SCALING

def calculate_max_y(data, num_bins=36, level_filter=None):
    """Return the largest bin count of the orientation histogram.

    Orientations (degrees) are counted with ``process_data`` and grouped into
    ``num_bins`` equal-width bins spanning 360 degrees.

    Parameters
    ----------
    data : iterable
        Records passed straight through to ``process_data``.
    num_bins : int, default 36
        Number of histogram bins.
    level_filter : optional
        Passed straight through to ``process_data``.

    Returns
    -------
    numpy.float64
        Count held by the fullest bin.
    """
    orientation_counts = process_data(data, level_filter)

    degrees_per_bin = 360 / num_bins
    bin_counts = np.zeros(num_bins)

    for orientation, count in orientation_counts.items():
        bin_index = int(orientation / degrees_per_bin)
        # An orientation of exactly 360 belongs in the first bin
        bin_counts[0 if bin_index == num_bins else bin_index] += count

    return max(bin_counts)


def create_polar_chart(data, num_bins=36, level_filter=None, y_min=-20, background_color='#2b2b2b', color_map=plt.cm.viridis, bar_alpha=0.8):
    """Draw a polar histogram of field orientations with an auto-scaled radius.

    Orientations (degrees) are counted with ``process_data`` and grouped into
    ``num_bins`` equal-width bins. The chart puts 0 degrees at the top with
    angles increasing clockwise, and the radial axis runs from ``y_min`` to
    the largest bin count plus 5.

    Parameters
    ----------
    data : iterable
        Records passed to ``process_data``.
    num_bins : int, default 36
        Number of angular bins.
    level_filter : optional
        Passed to ``process_data``.
    y_min : float, default -20
        Lower radial limit; a negative value leaves a hole in the centre.
    background_color : color, default '#2b2b2b'
        Axes face colour.
    color_map : matplotlib colormap, default ``plt.cm.viridis``
        Maps each bin's count (relative to the fullest bin) to a bar colour.
    bar_alpha : float, default 0.8
        Opacity of the histogram bars.

    Returns
    -------
    None
        The figure is shown with ``plt.show()``.
    """
    count_by_orientation = process_data(data, level_filter)

    # Compute the histogram
    bin_edges = np.linspace(0.0, 2 * np.pi, num_bins + 1)
    bin_counts = np.zeros(num_bins)

    for orientation, count in count_by_orientation.items():
        idx = int(orientation / (360 / num_bins))
        if idx == num_bins:
            idx = 0  # exactly 360 degrees belongs in the first bin
        bin_counts[idx] += count

    bin_width = 2 * np.pi / num_bins
    peak_count = bin_counts.max()

    # Set plot size
    plt.figure(figsize=(10, 10))

    ax = plt.subplot(111, projection='polar')
    ax.set_theta_direction(-1)       # clockwise
    ax.set_theta_offset(np.pi / 2)   # 0 degrees at the top

    # Set dark background
    ax.set_facecolor(background_color)
    ax.set_rlabel_position(22.5)
    ax.set_ylim(y_min, peak_count + 5)  # auto-scale to the fullest bin

    # Shaded band from 0 up to |y_min| behind the data bars
    ax.bar(bin_edges[:-1], np.abs(ax.get_ylim()[0]), width=bin_width, bottom=0.0, color='k', alpha=0.3, align='edge')

    # align='edge' makes each bar span its own bin [edge, edge + width);
    # the default 'center' would rotate the whole histogram by half a bin.
    bars = ax.bar(bin_edges[:-1], bin_counts, width=bin_width, bottom=0, align='edge')

    # Use custom colors and opacity; guard against an all-zero histogram
    color_scale = peak_count if peak_count > 0 else 1
    for r, bar in zip(bin_counts, bars):
        bar.set_facecolor(color_map(r / color_scale))
        bar.set_alpha(bar_alpha)

    plt.show()


In [None]:
##### CALL AUTO ADJUSTING CHART #####


## NEW PERAMS


# Call your function
create_polar_chart(
    data, 
    num_bins=30, 
    # level_filter="level1", 
    y_min=0, 
    background_color='#2b2b2b', 
    color_map=plt.cm.viridis, 
    bar_alpha=0.8
)


In [None]:
## NEW CHAT GPT CODE

def create_polar_chart(data, num_bins=36, level_filter=None, y_min=-20, y_max=130, background_color='#2b2b2b', color_map=plt.cm.viridis, bar_alpha=0.8):
    """Draw a polar histogram of field orientations with fixed radial limits.

    Orientations (degrees) are counted with ``process_data`` and grouped into
    ``num_bins`` equal-width bins. The chart puts 0 degrees at the top with
    angles increasing clockwise.

    Parameters
    ----------
    data : iterable
        Records passed to ``process_data``.
    num_bins : int, default 36
        Number of angular bins.
    level_filter : optional
        Passed to ``process_data``.
    y_min, y_max : float, default -20 and 130
        Radial axis limits; a negative ``y_min`` leaves a hole in the centre.
    background_color : color, default '#2b2b2b'
        Axes face colour.
    color_map : matplotlib colormap, default ``plt.cm.viridis``
        Maps each bin's count (relative to the fullest bin) to a bar colour.
    bar_alpha : float, default 0.8
        Opacity of the histogram bars.

    Returns
    -------
    None
        The figure is shown with ``plt.show()``.
    """
    count_by_orientation = process_data(data, level_filter)

    # Compute the histogram
    bin_edges = np.linspace(0.0, 2 * np.pi, num_bins + 1)
    bin_counts = np.zeros(num_bins)

    for orientation, count in count_by_orientation.items():
        idx = int(orientation / (360 / num_bins))
        if idx == num_bins:
            idx = 0  # exactly 360 degrees belongs in the first bin
        bin_counts[idx] += count

    bin_width = 2 * np.pi / num_bins

    # Set plot size
    plt.figure(figsize=(10, 10))

    ax = plt.subplot(111, projection='polar')
    ax.set_theta_direction(-1)       # clockwise
    ax.set_theta_offset(np.pi / 2)   # 0 degrees at the top

    # Set dark background
    ax.set_facecolor(background_color)
    ax.set_rlabel_position(22.5)
    ax.set_ylim(y_min, y_max)

    # Shaded band from 0 up to |y_min| behind the data bars
    ax.bar(bin_edges[:-1], np.abs(ax.get_ylim()[0]), width=bin_width, bottom=0.0, color='k', alpha=0.3, align='edge')

    # align='edge' makes each bar span its own bin [edge, edge + width);
    # the default 'center' would rotate the whole histogram by half a bin.
    bars = ax.bar(bin_edges[:-1], bin_counts, width=bin_width, bottom=0, align='edge')

    # Use custom colors and opacity; guard against an all-zero histogram
    peak_count = bin_counts.max()
    color_scale = peak_count if peak_count > 0 else 1
    for r, bar in zip(bin_counts, bars):
        bar.set_facecolor(color_map(r / color_scale))
        bar.set_alpha(bar_alpha)

    plt.show()


In [None]:
### Create a polar chart showing the direction of all the tournament fields


import numpy as np
import matplotlib.pyplot as plt

# create a function to process the data, counting the orientations and filtering by level.

from collections import defaultdict

def process_data(data, level_filter=None):
    """Count records per whole-degree field orientation.

    Parameters
    ----------
    data : iterable of dict
        Records with a ``'field_orientation'`` value in degrees and,
        optionally, a ``'level'`` value.
    level_filter : optional
        When given, only records whose ``'level'`` equals this value are
        counted; records without a ``'level'`` key are skipped instead of
        raising ``KeyError``.

    Returns
    -------
    collections.defaultdict
        Maps each orientation, rounded to the nearest integer degree, to the
        number of records with that orientation. Records whose orientation is
        missing or ``None`` are ignored.
    """
    count_by_orientation = defaultdict(int)

    for record in data:
        if level_filter is not None and record.get('level') != level_filter:
            continue

        orientation = record.get('field_orientation')
        if orientation is None:
            # A null orientation cannot be rounded; skip it
            continue

        count_by_orientation[round(orientation)] += 1

    return count_by_orientation

def create_polar_chart(data, num_bins=36, level_filter=None, y_min=-20, y_max=130, background_color='#2b2b2b', color_map=plt.cm.viridis, bar_alpha=0.8):
    """Draw a polar histogram of field orientations.

    Orientations (degrees) are counted with ``process_data`` and grouped into
    ``num_bins`` equal-width bins. The chart puts 0 degrees at the top with
    angles increasing clockwise. Calling with only ``data``, ``num_bins`` and
    ``level_filter`` uses the previously hard-coded limits and colours.

    Parameters
    ----------
    data : iterable
        Records passed to ``process_data``.
    num_bins : int, default 36
        Number of angular bins.
    level_filter : optional
        Passed to ``process_data``.
    y_min, y_max : float, default -20 and 130
        Radial axis limits; a negative ``y_min`` leaves a hole in the centre.
    background_color : color, default '#2b2b2b'
        Axes face colour.
    color_map : matplotlib colormap, default ``plt.cm.viridis``
        Maps each bin's count (relative to the fullest bin) to a bar colour.
    bar_alpha : float, default 0.8
        Opacity of the histogram bars.

    Returns
    -------
    None
        The figure is shown with ``plt.show()``.
    """
    count_by_orientation = process_data(data, level_filter)

    # Compute the histogram
    bin_edges = np.linspace(0.0, 2 * np.pi, num_bins + 1)
    bin_counts = np.zeros(num_bins)

    for orientation, count in count_by_orientation.items():
        idx = int(orientation / (360 / num_bins))
        if idx == num_bins:
            idx = 0  # exactly 360 degrees belongs in the first bin
        bin_counts[idx] += count

    bin_width = 2 * np.pi / num_bins

    # Set plot size
    plt.figure(figsize=(10, 10))

    ax = plt.subplot(111, projection='polar')
    ax.set_theta_direction(-1)       # clockwise
    ax.set_theta_offset(np.pi / 2)   # 0 degrees at the top

    # Set dark background
    ax.set_facecolor(background_color)
    ax.set_rlabel_position(22.5)
    ax.set_ylim(y_min, y_max)

    # Shaded band from 0 up to |y_min| behind the data bars
    ax.bar(bin_edges[:-1], np.abs(ax.get_ylim()[0]), width=bin_width, bottom=0.0, color='k', alpha=0.3, align='edge')

    # align='edge' makes each bar span its own bin [edge, edge + width);
    # the default 'center' would rotate the whole histogram by half a bin.
    bars = ax.bar(bin_edges[:-1], bin_counts, width=bin_width, bottom=0, align='edge')

    # Use custom colors and opacity; guard against an all-zero histogram
    peak_count = bin_counts.max()
    color_scale = peak_count if peak_count > 0 else 1
    for r, bar in zip(bin_counts, bars):
        bar.set_facecolor(color_map(r / color_scale))
        bar.set_alpha(bar_alpha)

    plt.show()

In [None]:
create_polar_chart(data, num_bins=50, level_filter=None)