In [1]:
import requests
from bs4 import BeautifulSoup
import numpy as np
from urllib.parse import urljoin
import pandas as pd
from datetime import datetime
import re

In [2]:
import champ_placement as chp

<a id ="top"></a>

### Important functions in the champ_placement class
 - [Year_Link_finder](#years)
 - [month_soup](#month)
 - [recent_champ](#recent_champ) - finds the most recent show with `championship` in its name. This was later modified to cover [all champ shows](#load_most_recent)
 - [find_classes](#find_classes)
 - [finding champ shows on the kc website](#KC_website)
 - [load most recent show](#load_most_recent)

---
<a id=years></a>

In [3]:
def year_link_finder(min_year=None, max_year=None):
    """
    Build a mapping from year to that year's Agility Plaza results URL.

    Args:
        min_year (int, optional): First year to include. Defaults to 2004.
        max_year (int, optional): Last year to include (inclusive). Defaults to the current year.

    Returns:
        dict[str, str]: Keys are the years as plain strings (e.g. '2024'), values are
        the results URL for that year. Empty when min_year is greater than max_year.
    """
    base_link = "https://www.agilityplaza.com/results/"

    # `is None` rather than `== None`: identity is the idiomatic (and safe) test for a missing argument
    if min_year is None:
        min_year = 2004
    if max_year is None:
        max_year = datetime.now().year

    # Plain Python strings are used so no numpy string scalars leak into the keys/values
    return {str(year): base_link + str(year) for year in range(min_year, max_year + 1)}

def current_year_link():
    """Return the Agility Plaza results URL for the current calendar year."""
    this_year = datetime.now().date().strftime("%Y")
    return "https://www.agilityplaza.com/results/" + str(this_year)
        
# Quick check: with min_year=2024 and max_year left at its default (the current year)
# the mapping holds one entry per year from 2024 onwards; then the current-year URL.
print(year_link_finder(2024))
print(current_year_link())

{'2024': 'https://www.agilityplaza.com/results/2024'}
https://www.agilityplaza.com/results/2024


---
<a id="month"></a>

## Month_soup
This returns the HTML soup for the current year's results page, restricted to the month selected by the `months_ago` argument.


[back to the top](#top)

In [4]:
# Instance of the project's champ_placement class; it is passed explicitly as `self`
# to the prototype functions defined in this notebook.
champ_placement_instance = chp.champ_placement()

def month_soup(self, months_ago=0, return_month=False):
    """
    Return the HTML elements of one month section of the current-year results page.

    The page at ``self.current_year_link`` is fetched and parsed. Each month section is
    introduced by a ``<thead>`` element; the section's content is every sibling element
    that follows that ``<thead>`` up to (not including) the next ``<thead>``.

    Args:
        self: Object exposing ``current_year_link`` (URL of the results page).
        months_ago (int, optional): Index of the ``<thead>`` to use. 0 selects the first
            section on the page (the most recent month in the recorded output of this
            notebook). Default is 0.
        return_month (bool, optional): If True, also return the first word of the
            selected ``<thead>`` text (the month name). Default is False.

    Returns:
        list or tuple: The list of elements in the section, or ``(elements, month)`` when
        return_month is True. When ``months_ago`` points past the last section on the
        page, an empty list is returned (with ``None`` as the month) instead of raising
        IndexError, so callers can loop back through months safely.

    Raises:
        requests.HTTPError: If the results page responds with an HTTP error status.
    """
    # Fetch the results page; the timeout stops a stalled connection from hanging the notebook
    response = requests.get(self.current_year_link, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')

    # One <thead> per month section
    theads = soup.find_all("thead")

    try:
        selected_thead = theads[months_ago]
    except IndexError:
        # Fewer month sections on the page than requested (e.g. early in the year)
        return ([], None) if return_month else []

    # Collect the siblings between the selected <thead> and the next one
    elements_between = []
    current_element = selected_thead.find_next_sibling()
    while current_element and current_element.name != 'thead':
        elements_between.append(current_element)
        current_element = current_element.find_next_sibling()

    if return_month:
        month = selected_thead.text.strip().split(" ")[0]
        return elements_between, month
    return elements_between


# Show only the first five elements returned for months_ago=0 to keep the output short.
print(month_soup(champ_placement_instance)[:5])


Most recent show (2024-04-14 00:00:00) was 'scottish border collie' but it was cancelled.
Last show to run was scunthorpe
Matching rows found:
Scunthorpe Championship Agility Show
[<tr>
<th>Dates</th>
<th>Name</th>
</tr>, <tr class="clickable-row organization200" data-href="/competition/1656798794/results">
<td>Sun 28</td>
<td>Tryouts for Team GB Senior (age 55+) World Championships</td>
</tr>, <tr class="clickable-row organization8" data-href="/competition/2012429214/results">
<td>Sun 28</td>
<td>For The Love Of Hoopers</td>
</tr>, <tr class="clickable-row organization1" data-href="/competition/2079898915/results">
<td>Sun 28</td>
<td>Wilton </td>
</tr>, <tr class="clickable-row organization100" data-href="/competition/1442756458/results">
<td>Sat 27 - Sun 28</td>
<td>Frittenden Dog Agility</td>
</tr>]


<a id = "recent_champ"></a>

## recent_champ
finds the link to the show with the most recent championship in it.
There is also `champ_this_year` which should find all champs in the year but it doesn't work - this is mainly here for a later project that might need this feature


[back to the top](#top)

In [5]:
# Re-created so this cell can be run on its own. NOTE(review): constructing the instance
# appears to print the "Most recent show ..." lines seen in the cell output.
champ_placement_instance = chp.champ_placement()

def recent_champ(self, months_ago=0, print_statement=True):
    """
    Find the most recent show whose name contains "Championship".

    Month sections are searched one at a time via ``self.month_soup(i)``, starting at
    ``months_ago`` and going back to index 12 inclusive. The first matching row wins.

    Args:
        self: Object exposing ``month_soup(i)`` and ``base_link``.
        months_ago (int, optional): Month-section index to start from. Default is 0.
        print_statement (bool, optional): Whether to print search progress. Default is True.

    Returns:
        tuple or None: ``(link, name)`` of the first matching show, or None when no
        show matches in the searched range (or the page runs out of month sections).
    """
    max_months = 12  # Maximum number of months to go back
    for i in range(months_ago, max_months + 1):
        try:
            rows = self.month_soup(i)
        except IndexError:
            # The results page has fewer month sections than we are asking for
            break

        # Row 0 of each section is the "Dates / Name" header row, so start at 1
        for row in rows[1:]:
            cells = row.find_all('td')
            if not cells:
                # Rows without <td> cells (e.g. stray header rows) carry no show name
                continue

            name = cells[-1].text
            if "Championship" not in name:
                continue

            href = row.get('data-href')
            if href is None:
                # Not a clickable results row; nothing to link to
                continue

            # NOTE(review): [:-9] presumably strips a trailing "/results/" from base_link
            # to leave the site root - confirm against champ_placement.base_link
            link = self.base_link[:-9] + href
            if print_statement:
                print(f"Championship found in {name}, link {link}")
            return link, name

        if print_statement:
            print(f"No competition with 'Championship' in the name was found for {i} months ago. Trying next month.")

    if print_statement:
        print("No competition with 'Championship' in the name was found in the last", max_months, "months.")
    return None

def champ_this_year(self, months_ago=0, print_statement=True):
    """
    Collect every show with "Championship" in its name across the searched months.

    Month sections are read via ``self.month_soup(i)`` from ``months_ago`` back to index
    12 inclusive. The search stops early when the results page has no more month
    sections (``month_soup`` raising IndexError), which previously crashed this function.

    Args:
        self: Object exposing ``month_soup(i)`` and ``base_link``.
        months_ago (int, optional): Month-section index to start from. Default is 0.
        print_statement (bool, optional): Whether to print each find and the
            "nothing found" message. Default is True.

    Returns:
        list[tuple[str, str]]: ``(name, link)`` pairs in the order they were found;
        empty when nothing matched.
    """
    max_months = 12  # Maximum number of months to go back
    championships = []  # List to store found championships

    for i in range(months_ago, max_months + 1):
        try:
            rows = self.month_soup(i)
        except IndexError:
            # Ran past the last month section available on the page
            break

        # Row 0 of each section is the header row, so start at 1
        for row in rows[1:]:
            cells = row.find_all('td')
            if not cells:
                continue

            name = cells[-1].text
            href = row.get('data-href')
            if "Championship" not in name or href is None:
                continue

            # NOTE(review): [:-9] presumably strips a trailing "/results/" from base_link - confirm
            link = self.base_link[:-9] + href
            championships.append((name, link))

            if print_statement:
                print(f"Championship found in {name}, link {link}")

    if not championships and print_statement:
        print("No competition with 'Championship' in the name was found in the last", max_months, "months.")

    return championships

# recent_champ(champ_placement_instance)


# Start the search at month-section index 2 and suppress the progress messages.
recent_champ(champ_placement_instance, months_ago = 2, print_statement = False)

Most recent show (2024-04-14 00:00:00) was 'scottish border collie' but it was cancelled.
Last show to run was scunthorpe
Matching rows found:
Scunthorpe Championship Agility Show


('https://www.agilityplaza.com/competition/1258377046/results',
 'Open Junior Agility Championships')

---
<a id = "find_classes"></a>

## find_classes
This function finds the link to all the championship classes in the competition with "Championship" in their name and returns a df of all the links.

It also has a `KC_website` switch that says whether the show link came from the KC website or from searching Plaza for a show with "Championship" in its name. This was needed because, as found [further down the document](#KC_website), not all championship shows have "Championship" in their name. Where it is used, in the order the notebook was developed, is explained [here](#link_to_show_end)

[back to the top](#top)

In [6]:
# Re-created so this cell can be run on its own; passed as `self` to find_classes below.
champ_placement_instance = chp.champ_placement()

def find_classes(self, months_ago=0, KC_website = True, print_statement=False):
    """
    List the championship classes of a show as a pandas DataFrame.

    The show's results page is fetched, every link inside a ``card-block`` div is
    inspected, and links whose text contains "Championship Jumping" or
    "Championship Agility" are kept.

    Args:
        self: Object exposing ``last_show_results_link`` and ``recent_champ``.
        months_ago (int, optional): Passed to ``recent_champ``; only used when
            KC_website is False. Default is 0.
        KC_website (bool, optional): If True (default) the show is taken from
            ``self.last_show_results_link``; if False it is found with ``recent_champ``
            (shows with "Championship" in their name).
        print_statement (bool, optional): Passed to ``recent_champ``. Default is False.

    Returns:
        pandas.DataFrame: Indexed by 'class number' (the first word of the class name),
        with columns 'Class Name', 'Link' and 'Height' (the second word of the class
        name, or None when the name has fewer than two words).

    Raises:
        ValueError: If KC_website is False and ``recent_champ`` finds no show.
        requests.HTTPError: If the show page responds with an HTTP error status.
    """
    if KC_website:
        show_link = self.last_show_results_link
    else:
        found = self.recent_champ(months_ago, print_statement)
        if found is None:
            # recent_champ returns None when nothing matched; unpacking it would raise an opaque TypeError
            raise ValueError("No show with 'Championship' in the name was found.")
        show_link, _show_name = found

    response = requests.get(show_link, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')

    # Each 'card-block' div holds one day's classes
    champ_markers = ("Championship Jumping", "Championship Agility")
    class_data = []
    for day_div in soup.find_all("div", class_="card-block"):
        for a in day_div.find_all('a'):
            href = a.get('href')
            if href is None:
                continue
            if any(marker in a.text for marker in champ_markers):
                # NOTE(review): the link has no "https://" scheme, so it cannot be passed
                # straight to requests.get - kept as-is because callers may rely on this format
                class_data.append((a.text, "agilityplaza.com" + href))

    # Create a pandas DataFrame
    df = pd.DataFrame(class_data, columns=['Class Name', 'Link'])
    df['class number'] = df['Class Name'].apply(lambda x: ' '.join(x.split()[:1]))
    df = df.set_index('class number')
    df['Height'] = df['Class Name'].apply(lambda x: x.split()[1] if len(x.split()) >= 2 else None)
    return df

# KC_website is left at its default (True), so the show comes from
# last_show_results_link and months_ago is not used on this call.
array = find_classes(champ_placement_instance, months_ago = 2, print_statement = False)
array

Most recent show (2024-04-14 00:00:00) was 'scottish border collie' but it was cancelled.
Last show to run was scunthorpe
Matching rows found:
Scunthorpe Championship Agility Show


Unnamed: 0_level_0,Class Name,Link,Height
class number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1a,1a Lge Championship Agility,agilityplaza.com/agilityClass/1112366360/results,Lge
1b,1b Lge Championship Jumping,agilityplaza.com/agilityClass/1905479051/results,Lge
13a,13a Int Championship Agility,agilityplaza.com/agilityClass/2065523480/results,Int
13b,13b Int Championship Jumping,agilityplaza.com/agilityClass/1906823297/results,Int


[back to top](#top)
<a id="KC_website"></a>

### This only works for shows with championship in the name. Lots of shows don't have this -> need to sort this out to get the appropriate show

KCI, Derbyshire, Lune Valley, Thames, ..... do not have this

This is going to be solved using the KC page https://www.thekennelclub.org.uk/events-and-activities/agility/already-competing-in-agility/qualifying-shows-for-the-kennel-club-events/ to find the shows. This will not work with the data collection database idea

This is saved in a new python file to create the database each year

In [7]:
# Height abbreviations as they appear in Agility Plaza class names (not used in this cell).
heights = ['Lge', 'Int', 'Med', 'Sml']
# Kennel Club page listing the qualifying / championship shows.
champ_show_link = "https://www.thekennelclub.org.uk/events-and-activities/agility/already-competing-in-agility/qualifying-shows-for-the-kennel-club-events/"

response = requests.get(champ_show_link)
champ_soup = BeautifulSoup(response.content, 'html.parser')

# print(type(champ_show_link))

# NOTE: despite the name, `soup` is the list of <details class="a-details"> elements,
# not a BeautifulSoup document. The next cell iterates over it.
soup = champ_soup.find_all("details", class_ = "a-details")

# Print every <summary> that mentions "Championship" to confirm the page structure.
for event in soup:
    summaries = event.find_all("summary")
    for summary in summaries:
        if "Championship" in summary.get_text():
            # Last word of the summary text is the height (e.g. "small"); computed but not used here
            height  = summary.get_text().split(' ')[-1]
            print(summary)
print(champ_show_link)

<summary class="a-details__summary">Championship Classes – small</summary>
<summary class="a-details__summary">Championship Classes – medium</summary>
<summary class="a-details__summary">Championship Classes – intermediate</summary>
<summary class="a-details__summary">Championship Classes – large</summary>
https://www.thekennelclub.org.uk/events-and-activities/agility/already-competing-in-agility/qualifying-shows-for-the-kennel-club-events/


In [8]:
# Build `combined_df`: one row per championship show with a boolean column per height,
# the show date, and any bracketed comment that followed the date.
# Depends on `soup` (the list of <details> elements) from the previous cell.

# Per-height lists of (show name, raw date text) tuples
data = {'small': [], 'medium': [], 'intermediate': [], 'large': []}

# Iterate through events
for event in soup:
    summaries = event.find_all("summary")
    for summary in summaries:
        # Find the table after the summary
        table = summary.find_next("table")
        if "Championship" in summary.get_text():
#             print(summary.get_text())

            # Find the table after the summary (repeats the lookup above)
            table = summary.find_next("table")
            
            # The height is the last word of the summary text, e.g. "Championship Classes – small"
            height  = summary.get_text().split(' ')[-1]
            if table:
                # Extract and process table content
                for row in table.find_all("tr"):
                    cells = row.find_all("td")
                    # Rows without <td> cells are skipped
                    if cells:
                        # First cell: show name (lower-cased)
                        show_name = [cell.get_text(strip=True) for cell in cells][0].lower()
                        # Second cell: date text, possibly followed by a comment in brackets
                        date = [cell.get_text(strip=True) for cell in cells][1]
                        # Append show name and date to respective height category;
                        # raises KeyError if the height word is not one of the four keys of `data`
                        data[height.lower()].append((show_name, date))


                        
# Combine data for all heights
combined_data = []

# Create set of all show names
all_show_names = set()
for height_shows in data.values():
    for show in height_shows:
        all_show_names.add(show[0])

# Iterate through all show names and check if each height is present
for show_name in all_show_names:
    show_info = {'Show Name': show_name}
    for height, height_shows in data.items():
        height_present = any(show[0] == show_name for show in height_shows)
        # Column names are the capitalised heights: 'Small', 'Medium', 'Intermediate', 'Large'
        show_info[height.capitalize()] = height_present
    combined_data.append(show_info)

# Create combined dataframe
combined_df = pd.DataFrame(combined_data)

# Add date column to combined dataframe.
# If a show is listed under several heights, the date from the last height processed wins.
for height, height_shows in data.items():
    for show in height_shows:
        show_name = show[0]
        date = show[1]
        combined_df.loc[combined_df['Show Name'] == show_name, 'Date'] = date


# Split each raw date text at the first "(" into the date and a trailing comment
dates = []
comments = []
for item in combined_df['Date']:
    date_parts = item.split('(')
    date = date_parts[0].strip()
    # Text after "(" with the closing bracket stripped, or '' when there is no comment
    comment = date_parts[1].strip(')') if len(date_parts) > 1 else ''
    dates.append(date)
    comments.append(comment)

# Creating a DataFrame
# data = {'Date': dates, 'Comments': comments}
combined_df['Date'] = dates
combined_df['Comments'] = comments
# Convert 'Date' column to datetime; text that cannot be parsed becomes NaT (errors='coerce')
combined_df['Date'] = pd.to_datetime(combined_df['Date'], errors='coerce')

# Sort the DataFrame by 'Date' column


# Filler words stripped from show names, lower-cased to match the lower-cased titles.
remove_words = np.char.lower(
    ['DTC', 'Dog', 'Training', 'Society', 'and', '&', 'Club', 'in', 'In']
)


def clean_title(title):
    """Return `title` with every whitespace-separated word found in `remove_words` dropped.

    The comparison is exact (case-sensitive) against the lower-cased `remove_words`,
    and the remaining words are re-joined with single spaces.
    """
    kept = []
    for token in title.split():
        if token in remove_words:
            continue
        kept.append(token)
    return ' '.join(kept)

# Apply the cleaning function to the 'Show Name' column
combined_df['Show Name'] = combined_df['Show Name'].apply(clean_title)

# Group by both 'Show Name' and 'Date' and aggregate with 'any' to combine rows.
# NOTE: 'any' also turns 'Comments' into a boolean (True when a comment was present),
# as the output of the next cell shows.
# NOTE(review): rows whose date failed to parse (NaT) are presumably dropped by groupby - confirm.
combined_df = combined_df.groupby(['Show Name', 'Date']).any().reset_index()

combined_df = combined_df.sort_values(by='Date')

# NOTE: later cells read 'Champ shows lowercase.csv'; this export is commented out,
# so that file must already exist for them to run on a fresh kernel.
# combined_df.to_csv('Champ shows lowercase.csv')

In [9]:
# Preview the five earliest shows (the frame was sorted by 'Date' in the previous cell).
combined_df.head()

Unnamed: 0,Show Name,Date,Small,Medium,Intermediate,Large,Comments
2,derbyshire agility,2024-01-27,False,False,True,True,True
28,wyre (lancs) agility,2024-03-17,True,True,True,True,False
27,wye valley,2024-04-06,True,True,False,False,True
17,scunthorpe obedience agility,2024-04-13,False,False,True,True,False
15,scottish border collie,2024-04-14,True,True,True,True,True


#### The table of championship shows are stored above
Finding out whether a show is on Plaza involves going to Agility Net, finding the show and then seeing who the processor is. However, this is tricky because the Agility Net website has a `<div>` class that contains nothing in the soup. This means it uses a JavaScript module to load the data, so it can't be scraped with BeautifulSoup and needs a webdriver from the `selenium` package. Come back to this later as an extension.

[back to top](#top)
<a id ="load_most_recent"></a>

# Finding the most recent champ show and loading the soup
- finding most recent on KC
- creating the class instance
- [searching](#searching) on plaza


This uses the dataframe above to find the most recent champ show and then looks on plaza

Ideas:
- find the most recent one on here and then look for that name in plaza, if it doesn't come up in plaza then it must be on a different show processor, return an error "show not found"
- to find the last one on plaza you'd have to search for the first one of the shows in the database that comes up. This will take longer I reckon

In [10]:
# Load the saved championship-show table; the first CSV column holds the original row index.
shows_df = pd.read_csv("Champ shows lowercase.csv", index_col = 0) #, parse_dates = [2])
# Parse the dates from the file itself. Previously this read `combined_df['Date']`,
# which only worked while the scraping cell's result was still in the kernel.
shows_df['Date'] = pd.to_datetime(shows_df['Date'])

# Today at midnight, as a Timestamp so it compares cleanly with the 'Date' column
current_date = pd.Timestamp(datetime.now().date())
# Debug output: the date stored under index label 5, and today's date
print(shows_df.loc[5, 'Date'])
print(current_date)

# Shows dated today or earlier
past_shows = shows_df[shows_df['Date'] <= current_date]

if past_shows.empty:
    raise ValueError("No shows found in the dataframe. No champ shows have occurred this year yet")

# Newest first, so row 0 is the most recent show
sorted_df = past_shows.sort_values(by='Date', ascending=False)

most_recent = sorted_df.iloc[0]

2024-07-06 00:00:00
2024-04-29 00:00:00


In [11]:
# Read the CSV file into a DataFrame
shows_df = pd.read_csv("Champ shows lowercase.csv", index_col=0)
shows_df['Date'] = pd.to_datetime(shows_df['Date'])

# Get the current date as a pandas Timestamp object
current_date = pd.Timestamp(datetime.now().date())

# Filter the DataFrame to include only shows after the current date
upcoming_shows = shows_df[shows_df['Date'] > current_date]

# If there are no upcoming shows, raise an error
if upcoming_shows.empty:
    raise ValueError("No upcoming shows found in the dataframe.")

# Otherwise, select the next upcoming show
next_show = upcoming_shows.iloc[0]

print("Next upcoming show:")
print(next_show['Show Name'])

Next upcoming show:
woodside


#### creating the function in the class

In [12]:
class nearest_show():
    """First draft of the show lookup: finds the most recent (and optionally the next)
    championship show from the saved "Champ shows lowercase.csv" table.

    Attributes set by __init__:
        current_date: today's date (datetime.date).
        last_show: name of the most recent show dated today or earlier.
    """

    def __init__(self):
        self.current_date = datetime.now().date()
        self.last_show = self.nearest_shows_KCwebsite()

    def nearest_shows_KCwebsite(self, next_show = False):
        """
        Return the name of the most recent show, optionally with the next upcoming one.

        Args:
            next_show (bool, optional): If True, also return the next upcoming show.

        Returns:
            str or tuple: The most recent show's name. When next_show is True, a tuple
            ``(most_recent_name, next_row)`` where ``next_row`` is the DataFrame row
            (pandas Series) of the earliest show after today, or None if there is none.

        Raises:
            ValueError: If no show is dated today or earlier.
        """
        shows_df = pd.read_csv("Champ shows lowercase.csv", index_col = 0) #, parse_dates = [2])
        # Parse the dates from the file itself rather than from the notebook-global
        # `combined_df`, which is undefined on a fresh kernel
        shows_df['Date'] = pd.to_datetime(shows_df['Date'])

        # Timestamp so the comparison with the datetime64 'Date' column is well defined
        current_date = pd.Timestamp(self.current_date)

        past_shows = shows_df[shows_df['Date'] <= current_date]

        if past_shows.empty:
            raise ValueError("No shows found in the dataframe. No champ shows have occurred this year yet")

        # Newest first, so row 0 is the most recent show
        sorted_df = past_shows.sort_values(by='Date', ascending=False)
        most_recent = sorted_df.iloc[0]['Show Name']

        if not next_show:
            return most_recent

        # Earliest first, so row 0 is the next show regardless of the order in the CSV
        upcoming_shows = shows_df[shows_df['Date'] > current_date].sort_values(by='Date')
        upcoming = None if upcoming_shows.empty else upcoming_shows.iloc[0]
        return most_recent, upcoming

In [13]:
import pandas as pd
from datetime import datetime

class NearestShow:
    """Look up the most recent and next championship shows from the saved show table.

    Attributes set by __init__:
        current_date: today's date (datetime.date).
        shows_df: table read from "Champ shows lowercase.csv" with 'Date' parsed to datetime.
        last_show: name of the most recent show that was not cancelled.
        next_show: name of the next upcoming show.
    """

    def __init__(self):
        self.current_date = datetime.now().date()

        self.shows_df = pd.read_csv("Champ shows lowercase.csv", index_col=0)
        self.shows_df['Date'] = pd.to_datetime(self.shows_df['Date'])
        self.last_show = self.nearest_show(print_statement = False)
        self.next_show = self.nearest_shows(next_show=True)

    def _past_shows_newest_first(self):
        """Return the shows dated today or earlier, newest first; ValueError if there are none."""
        current_date = pd.Timestamp(datetime.now().date())
        past_shows = self.shows_df[self.shows_df['Date'] <= current_date]
        if past_shows.empty:
            raise ValueError("No shows found in the dataframe. No champ shows have occurred this year yet")
        return past_shows.sort_values(by='Date', ascending=False)

    @staticmethod
    def _latest_show_that_ran(sorted_df):
        """Return the name of the first show in `sorted_df` whose 'Comments' flag is not True."""
        # The previous shift(-1) mask picked a row *followed by* a flagged show, which is
        # only the right answer by coincidence and fails when two flagged shows are adjacent
        ran = sorted_df[~(sorted_df['Comments'] == True)]
        if ran.empty:
            raise ValueError("Every past show in the dataframe is marked as cancelled.")
        return ran.iloc[0]['Show Name']

    def nearest_show(self, print_statement=True):
        """
        Finds the most recent show that actually ran.

        A show whose 'Comments' value is True is treated as cancelled and skipped.

        Args:
            print_statement (bool, optional): Print a note when the most recent show was cancelled.

        Returns:
            str: The name of the most recent show that is not marked as cancelled.
        Raises:
            ValueError: If no shows are dated today or earlier, or all of them are cancelled.
        """
        sorted_df = self._past_shows_newest_first()
        most_recent = sorted_df.iloc[0]

        if most_recent['Comments'] == True:
            if print_statement:
                print(f"Most recent show ({most_recent['Date']}) was '{most_recent['Show Name']}' but it was cancelled.")
            return self._latest_show_that_ran(sorted_df)
        return most_recent['Show Name']

    def nearest_shows(self, next_show=False, print_statement = True):
        """
        Finds either the next upcoming show or the most recent show (with its fallback).

        Args:
            next_show (bool): If True, returns the next upcoming show instead of the most recent one.
            print_statement (bool): Print notes about a cancelled most-recent show.

        Returns:
            tuple or str: If next_show is True, the name of the earliest show after today.
                          Otherwise the name of the most recent show; when that show is
                          marked as cancelled, a tuple (most recent name, name of the
                          latest show before it that was not cancelled).
        Raises:
            ValueError: If there is no upcoming show (next_show=True), no show dated
                        today or earlier, or every past show is cancelled.
        """
        if next_show:
            current_date = pd.Timestamp(datetime.now().date())
            # Earliest first, so row 0 is the next show regardless of the order in the CSV
            upcoming_shows = self.shows_df[self.shows_df['Date'] > current_date].sort_values(by='Date')
            if upcoming_shows.empty:
                raise ValueError("No upcoming shows found.")
            return upcoming_shows.iloc[0]['Show Name']

        sorted_df = self._past_shows_newest_first()
        most_recent = sorted_df.iloc[0]

        if most_recent['Comments'] == True:
            if print_statement:
                print(f"Most recent show ({most_recent['Date']}) was '{most_recent['Show Name']}' but it was cancelled.")
            prev_show = self._latest_show_that_ran(sorted_df)
            if print_statement:
                print(f"Previous show before cancellation was '{prev_show}'")
            return most_recent['Show Name'], prev_show
        return most_recent['Show Name']


In [14]:
# Check the attributes exposed by the champ_placement class from the project module
# (this is the module's implementation, not the NearestShow prototype above).
show = chp.champ_placement()
print(show.last_show)
print(show.next_show)
print(show.removed_words)

# Display the show table held by the instance
show.shows_df

Most recent show (2024-04-14 00:00:00) was 'scottish border collie' but it was cancelled.
Last show to run was scunthorpe
Matching rows found:
Scunthorpe Championship Agility Show
scunthorpe
woodside
['dtc' 'dog' 'training' 'society' 'and' '&' 'club' 'in' 'in' 'obedience'
 '(dorset)' 'district' '(lancs)' 'show' 'championship' 'agility']


Unnamed: 0,Show Name,Date,Small,Medium,Intermediate,Large,Comments
2,derbyshire,2024-01-27,False,False,True,True,True
27,wyre,2024-03-17,True,True,True,True,False
26,wye valley,2024-04-06,True,True,False,False,True
17,scunthorpe,2024-04-13,False,False,True,True,False
15,scottish border collie,2024-04-14,True,True,True,True,True
25,woodside,2024-05-04,True,True,True,True,False
22,vyne,2024-05-05,True,True,True,True,False
16,scottish kennel,2024-05-19,True,True,True,True,False
13,nottingham,2024-05-26,True,True,True,True,False
8,hinckley,2024-06-01,True,True,True,True,False


<a id="searching"></a>
[to most recent](#load_most_recent) <br>
[top](#top)

#### Now the filter for the last show can be found using this.

However, because some of the words were removed from the KC website names, the titles of all the shows in the Plaza soup will also need these words removed, and maybe "championship" too, so that the names match properly.

In [15]:
# Prototype of the parsing step used in recent_show_link below:
# take the elements for months_ago=0 and rebuild a searchable document from them.
show = chp.champ_placement()
elements = show.month_soup(months_ago = 0)

# Convert each Tag object to a string
elements_as_strings = [str(element) for element in elements]

# Join the strings
combined_html = ''.join(elements_as_strings)

# Create a new BeautifulSoup object from the combined HTML.
# NOTE: this rebinds `soup`, which earlier held the list of <details> elements from the
# KC page; re-running the KC table cell after this one would iterate the wrong object.
soup = BeautifulSoup(combined_html, 'html.parser')

Most recent show (2024-04-14 00:00:00) was 'scottish border collie' but it was cancelled.
Last show to run was scunthorpe
Matching rows found:
Scunthorpe Championship Agility Show


In [16]:
# Get the last show name from the `champ_placement` object

def recent_show_link(self, print_statement=True):
    """
    Retrieves the link associated with the most recent show from the `champ_placement` object. The most recent show is taken from the KC website so if nothing is found then the show might not be on plaza or in this current month.

    Only the month section returned by ``self.month_soup(months_ago=0)`` is searched.

    Args:
        self: Object exposing ``month_soup``, ``nearest_show`` and ``removed_words``.
        print_statement (bool, optional): If True, prints statements during execution. Defaults to True.

    Returns:
        data_href (str or None): The next part of the link to access the show results page. This needs to be combined to the base link. None when no matching row, or no data-href on it, was found.
    """
    elements = self.month_soup(months_ago = 0)

    # Rebuild a searchable document from the individual elements
    combined_html = ''.join(str(element) for element in elements)
    soup = BeautifulSoup(combined_html, 'html.parser')

    last_show = self.nearest_show()
    if print_statement:
        print(f"Last show to run was {last_show}")

    # For a show literally called "agility club" no words are stripped. Previously
    # `remove_words` was left unassigned in that case and clean_title raised NameError.
    remove_words = self.removed_words if last_show != "agility club" else ()

    def clean_title(title):
        """Drop every word of `title` that appears in `remove_words`."""
        return ' '.join(word for word in title.split() if word not in remove_words)

    def names_last_show(text):
        """True when a cell's text, lower-cased and cleaned, contains the last show's name."""
        # text is None for cells that do not hold a single string
        return text is not None and last_show in clean_title(text.lower())

    # Find the <td> element containing the last show name
    show_row = soup.find('td', string=names_last_show)

    if show_row is None:
        print("No matching rows found.")
        return None

    if print_statement:
        print("Matching rows found:")
        print(show_row.text)

    # The link lives on the enclosing <tr> as its data-href attribute
    parent_tr = show_row.find_parent('tr')
    data_href = parent_tr.get('data-href') if parent_tr is not None else None

    if not data_href:
        print("No data-href link found.")
        return None

    # Print before returning; this message used to sit after the return and never ran
    if print_statement:
        print("Data-href link:", data_href)
    return data_href

# Partial link (the row's data-href) for the last show; None if no matching row was found.
href = recent_show_link(show)

Most recent show (2024-04-14 00:00:00) was 'scottish border collie' but it was cancelled.
Last show to run was scunthorpe
Matching rows found:
Scunthorpe Championship Agility Show


In [17]:
# Full results URL held by the champ_placement instance (used by find_classes when KC_website=True).
show.last_show_results_link

'https://www.agilityplaza.com/competition/1076669116/results'

[back to the top](#top)
<a id= "link_to_show_end"></a>

#### Now the [`find_classes`](#find_classes) function can be used to find the links to the results for each height

[find classes](#find_classes)

In [20]:
# Calls the method on the champ_placement instance (the version in the project module).
show.find_classes()


Unnamed: 0_level_0,Class Name,Link,Height
class number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1a,1a Lge Championship Agility,agilityplaza.com/agilityClass/1112366360/results,Lge
1b,1b Lge Championship Jumping,agilityplaza.com/agilityClass/1905479051/results,Lge
13a,13a Int Championship Agility,agilityplaza.com/agilityClass/2065523480/results,Int
13b,13b Int Championship Jumping,agilityplaza.com/agilityClass/1906823297/results,Int


<a id="overall"></a>

## Creating the results for the overall standings

In [33]:

def overall_standings(self, height):
    """
    Return the result links of the two championship rounds for one height.

    Args:
        self: Object exposing ``find_classes()``, which returns a DataFrame with
            'Height' and 'Link' columns.
        height (str): Height label to select, compared against the 'Height' column (e.g. 'Lge').

    Returns:
        numpy.ndarray: The two links found for that height.

    Raises:
        ValueError: If the number of links for the height is not exactly two.
    """
    classes = self.find_classes()
    matching_links = classes[classes['Height'] == height]['Link']
    round_links = np.array(matching_links)

    if len(round_links) == 2:
        return round_links

    raise ValueError("Can't find both results. Second round of the show has not run yet")

# df = show.

# Links for both large-height championship rounds of the last show.
overall_standings(show, 'Lge')

array(['agilityplaza.com/agilityClass/1112366360/results',
       'agilityplaza.com/agilityClass/1905479051/results'], dtype=object)

In [None]:
def df_results(organisation_name_to_find, first_round_class_name, second_round_class_name):
    '''creates a list containing dataframes of the results of both rounds in the championship

    INPUTS
    organisation_name_to_find - REQUIRED, the name of the organisation/show as shown on Agility Plaza
    first_round_class_name, second_round_class_name - REQUIRED, the name of both of the championship rounds as shown on Agility Plaza, in the order in which they are ran at the competition

    OUTPUT
    results_df_list - a list containing the dataframes of both results in the championship with the first class that was run being in index 0. Each dataframe has the columns 'Human' and 'Dog'.

    RAISES
    ValueError - if a results page contains no <table> element

    NOTE(review): relies on a `results_and_running_orders` function that is not defined in this notebook.
    '''

    # getting the results links using 'results_and_running_orders' function
    subpage_2_results_links, subpage_2_running_order_links = results_and_running_orders(organisation_name_to_find, first_round_class_name, second_round_class_name)

    # headings for the seven cells of each result row
    column_headings = ['place1', 'place2', 'posh names', 'name', 'type','faults', 'time']

    results_df_list = []

    # looping over the links to get the results for both rounds
    for result in subpage_2_results_links:
        # getting the result soup from the link of each round
        response = requests.get(result, timeout=30)
        soup_results = BeautifulSoup(response.text, 'html.parser')

        # Locate the results table. The lookup used the tag name 'table-', which never
        # matches a real <table>, so the function could not produce any results.
        table = soup_results.find('table')
        if table is None:
            raise ValueError(f"No results table found at {result}")

        # one list of cell texts per table row
        table_data = [
            [cell.get_text() for cell in row.find_all('td')]
            for row in table.find_all('tr')
        ]

        # NOTE(review): row 0 is presumably the header row (no <td> cells) - confirm.
        # It is dropped together with the columns that are not needed.
        df = pd.DataFrame(table_data, columns = column_headings).drop(0).drop(columns=['place1','place2','posh names'])

        # creating a separate human and dog column; n=1 splits at the first ' & ' only,
        # so a name containing a second ' & ' cannot produce a third column
        df[['Human', 'Dog']] = df['name'].str.split(' & ', n=1, expand=True)

        # keeping only the human and dog columns
        results_df_list.append(df[['Human', 'Dog']])

    return results_df_list
    