In [1]:
from django.db.models import Q
from base_app.models import ConfigItems
from leagues.models import Competition, Season
from base_app.decorators import cleanup_selenium_instances,timed_retry
from games.models import Game

import pandas as pd
import re
from datetime import datetime
from pprint import pprint


In [2]:
BASE_FILE_DIR = "D:\All Season Data\Files"
BASE_LOGS_DIR = "D:\All Season Data\Logs"

In [4]:
def filter_file_details(filename):
    """
    Extracts competition, season, and type from a filename using a regex.

    Args:
        filename (str): The name of the file to parse.

    Returns:
        dict: A dictionary containing the extracted details, or None if the
              filename does not match the expected format.
    """
    # Regex to capture competition, season, and type from the filename
    # The pattern matches:
    # 1. A non-greedy group for the competition name (.*?)
    # 2. A specific group for the season, which can be 'YYYY' or 'YYYY_YYYY'
    # 3. A word group for the type (\w+)
    regex = r'^(.*?)_(\d{4}(?:_\d{4})?)_(\w+)\.xlsx$'
    match = re.search(regex, filename)

    if match:
        # Extract the captured groups and clean them up
        competition_name = match.group(1).replace('_', ' ')
        season_value = match.group(2).replace('_', '/')
        type_value = match.group(3)

        return {
            'competition': competition_name,
            'season': season_value,
            'type': type_value
        }
    else:
        return {}

def filter_log_file_details(filename):
    league,season = [x for x in filename.split("_-_")][:2]
    
    return {
            'competition': league.replace("_"," "),
            'season': season.replace("_","/"),
        }
    

In [5]:
def get_all_files(directory_path="D:\All Season Data\Files"):
    files_data = []
    fl = False
    for lvl_1,lvl_2,file_list in os.walk(directory_path,):
        if file_list == []:
            continue
        batch_no = next((x.split("_")[0].capitalize() for x in lvl_1.split("\\") if "_MetaData" in x),"B6")
        confederation,region = [x.strip(" ") for x in lvl_1.split("\\") ][-2:]
        for file_name in file_list :
            df = None
            temp = {}
            temp['batch'] = batch_no
            temp['confederation'] = confederation
            temp['region'] = region
            temp = {**temp,**filter_file_details(file_name)}
            try :
                df = pd.read_excel(f"{lvl_1}/{file_name}")
                temp['matches'] = df.shape[0]
                temp['team_counts'] = len(set(list(set(df['home_team']))+list(set(df['away_team'])))) 
                del df
            except :
                temp['team_counts'] = 0 
                temp['matches'] = 0
                if dl:
                    del df
            files_data.append(temp)
        
    return pd.DataFrame(files_data)
file_df = get_all_files()

In [14]:
# list(set(file_df['competition']))[0]
file_df[file_df['competition'] == '1 SNL'].sort_values('season')

Unnamed: 0,batch,confederation,region,competition,season,type,matches,team_counts
1546,B4,UEFA,Slovenia,1 SNL,2017/2018,events,180,10
3121,Old,UEFA,Slovenia,1 SNL,2017/2018,events,180,10
2199,B5,UEFA,Slovenia,1 SNL,2017/2018,events,180,10
2504,B6,UEFA,Slovenia,1 SNL,2017/2018,events,47,10
1547,B4,UEFA,Slovenia,1 SNL,2018/2019,events,4,8
3122,Old,UEFA,Slovenia,1 SNL,2018/2019,events,4,8
2505,B6,UEFA,Slovenia,1 SNL,2018/2019,events,1,2
2200,B5,UEFA,Slovenia,1 SNL,2018/2019,events,4,8
1548,B4,UEFA,Slovenia,1 SNL,2019/2020,events,180,10
2506,B6,UEFA,Slovenia,1 SNL,2019/2020,events,3,6


In [15]:
def parse_and_look_at_errors(file_path) :
    with open(file_path,"r",encoding='utf-8') as f :
        all_logs = [f"2025{x}".replace("\n"," ").replace("\t"," ").replace('[','').replace(']','') for x in f.read().split("\n2025")]
    err_cnts = {
        "Total Error Count" : 0,        
        "TimeoutException" : 0,
        "ReadTimeoutError" : 0,
        "InvalidSessionIdException" : 0,
        "NoSuchWindowException" : 0,
        "AttributeError" : 0,
        "ValueError" : 0,
        "NoSuchElementException" : 0,
        "HTTPConnectionPool" : 0
    }
    for i,x in enumerate(all_logs) :
        if ('Error' in x or 'Exception' in x.lower()) and 'Cleanup failed' not in x:
           err_cnts["Total Error Count"] += 1
        for k,v in err_cnts.items() :
            if k in x :
                err_cnts[k] = v+1
    return err_cnts

def parse_and_look_fixture_count(file_path) :
    with open(file_path,"r",encoding='utf-8') as f :
        all_logs = [f"2025{x}".replace("\n"," ").replace("\t"," ").replace('[','').replace(']','') for x in f.read().split("\n2025")]
    count_logs = [x for x in all_logs if 'Found a total of ' in x and ' matches' in x ]
    # print(file_path.split('/')[-1],count_logs)
    for x in all_logs :
        match = re.search(r'Found a total of (\d+) matches', x)
    
        if match:
            match_count = int(match.group(1))
            return match_count
    return None

In [16]:
def get_all_logs(directory_path="D:\All Season Data\Logs"):
    log_data = []
    all_errs = []
    done_list = []
    for lvl_1,lvl_2,file_list in os.walk(directory_path,):
        if file_list == []:
            continue
        batch_no = next((x.split("_")[0].capitalize() for x in lvl_1.split("\\") if "_runtime" in x),"B6")
        confederation,region = [x.strip(" ") for x in lvl_1.split("\\") ][-2:]
        for file_name in file_list :
            # num = file_name.split("_")[-1]
            # proc_num  = int(num) if num.isdigit() else "SHINSHINAKIBABLABU"
            # file_name = file_name.replace(f"_{proc_num}","")
            
            # if file_name in done_list :
            #     print("no need its done : ",file_name)
            #     continue
            temp = {}
            temp['batch'] = batch_no
            temp['confederation'] = confederation
            temp['region'] = region
            temp = {**temp,**filter_log_file_details(file_name)}
            temp['fixture_count'] = parse_and_look_fixture_count(f"{lvl_1}/{file_name}")
            # temp = {**temp,**parse_and_look_at_errors(f"{lvl_1}/{file_name}")}
            comp_obj = Competition.objects.get(competition_name=temp['competition'])
            temp['confederation']= comp_obj.confederation
            temp['region'] = comp_obj.country
            temp['competition'] = comp_obj.name_scoresaway
            temp['competition_alt'] = comp_obj.competition_name
            log_data.append(temp)
            # done_list.append(file_name)
    return pd.DataFrame(log_data)
    # return pd.DataFrame(logs_data)
err_df = get_all_logs()

In [18]:
merged_df = pd.merge(file_df[file_df['type']=='events'],err_df,how='left',on=['batch','confederation','region','competition','season'])

In [17]:
err_df.head()

Unnamed: 0,batch,confederation,region,competition,season,fixture_count,competition_alt
0,A1,UEFA,Germany,2. Bundesliga,2022/2023,306.0,2. Bundesliga
1,A1,UEFA,Germany,2. Bundesliga,2023/2024,306.0,2. Bundesliga
2,A1,AFC,Australia,A-League Men,2018/2019,140.0,A-League
3,A1,AFC,Australia,A-League Men,2024/2025,176.0,A-League
4,A1,AFC,AFC,AFC Champions League Elite,2022,137.0,AFC Champions League


There are a lot of info about all batches, so considering max num of fixtures from the logs list and only B^ for flie list

In [19]:
clean_df = err_df[['confederation','region','competition','competition_alt','season']].drop_duplicates()

In [20]:
fixture_df = pd.DataFrame()
for i,row in clean_df.iterrows():
    confed = row.get('confederation')
    reg = row.get('region')
    comp = row.get('competition')
    seas = row.get('season')
    
    # Build the query safely using @
    query_string = "confederation == @confed and region == @reg and competition == @comp and season == @seas"
    
    # Execute the query
    temp_df = err_df.query(query_string)
    # temp_df = err_df.query(f"confederation=='{row.get('confederation')}' AND region=='{row.get('region')}' AND competition=='{row.get('competition')}'  AND season=='{row.get('season')}'")
    temp_df = temp_df.dropna().sort_values('fixture_count')
    # print(temp_df['fixture_count'])
    if temp_df.shape[0] > 0 :
        fixture_df = pd.concat([fixture_df, temp_df.head(1)], ignore_index=True)


In [21]:
last_batch = file_df[(file_df['type']=='events') & ((file_df['batch']=='B6') | (file_df['batch']=='Old'))]

In [22]:
print("\n--- Merged DF Columns ---")
print(merged_df.columns)


--- Merged DF Columns ---
Index(['batch', 'confederation', 'region', 'competition', 'season', 'type',
       'matches', 'team_counts', 'fixture_count', 'competition_alt'],
      dtype='object')


In [23]:
merged_df = pd.merge(last_batch,fixture_df,how='left',on=['confederation','region','competition','season'])
merged_df.to_excel("COMPARE_CLEANED_DATA.xlsx",index=False)

In [22]:
fixture_df.to_excel("All_Fixture_Counts.xlsx",index=False)

In [23]:
misc_df = last_batch.reset_index().drop('index',axis=1)

In [112]:
misc_df.to_excel("MISC_DF.xlsx",index=False)

In [113]:
comp_alt_map = {}
for i,row in fixture_df[['competition','competition_alt']].iterrows():
    comp = row.get('competition')
    comp_alt = row.get('competition_alt')
    # if "1" in comp or "1" in comp_alt :
        # print(comp,comp_alt)
    if comp and comp_alt and comp not in comp_alt_map:
        comp_alt_map[comp] = comp_alt

In [126]:
issues = []
new_comp,new_comp_alt = "",""
for i, row in misc_df.iterrows():
    confed = row.get('confederation')
    reg = row.get('region')
    comp = row.get('competition')
    comp_alt = str(comp_alt_map.get(comp))
    seas = row.get('season')
    comp_sm = SA_TO_COMP_NAME_MAP.get(comp)
    comp_fm = FM_TO_COMP_NAME_MAP.get(comp)
    # Build the query safely using @
    if "1" in comp :
        new_comp = comp.replace("1 ","1. ")
        new_comp_alt = comp_alt.replace("1 ","1. ")
        query_string = "confederation == @confed and region == @reg and ((competition == @comp or competition_alt == @comp_alt) or (competition == @new_comp or competition_alt == @new_comp_alt)) and season == @seas"
    elif "2" in comp:
        new_comp = comp.replace("2 ","2. ")
        new_comp_alt = comp_alt.replace("2 ","2. ")
        query_string = "confederation == @confed and region == @reg and ((competition == @comp or competition_alt == @comp_alt) or (competition == @new_comp or competition_alt == @new_comp_alt)) and season == @seas"
    else:
        query_string = "confederation == @confed and region == @reg and (competition == @comp or competition_alt == @comp_alt) and season == @seas"
        
    df = fixture_df.query(query_string)
    if df.shape[0] :
        misc_df.loc[i,'total_fixtures'] = df['fixture_count'].iloc[0]
    else:
        if "25" in seas :
            # print("Easy 24/25",f"{confed} | {reg} | {comp} - {comp_alt} | {seas}")
            continue
        print("Problem : ")
        print(f"{confed} | {reg} | {comp} - {comp_alt} | {seas}")
        print("In Fixture : \n"
        f"confederation == '{confed}' and "
        f"region == '{reg}' and "
        f"((competition == '{comp}' OR competition_alt == '{comp_alt}') or "
        f"(competition == '{new_comp}' OR competition_alt == '{new_comp_alt}')) and "
        f"season == {seas}"
        )
        issues.append(f"{confed} | {reg} | {comp} - {comp_alt} | {seas}")
        print()

Problem : 
UEFA | England | League Two - EFL League Two | 2017/2018
In Fixture : 
confederation == 'UEFA' and region == 'England' and ((competition == 'League Two' OR competition_alt == 'EFL League Two') or (competition == '1. Division' OR competition_alt == 'None')) and season == 2017/2018

Problem : 
UEFA | France | Ligue 1 - Ligue 1 | 2017/2018
In Fixture : 
confederation == 'UEFA' and region == 'France' and ((competition == 'Ligue 1' OR competition_alt == 'Ligue 1') or (competition == 'Ligue 1' OR competition_alt == 'Ligue 1')) and season == 2017/2018

Problem : 
UEFA | Germany | 2 Bundesliga - None | 2023/2024
In Fixture : 
confederation == 'UEFA' and region == 'Germany' and ((competition == '2 Bundesliga' OR competition_alt == 'None') or (competition == '2. Bundesliga' OR competition_alt == 'None')) and season == 2023/2024

Problem : 
UEFA | Greece | Super League 1 - Super League Greece | 2018/2019
In Fixture : 
confederation == 'UEFA' and region == 'Greece' and ((competition == 

In [128]:
results_df = pd.DataFrame()
issues = []
for i, row in fixture_df.iterrows():
    confed = row.get('confederation')
    reg = row.get('region')
    comp = row.get('competition')
    comp_alt = row.get('competition_alt')
    seas = row.get('season')
    fx = row.get('fixture_count')
    
    # # Build the query safely using @
    # query_string = "confederation == @confed and region == @reg and (competition == @comp or competition == @comp_alt) and season == @seas"
    
    # --- 2. Clean the parameters (lower case, remove '.') ---
    #    Handles cases where the values might be None or not strings
    comp_clean = str(comp).lower().replace('.', '') if comp else ''
    comp_alt_clean = str(comp_alt).lower().replace('.', '') if comp_alt else ''


    # --- 3. Build the boolean conditions ---
    #    .str.lower() makes the comparison case-insensitive
    #    .str.replace('.', '') removes periods before comparing
    condition = (
        (misc_df['confederation'].str.lower() == confed.lower()) &
        (misc_df['region'].str.lower() == reg.lower()) &
        (misc_df['season'].str.lower() == seas.lower()) &
        (
            (misc_df['competition'].str.lower().str.replace('.', '', regex=False) == comp_clean) |
            (misc_df['competition'].str.lower().str.replace('.', '', regex=False) == comp_alt_clean)
        )
    )

    # --- 4. Apply the filter and append the results ---
    df = misc_df[condition]
    # df = misc_df.query(query_string)
    if df.shape[0] > 0 :
        first_row =  df.head(1)
        first_row['Total_Fixtures'] = fx
        results_df = pd.concat([results_df, first_row], ignore_index=True)
    else:
        print("Problem : ")
        print(f"{confed} | {reg} | {comp} | {seas}")
        print("In Fixture : \n"
        f"confederation == '{confed}' and "
        f"region == '{reg}' and "
        f"(competition == '{comp}' or "
        f"competition_alt == '{comp}') and "
        f"season == '{seas}'"
        )
        issues.append(f"{reg} | {comp}")
        print()
results_df.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  first_row['Total_Fixtures'] = fx
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  first_row['Total_Fixtures'] = fx
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  first_row['Total_Fixtures'] = fx
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer]

Problem : 
CONMEBOL | Colombia | Primera A | 2025
In Fixture : 
confederation == 'CONMEBOL' and region == 'Colombia' and (competition == 'Primera A' or competition_alt == 'Primera A') and season == '2025'

Problem : 
CONMEBOL | Peru | Liga 1 | 2025
In Fixture : 
confederation == 'CONMEBOL' and region == 'Peru' and (competition == 'Liga 1' or competition_alt == 'Liga 1') and season == '2025'



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  first_row['Total_Fixtures'] = fx
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  first_row['Total_Fixtures'] = fx
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  first_row['Total_Fixtures'] = fx
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer]

Unnamed: 0,batch,confederation,region,competition,season,type,matches,team_counts,total_fixtures,Total_Fixtures
0,B6,AFC,Australia,A-League Men,2017/2018,events,140,10,140.0,140.0
1,B6,AFC,Australia,A-League Men,2018/2019,events,129,10,140.0,140.0
2,B6,AFC,Australia,A-League Men,2019/2020,events,148,11,148.0,148.0
3,B6,AFC,Australia,A-League Men,2020/2021,events,161,12,161.0,161.0
4,B6,AFC,Australia,A-League Men,2021/2022,events,163,12,163.0,163.0


### Check Here for the merged df

In [2]:
all_df = pd.read_excel("COMPARE_CLEANED_DATA.xlsx")
all_df.head(7)

Unnamed: 0,batch_x,confederation,region,competition,season,type,matches,team_counts,batch_y,fixture_count,competition_alt
0,B6,AFC,AFC,AFC Champions League Elite,2018,events,140,46,B1,140.0,AFC Champions League
1,B6,AFC,AFC,AFC Champions League Elite,2019,events,145,51,B1,145.0,AFC Champions League
2,B6,AFC,AFC,AFC Champions League Elite,2020,events,113,49,B1,113.0,AFC Champions League
3,B6,AFC,AFC,AFC Champions League Elite,2021,events,140,45,B1,140.0,AFC Champions League
4,B6,AFC,AFC,AFC Champions League Elite,2022,events,127,46,B1,137.0,AFC Champions League
5,B6,AFC,AFC,AFC Champions League Elite,2023/2024,events,163,53,B1,163.0,AFC Champions League
6,B6,AFC,AFC,AFC Champions League Elite,2024/2025,events,114,27,B1,114.0,AFC Champions League


In [3]:
Competition.objects.all()[0].__dict__

{'_state': <django.db.models.base.ModelState at 0x15b8c2e2350>,
 'id': 1,
 'confederation': 'AFC',
 'country': 'AFC',
 'competition_name': 'AFC Champions League',
 'name_scoresaway': 'AFC Champions League Elite',
 'name_fotmob': 'AFC Champions League Elite',
 'competition_format': 'H',
 'competition_type': 'I',
 'season_start': 7,
 'season_end': 6,
 'event_data_available': True,
 'event_data_url': 'https://www.scoresway.com/en_GB/soccer/afc-champions-league-elite-2024-2025/6ae0gn489uln4bm2q7ir6ruok/fixtures',
 'shot_data_available': False,
 'shot_data_url': ''}

In [140]:
for _, row in all_df.iterrows():
    q |= Q(confederation=row["confederation"],
           country=row["region"],
           competition_name=row["competition"],
           
          )

In [142]:
all_df[(all_df['fixture_count'].isna()) | (all_df['diff'] > 0)].to_excel("Retry_Items.xlsx",index=False)

In [20]:
comp_to_alt_map = {}
for i,row in all_df[['competition','competition_alt']].iterrows():
    comp = row.get("competition")
    comp_alt = row.get("competition_alt")
    if comp in comp_to_alt_map :
        # print("- ",comp)
        if len(comp_to_alt_map[comp]) != 0 or len(comp_to_alt_map[comp]) != 'nan'  :
            continue
    # if len(comp_to_alt_map[comp]) != 0 :
    #     continue
    comp_to_alt_map[comp] = str(comp_alt)
    if "1" in comp or "2" in comp :
        print(f"comp_to_alt['{comp}'] = {comp_alt}")
print(",".join(sorted([f"'{k}'" for k,v in comp_to_alt_map.items() if 'nan' in v.lower()])))

comp_to_alt['J1 League'] = J1 League
comp_to_alt['Thai League 1'] = Thai League 1
comp_to_alt['Liga 1'] = Liga 1
comp_to_alt['1 Division'] = nan
comp_to_alt['Ligue 1'] = nan
comp_to_alt['Ligue 2'] = Ligue 2
comp_to_alt['2 Bundesliga'] = nan
comp_to_alt['Super League 1'] = Super League Greece
comp_to_alt['1 liga'] = nan
comp_to_alt['1 SNL'] = nan
comp_to_alt['1 Lig'] = nan
'1 Division','1 Lig','1 SNL','1 liga','2 Bundesliga','Ekstraklasa','Eliteserien','League Two','Ligue 1','Premier Division','Premiership','Virsliga'


In [23]:
from base_app.helpers import get_name_mappings

In [25]:
get_name_mappings(source='sa',target="n",source_as_file_name=True)

TypeError: get_name_mappings() got an unexpected keyword argument 'source_as_file_name'

Here I've somehow figured out which ones needs to be done again. Now onto the shot details

## Fetch Shot Data retries

In [27]:
def get_all_shot_files(directory_path="D:\All Season Data\Files"):
    files_data = []
    fl = False
    for lvl_1,lvl_2,file_list in os.walk(directory_path,):
        if file_list == []:
            continue
        batch_no = next((x.split("_")[0].capitalize() for x in lvl_1.split("\\") if "_MetaData" in x),"B6")
        confederation,region = [x.strip(" ") for x in lvl_1.split("\\") ][-2:]
        for file_name in file_list :
            if 'shots.xlsx' not in file_name :
                continue
            df = None
            temp = {}
            temp['batch'] = batch_no
            temp['confederation'] = confederation
            temp['region'] = region
            temp = {**temp,**filter_file_details(file_name)}
            try :
                df = pd.read_excel(f"{lvl_1}/{file_name}")
                temp['matches'] = df.shape[0]
                temp['team_counts'] = len(set(list(set(df['home_team']))+list(set(df['away_team'])))) 
                del df
            except :
                temp['team_counts'] = 0 
                temp['matches'] = 0
                if dl:
                    del df
            files_data.append(temp)
        
    return pd.DataFrame(files_data)
shots_df = get_all_shot_files()

In [28]:
shots_df.head()

Unnamed: 0,batch,confederation,region,competition,season,type,matches,team_counts
0,B1,AFC,Australia,A-League,2017/2018,shots,140,10
1,B1,AFC,Australia,A-League,2018/2019,shots,140,10
2,B1,AFC,Australia,A-League,2019/2020,shots,148,11
3,B1,AFC,Australia,A-League,2020/2021,shots,161,12
4,B1,AFC,Australia,A-League,2021/2022,shots,163,12


In [29]:
shots = shots_df.drop(['batch'],axis=1).drop_duplicates()

In [30]:
all_seasons = Season.objects.all().exclude(season_shot_url='')

In [31]:
results_shots = pd.DataFrame()
missing = []
for shot_ssn in all_seasons :
    s_name = shot_ssn.name
    c_name = shot_ssn.competition.competition_name
    conf = shot_ssn.competition.confederation
    region = shot_ssn.competition.country

    comp_clean = str(c_name).lower().replace('.', '') if c_name else ''


    # --- 3. Build the boolean conditions ---
    #    .str.lower() makes the comparison case-insensitive
    #    .str.replace('.', '') removes periods before comparing
    condition = (
        (shots['confederation'].str.lower() == conf.lower()) &
        (shots['region'].str.lower() == region.lower()) &
        (shots['season'].str.lower() == s_name.lower())# &
        # (shots['competition'].str.lower().str.replace('.', '', regex=False) == comp_clean)
    )
# --- 4. Apply the filter and append the results ---
    df = shots[condition]
    # df = misc_df.query(query_string)
    if df.shape[0] > 0 :
        # first_row =  )
        # loc[0, 'Total_Fixtures'] = 
        results_shots = pd.concat([results_shots, df.head(1)], ignore_index=True)
    else:
        temp = {}
        temp['Confederation'] = conf
        temp['Region'] = region
        temp['Competition'] = c_name
        temp['Season'] = s_name
        missing.append(temp)

In [32]:
results_shots.shape, len(missing)

((135, 7), 59)

In [33]:
print(f"Total : {len(all_seasons)} | Found : {shots.shape[0]} | Left : {len(all_seasons) - shots.shape[0] }")

Total : 194 | Found : 127 | Left : 67


In [34]:
results_df

Unnamed: 0,batch,confederation,region,competition,season,type,matches,team_counts,total_fixtures,Total_Fixtures
0,B6,AFC,Australia,A-League Men,2017/2018,events,140,10,140.0,140.0
1,B6,AFC,Australia,A-League Men,2018/2019,events,129,10,140.0,140.0
2,B6,AFC,Australia,A-League Men,2019/2020,events,148,11,148.0,148.0
3,B6,AFC,Australia,A-League Men,2020/2021,events,161,12,161.0,161.0
4,B6,AFC,Australia,A-League Men,2021/2022,events,163,12,163.0,163.0
...,...,...,...,...,...,...,...,...,...,...
446,B6,UEFA,Switzerland,Challenge League,2024/2025,events,180,10,180.0,180.0
447,B6,UEFA,Switzerland,Super League,2017/2018,events,179,10,180.0,180.0
448,B6,UEFA,Switzerland,Super League,2018/2019,events,59,10,180.0,180.0
449,B6,UEFA,Switzerland,Super League,2019/2020,events,180,10,180.0,180.0
