In [1]:
import pandas as pd
import json

# Lift pandas' display limits so every column and row is shown when a frame is inspected.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

# Reading file

In [2]:
# Run configuration for this report.
# short_name: embedded in the file name of the final Excel report.
short_name = 'dpdl_mum_u15'
# folder_name: directory that holds the input JSON and receives the final report.
folder_name = 'dpdl_mumbai_u15_apr'
# file_name: base name (without extension) of the input JSON file inside folder_name.
file_name = 'dpdl-mumbai-u15'

In [3]:
# Load the raw export. The encoding is stated explicitly (JSON is UTF-8 by
# specification) instead of relying on the platform default, which differs
# between operating systems and can corrupt any non-ASCII text in the file.
with open(f'{folder_name}/{file_name}.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

# The records used for the report sit under the 'scouting_data' key of the
# first top-level entry; only the parsing needs the file to be open.
dt = data[0]['scouting_data']
df = pd.DataFrame(dt)

In [4]:
# df = pd.read_csv(f"{folder_name}/{file_name}.csv")

# Column names prep

In [5]:
# Normalise every column header: lower-case it and turn spaces into underscores.
col_list = [header.lower().replace(' ', '_') for header in df.columns]
df.columns = col_list

In [6]:
# user_list = [10255, 10254, 10253, 10252, 10251, 10250, 10249, 10248, 10247, 10246, 10245, 10365, 10370, 10778, 10881, 10882, 10884, 11052, 11054, 11055, 11087, 11106, 11108, 12005, 12609, 12835, 13065, 13688, 13689]
# df = df[df['user_id'].isin(user_list)]

# Column names Excel file

In [7]:
# l = ['absolute_score','user_name','team_name','total_game_time','preferred_position_most_played','total_saves','goal_conceded',
# 'goalkick_accuracy','goalkeeper_throws_completed','punches','handling','progressive_passes_per_90']

# df = pd.Series(l)
# with pd.ExcelWriter("scouting_col_names.xlsx", mode="a", engine="openpyxl", if_sheet_exists="replace") as writer:
#     df.to_excel(writer, sheet_name="goalkeeper", index=False, header=False) 

# # with pd.ExcelWriter("scouting_col_names.xlsx", engine="openpyxl") as writer:
# #     df.to_excel(writer, sheet_name="defensive_midfielder", index=False, header=False) 


# Adding new columns

In [8]:
# Derived metrics built by summing existing columns element-wise.
# NOTE(review): 'ariel_duels_per_90' looks like a misspelling of "aerial", but it
# has to match the column name in the loaded data — confirm before renaming.
df['duels_per_90'] = df['ground_duels_per_90'] + df['ariel_duels_per_90']

# Short + long passes are stored on their own and are also the base of the wider total.
short_plus_long = df['short_passes_per_90'] + df['long_passes_per_90']
df['passes_per_90'] = short_plus_long
df['total_passes_per_90'] = (
    short_plus_long
    + df['through_balls_per_90']
    + df['crosses_per_90']
)

# Goals across the three shot types recorded in the data.
df['goals'] = (
    df['goals_from_close_shot']
    + df['goals_from_long_shot']
    + df['goals_from_headed_shot']
)

def to_camel_case(name):
    """Capitalise each space-separated word of ``name`` (``'john DOE'`` -> ``'John Doe'``).

    Despite the function's name, the result is title case rather than
    camelCase: the string is split on single spaces, ``str.title`` is applied
    to every piece, and the pieces are joined back with single spaces, so the
    original spacing is preserved.

    Args:
        name: The value to convert; expected to be a ``str``.

    Returns:
        The title-cased string. A non-string value (for example ``None`` or a
        float ``NaN`` standing in for a missing name) is returned unchanged so
        that one missing entry does not raise ``AttributeError`` and abort a
        column-wide ``apply``.
    """
    if not isinstance(name, str):
        return name
    return ' '.join(word.title() for word in name.split(' '))


# Title-case every entry of the 'user_name' column via to_camel_case.
titled_user_names = df['user_name'].apply(to_camel_case)
df['user_name'] = titled_user_names

# Final report prep

In [9]:
# Maps each report sheet name (key) to the 'preferred_position_most_played'
# labels that are collected on that sheet.
# Insertion order matters: the report writer starts a fresh workbook for
# 'center_forward|striker', so that key has to stay first.
position_dict = {
    'center_forward|striker': [
        'STRIKER',
        'RIGHT FORWARD',
        'LEFT FORWARD',
        'CENTER FORWARD',
    ],
    'left_winger|right_winger': [
        'LEFT WINGER',
        'RIGHT WINGER',
    ],
    'attacking_midfielder': [
        'ATTACKING MIDFIELDER',
    ],
    'central_midfielder': [
        'CENTRAL MIDFIELDER',
        'RIGHT MIDFIELDER',
        'LEFT MIDFIELDER',
    ],
    'defensive_midfielder': [
        'DEFENSIVE MIDFIELDER',
    ],
    'left_back|right_back': [
        'LEFT SIDE BACK',
        'RIGHT SIDE BACK',
        'LEFT WING BACK',
        'RIGHT WING BACK',
    ],
    'center_back': [
        'CENTER BACK',
    ],
    'goalkeeper': [
        'GOALKEEPER',
    ],
}

In [10]:
def final_scouting_report(position, position_list, top_n=8):
    """Write the top-ranked players of one position group to its own report sheet.

    The columns to export are read from the sheet named ``position`` in
    ``scouting_col_names.xlsx`` (first column, no header row). Four
    passing/possession metrics are appended when that sheet does not already
    list them. Rows of the module-level ``df`` whose
    ``preferred_position_most_played`` is in ``position_list`` are sorted by
    ``absolute_score`` (highest first), cut to the first ``top_n`` rows and
    written, without the score column, to the sheet ``position`` of
    ``{folder_name}/scouting_final_report_{short_name}_.xlsx``.

    The workbook is created from scratch for ``'center_forward|striker'``
    (the first position processed by the notebook), which discards the sheets
    of any earlier run. Every other position is added to the existing
    workbook, replacing a sheet of the same name. If the workbook does not
    exist yet it is created instead of failing in append mode.

    Args:
        position: Sheet name; selects the column list and names the output sheet.
        position_list: Position labels to keep.
        top_n: Number of highest-scoring rows to keep. Defaults to 8.

    Raises:
        KeyError: If a requested column is missing from ``df``, or the column
            list lacks ``preferred_position_most_played`` / ``absolute_score``.
    """
    import os  # local import: only used to check whether the workbook already exists

    always_included = (
        'possession_retained_per_90',
        'succ_interceptions_per_90',
        'short_passes_per_90',
        'total_forward_passes_per_90',
    )

    col_name_df = pd.read_excel("scouting_col_names.xlsx", sheet_name=position, header=None)
    columns = col_name_df[0].tolist()
    # Append to a plain list rather than growing a Series with pd.concat per iteration.
    for extra in always_included:
        if extra not in columns:
            columns.append(extra)

    report = df[columns]
    report = report[report['preferred_position_most_played'].isin(position_list)]
    report = report.sort_values(by=['absolute_score'], ascending=False).head(top_n)
    # The score is only needed for ranking; it is not part of the exported sheet.
    report = report.drop(columns=['absolute_score'])

    report_path = f"{folder_name}/scouting_final_report_{short_name}_.xlsx"
    # Append mode requires an existing file, so fall back to creating the
    # workbook when it is missing (e.g. the function is called for a single
    # position, or positions are processed in a different order).
    start_new_workbook = position == 'center_forward|striker' or not os.path.exists(report_path)
    if start_new_workbook:
        writer_options = {"engine": "openpyxl"}
    else:
        writer_options = {"mode": "a", "engine": "openpyxl", "if_sheet_exists": "replace"}

    with pd.ExcelWriter(report_path, **writer_options) as writer:
        report.to_excel(writer, sheet_name=position, index=False)
    

In [11]:
# Produce one report sheet per position group, in the dictionary's insertion order.
for pos, labels in position_dict.items():
    final_scouting_report(pos, labels)