In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
plt.style.use('ggplot')
import math
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
import datetime
import dateutil.parser
import time
import re
import statsmodels.api as sm
import statsmodels.formula.api as smf
import sklearn
from tqdm import tqdm_notebook
# Turn off pandas' chained-assignment check (no warning, no error) for the
# whole notebook session.
pd.set_option("mode.chained_assignment", None)

In [2]:
# Team lookup table; later cells read its TM_team_id, full_team_name and
# league_country columns. The file is decoded as Windows-1252 (cp1252).
team_converter = pd.read_csv(
    filepath_or_buffer="source_files/team_converter.csv",
    encoding="cp1252",
)

In [3]:
# Manager ratings; the next cell reads its TM_manager_name and
# ProjPointsAboveAverage columns. The file is decoded as Windows-1252 (cp1252).
manager_table = pd.read_csv(
    filepath_or_buffer="output_files/manager_ratings_df.csv",
    encoding="cp1252",
)

In [4]:
# Reduce the ratings frame to two columns: the manager's name (renamed from
# TM_manager_name) and a copy of ProjPointsAboveAverage under a descriptive name.
manager_table = (
    manager_table
    .rename(columns={"TM_manager_name": "manager"})
    .assign(predicted_points_per_season_added=lambda ratings: ratings["ProjPointsAboveAverage"])
    [["manager", "predicted_points_per_season_added"]]
)
manager_table

Unnamed: 0,manager,predicted_points_per_season_added
0,Lucien Favre,3.762138
1,Rudi Garcia,3.399543
2,Diego Simeone,3.267592
3,Maurizio Sarri,3.042755
4,Marcelino,2.853065
5,Jürgen Klopp,2.846813
6,Thomas Tuchel,2.786409
7,Ralph Hasenhüttl,2.686336
8,Luciano Spalletti,2.624679
9,René Girard,2.554083


In [5]:
# Load every manager tenure, ordered by start date with a fresh 0..n-1 index.
# NOTE(review): the sort compares the raw column values as read from the CSV;
# if the dates are strings in a non-ISO format this is not chronological — confirm.
manager_all_tenures_performance_df = (
    pd.read_csv("output_files/manager_all_tenures_performance_df.csv", encoding="cp1252")
    .sort_values("TM_manager_start_date")
    .reset_index(drop=True)
)

## Step 1: create columns containing a manager's previous and current clubs ##

In [6]:
# A manager's last tenure counts as ongoing only if its end date is strictly
# later than this cutoff.
CURRENT_TENURE_CUTOFF = datetime.date(2018, 12, 20)

# Both lists are positionally aligned with manager_table["manager"]: exactly one
# entry is appended to each per manager, so they can be assigned as columns later.
manager_previous_club_ids_list = []
manager_current_club_id_list = []
for m in manager_table["manager"]:
    manager_all_tenures = manager_all_tenures_performance_df[
        manager_all_tenures_performance_df["TM_manager_name"] == m
    ]
    # Every club id on record for this manager (an empty list if there are none).
    manager_previous_club_ids_list.append(list(manager_all_tenures["TM_team_id"]))

    current_club_id = None
    if not manager_all_tenures.empty:
        # The tenure frame was sorted by TM_manager_start_date when it was loaded,
        # so the last matching row is the latest tenure in that ordering.
        manager_last_tenure = manager_all_tenures.iloc[-1]
        try:
            last_end_date = dateutil.parser.parse(manager_last_tenure["TM_manager_end_date"]).date()
        except (TypeError, ValueError, OverflowError):
            # Missing or unparseable end date: treat the manager as having no
            # current club, but let unrelated errors surface instead of hiding them.
            last_end_date = None
        if last_end_date is not None and last_end_date > CURRENT_TENURE_CUTOFF:
            current_club_id = manager_last_tenure["TM_team_id"]
    manager_current_club_id_list.append(current_club_id)

In [7]:
# Translate each manager's list of club ids into parallel lists of club names
# and league countries. For every id the first matching team_converter row is
# used; an id with no match raises IndexError.
manager_previous_club_names_list = []
manager_previous_club_countries_list = []
for club_ids in manager_previous_club_ids_list:
    rows_per_club = [
        team_converter[team_converter["TM_team_id"] == club_id]
        for club_id in club_ids
    ]
    manager_previous_club_names_list.append(
        [rows["full_team_name"].iloc[0] for rows in rows_per_club]
    )
    manager_previous_club_countries_list.append(
        [rows["league_country"].iloc[0] for rows in rows_per_club]
    )

In [8]:
# Resolve each current club id to its name and league country. Any failure
# (e.g. an id of None, which matches no row) yields None for both values.
manager_current_club_names_list = []
manager_current_club_countries_list = []
for current_id in manager_current_club_id_list:
    try:
        matching_rows = team_converter[team_converter["TM_team_id"] == current_id]
        name_and_country = (
            matching_rows["full_team_name"].iloc[0],
            matching_rows["league_country"].iloc[0],
        )
    except Exception:
        name_and_country = (None, None)
    manager_current_club_names_list.append(name_and_country[0])
    manager_current_club_countries_list.append(name_and_country[1])

In [9]:
# Attach the per-manager lists built above as columns; every list is aligned
# positionally with the rows of manager_table.
manager_table["previous_club_names"] = manager_previous_club_names_list
manager_table["previous_club_ids"] = manager_previous_club_ids_list
manager_table["previous_club_countries"] = manager_previous_club_countries_list
manager_table["current_club_name"] = manager_current_club_names_list
# Store the current club's league country alongside its name and id.
manager_table["current_club_country"] = manager_current_club_countries_list
manager_table["current_club_id"] = manager_current_club_id_list
# 1 when a current club name is present, otherwise 0.
manager_table["currently_employed"] = 1*(manager_table["current_club_name"].notnull())

## Step 2: manually set Manchester United as Ole Gunnar Solskjaer's current club ##

In [10]:
# Index label of the first row whose manager is Ole Gunnar Solskjaer
# (raises IndexError if no such row exists).
solskjaer_index = manager_table.index[(manager_table["manager"] == "Ole Gunnar Solskjaer").values][0]

In [11]:
# Manual override of Ole Gunnar Solskjaer's row. Each cell is written directly on
# the DataFrame with .at rather than through chained indexing
# (manager_table[col].loc[i] = v), which may update a temporary object and is a
# no-op under pandas Copy-on-Write. .at also stores a list as one cell value.
# NOTE(review): previous_club_countries is not updated here, so it can end up
# with a different length than previous_club_names/previous_club_ids — confirm.
manager_table.at[solskjaer_index, "previous_club_names"] = ['Cardiff', 'Manchester United']
manager_table.at[solskjaer_index, "previous_club_ids"] = [603, 985]
manager_table.at[solskjaer_index, "current_club_name"] = 'Manchester United'
manager_table.at[solskjaer_index, "current_club_id"] = 985
manager_table.at[solskjaer_index, "currently_employed"] = 1

In [12]:
# Persist the enriched manager table. The DataFrame index is written as the
# first CSV column because to_csv's index option is left at its default.
# NOTE(review): no encoding is passed here although the inputs are read as
# cp1252 — confirm downstream readers expect pandas' default output encoding.
manager_table.to_csv(path_or_buf="output_files/manager_table_df.csv")