In [1]:
import pandas as pd 
import seaborn as sns
import matplotlib.pyplot as plt 
import numpy as np

### Loading data

We go back to the data we saved in notebook 05, as it contains the same information as in plotting data and also some str type columns that we could not plot but from which we could extract information during the feature engineering.



In [2]:
df = pd.read_csv('Cleaned Data', index_col = [0])
df.tail(5)

Unnamed: 0,match_id,year,round,local,visitor,league_id,team1_id_season,team2_id_season,team1_id,team2_id,...,points_visitor,wins_visitor,draws_visitor,losses_visitor,gf_visitor,ga_visitor,avg_visitor,pos_visitor,form_visitor,match_winner
4783,91110,2021,38,Real Oviedo,Sabadell,57314,6382799,6382802,2115,2198,...,40,9.0,13.0,15.0,35,42,-7.0,19,lddww,0
4784,91104,2021,38,FC Cartagena,CD Castellón,57314,6382787,6382788,643,673,...,41,11.0,8.0,18.0,35,43,-8.0,18,wlwdd,0
4785,91112,2021,38,UD Logroñés,Girona,57314,6382792,6391868,1578,1236,...,58,16.0,10.0,11.0,39,34,5.0,6,wlwww,2
4786,91109,2021,38,Rayo Vallecano,Leganés,57314,6382798,6382791,2080,1535,...,62,18.0,8.0,11.0,41,31,10.0,4,wldwd,1
4787,91105,2021,38,Real Sporting,Lugo,57314,6382800,6382793,2125,1598,...,37,8.0,13.0,16.0,32,49,-17.0,21,lllld,0


### Creating new columns

In this notebook new columns will be created with information that can be useful to improve the results of the model. These columns will be obtained from columns that have already been elaborated as strings, or based on columns that have already been introduced in the model.

#### Results a Round before

By making use of the columns form local and form visitor new columns will be created that contain the results of the last few matches. We will transform the strings of these columns into points obtained.

In [3]:
def prev1roundlocal(col):
    if col['form_local'][0] == 'w':
        return 3
    if col['form_local'][0] == 'd':
        return 1
    if col['form_local'][0] == 'l':
        return 0

df['round-1_local'] = df.apply(lambda col: prev1roundlocal (col),axis=1)
df['round-1_local']

10      1
11      1
12      3
13      3
14      1
       ..
4783    0
4784    3
4785    3
4786    3
4787    0
Name: round-1_local, Length: 4662, dtype: int64

In [4]:
df['form_local']

10          d
11          d
12          w
13          w
14          d
        ...  
4783    ldwld
4784    wddwl
4785    wwdld
4786    wlddl
4787    llldd
Name: form_local, Length: 4662, dtype: object

In [5]:
def prev1roundvisitor(col):
    if col['form_visitor'][0] == 'w':
        return 3
    if col['form_visitor'][0] == 'd':
        return 1
    if col['form_visitor'][0] == 'l':
        return 0

df['round-1_visitor'] = df.apply(lambda col: prev1roundlocal (col), axis=1)
df['round-1_visitor'].tail()

4783    0
4784    3
4785    3
4786    3
4787    0
Name: round-1_visitor, dtype: int64

In [6]:
df.head(5)

Unnamed: 0,match_id,year,round,local,visitor,league_id,team1_id_season,team2_id_season,team1_id,team2_id,...,draws_visitor,losses_visitor,gf_visitor,ga_visitor,avg_visitor,pos_visitor,form_visitor,match_winner,round-1_local,round-1_visitor
10,37467,2016,2,Villarreal,Espanyol,15373,214625,214629,2716,998,...,0.0,0.0,1,0,1.0,5,w,0,1,1
11,37471,2016,2,R. Sociedad,Real Sporting,15373,214631,214619,2120,2125,...,1.0,0.0,0,0,0.0,13,d,1,1,1
12,37463,2016,2,Barcelona,Málaga,15373,214620,214628,429,1617,...,1.0,0.0,0,0,0.0,9,d,0,3,3
13,37457,2016,2,Celta,Rayo Vallecano,15373,214627,214630,712,2080,...,1.0,0.0,0,0,0.0,11,d,0,3,3
14,37469,2016,2,Real Madrid,Real Betis,15373,214621,214618,2107,486,...,1.0,0.0,1,1,0.0,6,d,0,1,1


In order to extract information of previous rounds from columns 'form_local' & 'form_visitor' is necessary to put into the same length the strings from each row in this colums. We will fill the spaces with 'n' from none. 

In [7]:
max_length = df.form_local.map(len).max()
df.form_local = df.form_local.apply(lambda x: x + 'n'*(max_length - len(x)))
df['form_local']

10      dnnnn
11      dnnnn
12      wnnnn
13      wnnnn
14      dnnnn
        ...  
4783    ldwld
4784    wddwl
4785    wwdld
4786    wlddl
4787    llldd
Name: form_local, Length: 4662, dtype: object

In [8]:
df.form_visitor = df.form_visitor.apply(lambda x: x + 'n'*(max_length - len(x)))
df['form_visitor']

10      wnnnn
11      dnnnn
12      dnnnn
13      dnnnn
14      dnnnn
        ...  
4783    lddww
4784    wlwdd
4785    wlwww
4786    wldwd
4787    lllld
Name: form_visitor, Length: 4662, dtype: object

We create new columns assigning points to the home and away teams according to their results in the last matches. In order not to confuse the model, when we do not have information about the result of the round in question (this happens in the first rounds), we will assign an indicative value to the coefficient, different from 0, 1 and 3, so as not to confuse the lack of information with wins, draws or defeats.

#### Results two rounds before or more

In [9]:
def prevroundlocal(col, round):
    if col['form_local'][round] == 'w':
        return 3
    if col['form_local'][round] == 'd':
        return 1
    if col['form_local'][round] == 'l':
        return 0
    if col['form_local'][round] == 'n':
        return 1.5

In [10]:
def prevroundvisitor(col, round):
    if col['form_visitor'][round] == 'w':
        return 3
    if col['form_visitor'][round] == 'd':
        return 1
    if col['form_visitor'][round] == 'l':
        return 0
    if col['form_visitor'][round] == 'n':
        return 1.5

In [11]:
df['round-2_local'] = df.apply(lambda col: prevroundlocal (col, 1),axis=1)
df['round-2_visitor'] = df.apply(lambda col: prevroundvisitor (col, 1),axis=1)
df['round-3_local'] = df.apply(lambda col: prevroundlocal (col, 2),axis=1)
df['round-3_visitor'] = df.apply(lambda col: prevroundvisitor (col, 2),axis=1)
df['round-4_local'] = df.apply(lambda col: prevroundlocal (col, 3),axis=1)
df['round-4_visitor'] = df.apply(lambda col: prevroundvisitor (col, 3),axis=1)
df['round-5_local'] = df.apply(lambda col: prevroundlocal (col, 4),axis=1)
df['round-5_visitor'] = df.apply(lambda col: prevroundvisitor (col, 4),axis=1)

In [12]:
df.tail()

Unnamed: 0,match_id,year,round,local,visitor,league_id,team1_id_season,team2_id_season,team1_id,team2_id,...,round-1_local,round-1_visitor,round-2_local,round-2_visitor,round-3_local,round-3_visitor,round-4_local,round-4_visitor,round-5_local,round-5_visitor
4783,91110,2021,38,Real Oviedo,Sabadell,57314,6382799,6382802,2115,2198,...,0,0,1.0,1.0,3.0,1.0,0.0,3.0,1.0,3.0
4784,91104,2021,38,FC Cartagena,CD Castellón,57314,6382787,6382788,643,673,...,3,3,1.0,0.0,1.0,3.0,3.0,1.0,0.0,1.0
4785,91112,2021,38,UD Logroñés,Girona,57314,6382792,6391868,1578,1236,...,3,3,3.0,0.0,1.0,3.0,0.0,3.0,1.0,3.0
4786,91109,2021,38,Rayo Vallecano,Leganés,57314,6382798,6382791,2080,1535,...,3,3,0.0,0.0,1.0,1.0,1.0,3.0,0.0,1.0
4787,91105,2021,38,Real Sporting,Lugo,57314,6382800,6382793,2125,1598,...,0,0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0


#### Coefficients of points accumulated in the last rounds

Now that there is a column created with the information of each previous round, columns are created by accumulating points obtained in the last rounds. 

This is intended to evaluate the influence of the results in the pre-match rounds for each of the teams.

In [13]:
df['pts_last1_local'] = df.apply(lambda x: x['round-1_local'], axis=1)
df['pts_last2_local'] = df.apply(lambda x: x['round-1_local'] + x['round-2_local'], axis=1)
df['pts_last2_local'].tail()

4783    1.0
4784    4.0
4785    6.0
4786    3.0
4787    0.0
Name: pts_last2_local, dtype: float64

In [14]:
print(df['round-1_local'], df['round-2_local'])

10      1
11      1
12      3
13      3
14      1
       ..
4783    0
4784    3
4785    3
4786    3
4787    0
Name: round-1_local, Length: 4662, dtype: int64 10      1.5
11      1.5
12      1.5
13      1.5
14      1.5
       ... 
4783    1.0
4784    1.0
4785    3.0
4786    0.0
4787    0.0
Name: round-2_local, Length: 4662, dtype: float64


In [15]:
df['pts_last3_local'] = df.apply(lambda x: x['round-1_local'] + x['round-2_local'] + x['round-3_local'] , axis=1)
df['pts_last4_local'] = df.apply(lambda x: x['round-1_local'] + x['round-2_local'] + x['round-3_local'] + x['round-4_local'] , axis=1)
df['pts_last5_local'] = df.apply(lambda x: x['round-1_local'] + x['round-2_local'] + x['round-3_local'] + x['round-4_local'] + x['round-5_local'], axis=1)
df['pts_last1_visitor'] = df.apply(lambda x: x['round-1_visitor'] , axis=1)
df['pts_last2_visitor'] = df.apply(lambda x: x['round-1_visitor'] + x['round-2_visitor'] , axis=1)
df['pts_last3_visitor'] = df.apply(lambda x: x['round-1_visitor'] + x['round-2_visitor'] + x['round-3_visitor']  , axis=1)
df['pts_last4_visitor'] = df.apply(lambda x: x['round-1_visitor'] + x['round-2_visitor'] + x['round-3_visitor'] + x['round-4_visitor'] , axis=1)
df['pts_last5_visitor'] = df.apply(lambda x: x['round-1_visitor'] + x['round-2_visitor'] + x['round-3_visitor'] + x['round-4_visitor'] + x['round-5_visitor'], axis=1)

In [16]:
df.tail(10)

Unnamed: 0,match_id,year,round,local,visitor,league_id,team1_id_season,team2_id_season,team1_id,team2_id,...,pts_last1_local,pts_last2_local,pts_last3_local,pts_last4_local,pts_last5_local,pts_last1_visitor,pts_last2_visitor,pts_last3_visitor,pts_last4_visitor,pts_last5_visitor
4778,91108,2021,38,Ponferradina,Albacete,57314,6382797,6382785,3287,140,...,0,1.0,2.0,5.0,5.0,0,0.0,3.0,4.0,4.0
4779,91113,2021,38,Real Zaragoza,Espanyol,57314,6390092,6382789,2136,998,...,3,6.0,6.0,7.0,8.0,3,6.0,7.0,10.0,13.0
4780,91103,2021,38,Alcorcón,Las Palmas,57314,6382786,6382790,64,2563,...,0,3.0,6.0,6.0,9.0,0,1.0,2.0,2.0,5.0
4781,91106,2021,38,Málaga,Mallorca,57314,6382794,6382795,1617,1623,...,3,6.0,7.0,7.0,7.0,3,6.0,6.0,6.0,9.0
4782,91107,2021,38,Mirandés,Fuenlabrada,57314,6382796,6387869,1699,1179,...,3,3.0,4.0,5.0,5.0,3,4.0,5.0,8.0,11.0
4783,91110,2021,38,Real Oviedo,Sabadell,57314,6382799,6382802,2115,2198,...,0,1.0,4.0,4.0,5.0,0,1.0,2.0,5.0,8.0
4784,91104,2021,38,FC Cartagena,CD Castellón,57314,6382787,6382788,643,673,...,3,4.0,5.0,8.0,8.0,3,3.0,6.0,7.0,8.0
4785,91112,2021,38,UD Logroñés,Girona,57314,6382792,6391868,1578,1236,...,3,6.0,7.0,7.0,8.0,3,3.0,6.0,9.0,12.0
4786,91109,2021,38,Rayo Vallecano,Leganés,57314,6382798,6382791,2080,1535,...,3,3.0,4.0,5.0,5.0,3,3.0,4.0,7.0,8.0
4787,91105,2021,38,Real Sporting,Lugo,57314,6382800,6382793,2125,1598,...,0,0.0,0.0,1.0,2.0,0,0.0,0.0,0.0,1.0


#### Differences between teams (avg, pts & pos) 

Three more columns are created to symbolise the differences between the two teams in terms of points average and position in the table. 

In [17]:
df['pts_difference'] = df.apply(lambda x: x['points_local'] - x['points_visitor'] , axis=1)
df['avg_difference'] = df.apply(lambda x: x['avg_local'] - x['avg_visitor'] , axis=1)
df['pos_difference'] = df.apply(lambda x: x['pos_local'] - x['pos_visitor'] , axis=1)

In [18]:
df.head(5)

Unnamed: 0,match_id,year,round,local,visitor,league_id,team1_id_season,team2_id_season,team1_id,team2_id,...,pts_last4_local,pts_last5_local,pts_last1_visitor,pts_last2_visitor,pts_last3_visitor,pts_last4_visitor,pts_last5_visitor,pts_difference,avg_difference,pos_difference
10,37467,2016,2,Villarreal,Espanyol,15373,214625,214629,2716,998,...,5.5,7.0,1,2.5,4.0,5.5,7.0,-2,-1.0,2
11,37471,2016,2,R. Sociedad,Real Sporting,15373,214631,214619,2120,2125,...,5.5,7.0,1,2.5,4.0,5.5,7.0,0,0.0,-3
12,37463,2016,2,Barcelona,Málaga,15373,214620,214628,429,1617,...,7.5,9.0,3,4.5,6.0,7.5,9.0,2,1.0,-5
13,37457,2016,2,Celta,Rayo Vallecano,15373,214627,214630,712,2080,...,7.5,9.0,3,4.5,6.0,7.5,9.0,2,1.0,-9
14,37469,2016,2,Real Madrid,Real Betis,15373,214621,214618,2107,486,...,5.5,7.0,1,2.5,4.0,5.5,7.0,0,0.0,6


### Introducing categorical input variable

We want to enter the id of each team as an input variable (categorical). In this way the model takes into account the behaviour of the teams in previous seasons (historical trend of the team's results). 

For this purpose, the variables corresponding to the identifiers of each equipment are used. teams ids: team1_id_season y team1_id). To use these identifiers we will use get dummies as it is a categorical variable.

The interest this may have is, if so, to detect the favouritism of a team by its club history rather than by its current situation in the season.

In [19]:
local_dummies = pd.get_dummies(df['local'])
local_dummies.columns = [item + "_local" for item in local_dummies.columns]

visitor_dummies = pd.get_dummies(df['visitor'])
visitor_dummies.columns = [item + "_visitor" for item in visitor_dummies.columns]

In [20]:
df_input = df.copy()

In [21]:
df_input = df_input.join(local_dummies)
df_input = df_input.join(visitor_dummies)

In [22]:
winner = df_input.pop('match_winner')
df_input.insert(df_input.shape[1], 'match_winner', winner)

In [23]:
df_input[['local', 'visitor', 'division', 'local_goals',
       'visitor_goals', 'result', 'winner', 'points_local', 'wins_local',
       'draws_local', 'losses_local', 'gf_local', 'ga_local', 'avg_local',
       'pos_local', 'form_local', 'points_visitor', 'wins_visitor',
       'draws_visitor']]

Unnamed: 0,local,visitor,division,local_goals,visitor_goals,result,winner,points_local,wins_local,draws_local,losses_local,gf_local,ga_local,avg_local,pos_local,form_local,points_visitor,wins_visitor,draws_visitor
10,Villarreal,Espanyol,1,3,1,3-1,214625,1,0.0,1.0,0.0,1,1,0.0,7,dnnnn,3,1.0,0.0
11,R. Sociedad,Real Sporting,1,0,0,0-0,0,1,0.0,1.0,0.0,0,0,0.0,10,dnnnn,1,0.0,1.0
12,Barcelona,Málaga,1,1,0,1-0,214620,3,1.0,0.0,0.0,1,0,1.0,4,wnnnn,1,0.0,1.0
13,Celta,Rayo Vallecano,1,3,0,3-0,214627,3,1.0,0.0,0.0,2,1,1.0,2,wnnnn,1,0.0,1.0
14,Real Madrid,Real Betis,1,5,0,5-0,214621,1,0.0,1.0,0.0,0,0,0.0,12,dnnnn,1,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4783,Real Oviedo,Sabadell,2,2,1,2-1,6382799,44,9.0,17.0,11.0,38,38,0.0,14,ldwld,40,9.0,13.0
4784,FC Cartagena,CD Castellón,2,1,0,1-0,6382787,39,9.0,12.0,16.0,36,47,-11.0,20,wddwl,41,11.0,8.0
4785,UD Logroñés,Girona,2,1,4,1-4,6391868,41,10.0,11.0,16.0,25,43,-18.0,17,wwdld,58,16.0,10.0
4786,Rayo Vallecano,Leganés,2,1,1,1-1,0,57,16.0,9.0,12.0,43,36,7.0,7,wlddl,62,18.0,8.0


In [24]:
remove_columns = ['match_id','local','visitor','league_id', 'team1_id_season', 'team2_id_season', 'team1_id',
               'team2_id', 'local_abbr', 'visitor_abbr', 'local_goals', 'visitor_goals', 'result', 'winner', 
               'form_local', 'form_visitor']
featured_df = df_input[[item for item in df_input.columns if item not in remove_columns]]

In [25]:
featured_df.tail()

Unnamed: 0,year,round,division,points_local,wins_local,draws_local,losses_local,gf_local,ga_local,avg_local,...,Reus Deportiu_visitor,Sabadell_visitor,Sevilla_visitor,Sevilla At._visitor,Tenerife_visitor,UCAM Murcia_visitor,UD Logroñés_visitor,Valencia_visitor,Villarreal_visitor,match_winner
4783,2021,38,2,44,9.0,17.0,11.0,38,38,0.0,...,0,1,0,0,0,0,0,0,0,0
4784,2021,38,2,39,9.0,12.0,16.0,36,47,-11.0,...,0,0,0,0,0,0,0,0,0,0
4785,2021,38,2,41,10.0,11.0,16.0,25,43,-18.0,...,0,0,0,0,0,0,0,0,0,2
4786,2021,38,2,57,16.0,9.0,12.0,43,36,7.0,...,0,0,0,0,0,0,0,0,0,1
4787,2021,38,2,58,15.0,13.0,9.0,35,25,10.0,...,0,0,0,0,0,0,0,0,0,0


In [26]:
for item in featured_df:
    print(item)


year
round
division
points_local
wins_local
draws_local
losses_local
gf_local
ga_local
avg_local
pos_local
points_visitor
wins_visitor
draws_visitor
losses_visitor
gf_visitor
ga_visitor
avg_visitor
pos_visitor
round-1_local
round-1_visitor
round-2_local
round-2_visitor
round-3_local
round-3_visitor
round-4_local
round-4_visitor
round-5_local
round-5_visitor
pts_last1_local
pts_last2_local
pts_last3_local
pts_last4_local
pts_last5_local
pts_last1_visitor
pts_last2_visitor
pts_last3_visitor
pts_last4_visitor
pts_last5_visitor
pts_difference
avg_difference
pos_difference
Alavés_local
Albacete_local
Alcorcón_local
Almería_local
Athletic_local
Atlético_local
Barcelona_local
Barcelona B_local
Bilbao Ath._local
CD Castellón_local
Celta_local
Cultural Leonesa_local
Cádiz_local
Córdoba_local
Deportivo_local
Eibar_local
Elche_local
Espanyol_local
Extremadura_local
FC Cartagena_local
Fuenlabrada_local
Getafe_local
Gimnàstic Tarragona_local
Girona_local
Granada_local
Huesca_local
Las Palmas_local


### Saving data for Tableau 

At this point, we distinct two different datasets. The features one, which will be used for modelling saved as a .csv file and the visualizing one, used for the front-end which will be saved as a .xls file. 

Is alredy done the featured one, we re-used df now to select the interesting columns for differents visualizations in Tableau

In [27]:
df.columns

Index(['match_id', 'year', 'round', 'local', 'visitor', 'league_id',
       'team1_id_season', 'team2_id_season', 'team1_id', 'team2_id',
       'local_abbr', 'visitor_abbr', 'division', 'local_goals',
       'visitor_goals', 'result', 'winner', 'points_local', 'wins_local',
       'draws_local', 'losses_local', 'gf_local', 'ga_local', 'avg_local',
       'pos_local', 'form_local', 'points_visitor', 'wins_visitor',
       'draws_visitor', 'losses_visitor', 'gf_visitor', 'ga_visitor',
       'avg_visitor', 'pos_visitor', 'form_visitor', 'match_winner',
       'round-1_local', 'round-1_visitor', 'round-2_local', 'round-2_visitor',
       'round-3_local', 'round-3_visitor', 'round-4_local', 'round-4_visitor',
       'round-5_local', 'round-5_visitor', 'pts_last1_local',
       'pts_last2_local', 'pts_last3_local', 'pts_last4_local',
       'pts_last5_local', 'pts_last1_visitor', 'pts_last2_visitor',
       'pts_last3_visitor', 'pts_last4_visitor', 'pts_last5_visitor',
       'pts_differen

In [28]:
frontend_df = df.drop(['match_id','league_id',
                       'team1_id_season', 'team2_id_season', 'team1_id',
                       'team2_id','local_goals','visitor_goals', 'result',
                       'winner','form_local','form_visitor','round-1_local',
                       'round-1_visitor', 'round-2_local', 'round-2_visitor',
                       'round-3_local', 'round-3_visitor', 'round-4_local', 
                       'round-4_visitor','round-5_local', 'round-5_visitor'], axis=1)


A category variable identical to match winner is created to facilitate the frontend realization. The content of both variables is the same, only the dtype changes. 
Having both variables available will facilitate representation in Tableau.

In [29]:
def categoricwinner(col):
    if col['match_winner'] == 0:
        return 'Local win'
    if col['match_winner'] == 1:
        return 'Draw'
    if col['match_winner'] == 2:
        return 'Visitor win'

frontend_df['Result'] = frontend_df.apply(lambda col: categoricwinner (col), axis=1)
frontend_df['Result'].tail()

4783      Local win
4784      Local win
4785    Visitor win
4786           Draw
4787      Local win
Name: Result, dtype: object

In [30]:
frontend_df

Unnamed: 0,year,round,local,visitor,local_abbr,visitor_abbr,division,points_local,wins_local,draws_local,...,pts_last5_local,pts_last1_visitor,pts_last2_visitor,pts_last3_visitor,pts_last4_visitor,pts_last5_visitor,pts_difference,avg_difference,pos_difference,Result
10,2016,2,Villarreal,Espanyol,VIL,ESP,1,1,0.0,1.0,...,7.0,1,2.5,4.0,5.5,7.0,-2,-1.0,2,Local win
11,2016,2,R. Sociedad,Real Sporting,RSO,SPO,1,1,0.0,1.0,...,7.0,1,2.5,4.0,5.5,7.0,0,0.0,-3,Draw
12,2016,2,Barcelona,Málaga,FCB,MAL,1,3,1.0,0.0,...,9.0,3,4.5,6.0,7.5,9.0,2,1.0,-5,Local win
13,2016,2,Celta,Rayo Vallecano,CEL,RAY,1,3,1.0,0.0,...,9.0,3,4.5,6.0,7.5,9.0,2,1.0,-9,Local win
14,2016,2,Real Madrid,Real Betis,RMA,BET,1,1,0.0,1.0,...,7.0,1,2.5,4.0,5.5,7.0,0,0.0,6,Local win
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4783,2021,38,Real Oviedo,Sabadell,ROV,SAB,2,44,9.0,17.0,...,5.0,0,1.0,2.0,5.0,8.0,4,7.0,-5,Local win
4784,2021,38,FC Cartagena,CD Castellón,CAR,CAS,2,39,9.0,12.0,...,8.0,3,3.0,6.0,7.0,8.0,-2,-3.0,2,Local win
4785,2021,38,UD Logroñés,Girona,UDL,GIR,2,41,10.0,11.0,...,8.0,3,3.0,6.0,9.0,12.0,-17,-23.0,11,Visitor win
4786,2021,38,Rayo Vallecano,Leganés,RAY,LEG,2,57,16.0,9.0,...,5.0,3,3.0,4.0,7.0,8.0,-5,-3.0,3,Draw


In [31]:
frontend_df.to_excel('frontend data.xls')
featured_df.to_csv('featured data')