In [7]:
#install.packages('gtools')
library('gtools')
library(dplyr)

In [8]:
#install.packages("matrixStats")
library(matrixStats)

In [9]:
#Part 1 and 2: Simulating Ranking according to expected final standing and variation in simulated earnings

ERROR: Error in parse(text = x, srcfile = src): <text>:1:6: unexpected numeric constant
1: Part 1
         ^


In [10]:
#Download the FiveThirtyEight global club SPI rankings CSV into a data frame (needs network access)
league_data<-read.csv("https://projects.fivethirtyeight.com/soccer-api/club/spi_global_rankings.csv")


#Master Season simulator function...outputs ranking and points
#
#Plays one double round-robin season (every team hosts every other team once) with
#Poisson-distributed goals and returns the final table.
#
#Args:
#  league_data: data frame with at least the columns `league`, `name`, `off` and `def`.
#Returns:
#  Data frame with one row per "Barclays Premier League" team (row names are the team
#  names), ordered from first to last place, with columns `place`, `prize` and `total.wins`.
#Raises:
#  An error when the number of Premier League teams differs from the length of the prize table.
season_sim <- function(league_data) {
    #Prize winnings by final place (in GB pounds, millions). I ballparked these numbers from the graph Alistair gave us
    prize_by_place <- c(215, 210, 195, 185, 165, 152, 145, 130, 125, 122,
                        118, 115, 112, 110, 105, 102, 100, 52, 50, 49)

    #Load in league data and offense and defense scores, in team-name order
    premLeague <- subset(league_data, league == "Barclays Premier League")
    premLeague <- premLeague[order(as.character(premLeague$name)), ]
    teams <- as.character(premLeague$name)
    n_teams <- length(teams)
    if (n_teams != length(prize_by_place)) {
        stop("expected ", length(prize_by_place), " Premier League teams but found ",
             n_teams, call. = FALSE)
    }

    #Create simulation parameters
    alpha <- log(premLeague$off) - log(mean(premLeague$def))
    delta <- log(mean(premLeague$off)) - log(premLeague$def)

    #Every ordered (home, away) pairing of two different teams, as row indices into premLeague
    fixtures <- expand.grid(away = seq_len(n_teams), home = seq_len(n_teams))
    fixtures <- fixtures[fixtures$home != fixtures$away, ]
    home <- fixtures$home
    away <- fixtures$away

    #Draw all scores in one call: row 1 holds the home goals, row 2 the away goals.
    #Goals stay integers so results are compared as numbers; compared as text, "10" sorts below "2".
    goal_rates <- rbind(exp(alpha[home] - delta[away]), exp(alpha[away] - delta[home]))
    goals <- matrix(rpois(length(goal_rates), goal_rates), nrow = 2)
    home_goals <- goals[1, ]
    away_goals <- goals[2, ]

    #Season total per team: what it collected at home plus what it collected away
    season_total <- function(at_home, at_away) {
        as.vector(rowsum(at_home, home)) + as.vector(rowsum(at_away, away))
    }
    drawn <- as.integer(home_goals == away_goals)
    goals_for <- season_total(home_goals, away_goals)
    goals_against <- season_total(away_goals, home_goals)
    total_wins <- season_total(as.integer(home_goals > away_goals),
                               as.integer(away_goals > home_goals))
    #3 points for a win, 1 for a draw
    total_points <- 3L * total_wins + season_total(drawn, drawn)
    goal_dif <- goals_for - goals_against
    #Random number used as the last tie-breaker
    random_seed <- runif(n_teams, -20, 20)

    #Arrange stats to determine place and prize information.
    ranking <- order(-total_points, -goal_dif, -goals_for, -random_seed)
    data.frame(place = seq_len(n_teams),
               prize = prize_by_place,
               total.wins = total_wins[ranking],
               row.names = teams[ranking])
}


In [11]:
#Monte carlo simulation function for regular season
#
#Calls a season simulator repeatedly and summarises place, prize and wins per team.
#
#Args:
#  fun:     simulator to call; its result must be a data frame whose row names are the
#           team names and which has the columns `place`, `prize` and `total.wins`.
#  fun.arg: list of arguments handed to `fun` through do.call().
#  nSims:   number of simulated seasons (at least 1).
#Returns:
#  Data frame with one row per team (row names taken from the first simulated season) and the
#  columns avg_place, avg_prize, avg_wins, std_place, std_prize, std_wins, avg.value.per.win.
#  The std_* columns are sample standard deviations and are NA when nSims is 1.
monte.carlo.sim <- function(fun, fun.arg, nSims = 100) {
    stopifnot(nSims >= 1)

    #The first season fixes the set of teams and therefore the size of the result matrices
    season <- do.call(fun, fun.arg)
    teamnames <- rownames(season)

    #One row per team, one column per simulated season
    blank <- matrix(NA_real_, nrow = length(teamnames), ncol = nSims,
                    dimnames = list(teamnames, NULL))
    placementMatrix <- blank
    prizeMatrix <- blank
    winMatrix <- blank

    for (rep in seq_len(nSims)) {
        #The season for rep 1 was already simulated above; draw a fresh one for the rest
        if (rep > 1) {
            season <- do.call(fun, fun.arg)
        }
        #Look rows up by team name because every season comes back in a different order
        placementMatrix[, rep] <- season[teamnames, "place"]
        prizeMatrix[, rep] <- season[teamnames, "prize"]
        winMatrix[, rep] <- season[teamnames, "total.wins"]
    }

    #Calculate summary statistics from these simulations. Note you may want to make these matrices into data frames and
    #graph their data points to illustrate, e.g. variability in prize money. If so, you can take the following operations
    #out of the function.
    results <- data.frame(avg_place = rowSums(placementMatrix) / nSims,
                          avg_prize = rowSums(prizeMatrix) / nSims,
                          avg_wins = rowSums(winMatrix) / nSims,
                          std_place = apply(placementMatrix, 1, sd),
                          std_prize = apply(prizeMatrix, 1, sd),
                          std_wins = apply(winMatrix, 1, sd),
                          row.names = teamnames)
    results["avg.value.per.win"] <- results$avg_prize / results$avg_wins

    results
}

In [20]:
    #Run the season simulator through the Monte Carlo wrapper (3000 simulated seasons) and print the per-team summary
orig_results <- monte.carlo.sim(fun = season_sim, fun.arg = list(league_data), nSims = 3000)
#Optional views of the result:
#orig_results[, c('avg_place', 'avg_prize', 'avg.value.per.win' ) ]
#orig_names<-rownames(orig_results)
print(orig_results)

New names:
* `` -> ...4
* `` -> ...5
* `` -> ...6



                         avg_place avg_prize  avg_wins std_place std_prize
Manchester City           1.553667 211.10667 25.282000 0.7122422  6.524785
Liverpool                 1.837333 208.86667 24.434667 0.7770076  7.811481
Chelsea                   2.960333 195.73467 21.084000 0.9955352 12.719809
Aston Villa               8.390333 137.45300 14.142333 3.1072804 22.948159
Wolverhampton             9.757333 127.90633 12.836333 3.3253770 21.759673
Arsenal                   5.067000 169.48367 17.341667 1.9879206 22.529779
West Ham United           9.860333 127.63100 13.194000 3.4064685 22.230930
Tottenham Hotspur         6.572333 152.82633 16.035333 2.5928766 24.082957
Manchester United         7.860000 141.70700 14.832333 3.0425021 24.162721
Southampton              13.097667 108.31667 11.197667 3.4816905 23.848563
Leicester City           14.011667 102.47833 10.746667 3.4306637 25.926452
Brighton and Hove Albion  9.163333 131.78233 13.408000 3.2121792 22.192426
Leeds United             

In [None]:
#Marginal Value of Wins

In [None]:
#Self-assignment: leaves orig_results unchanged (no-op)
orig_results <- orig_results

In [14]:
#Two functions for filtering placement and prize fluctuation data

#final_place: reduce a Monte Carlo summary to its avg_place column, best (lowest) average place first.
#  results: data frame with an `avg_place` column.
#  Returns a one-column data frame; row names are carried over from `results`.
final_place <- function(results) {
    best_first <- order(results[["avg_place"]])
    #add "std_place" to the selected columns if you want the std deviation of place
    #(a 95 percent conf int would then be avg_place -/+ 1.96 * std_place / sqrt(number of simulations))
    data.frame(results[best_first, "avg_place", drop = FALSE])
}

#prize_variation: reduce a Monte Carlo summary to its std_prize column, most variable prize first.
#  results: data frame with a `std_prize` column.
#  Returns a one-column data frame; row names are carried over from `results`.
prize_variation <- function(results) {
    most_variable_first <- order(-results[["std_prize"]])
    #add "avg_prize" to the selected columns if you also want the average prize
    data.frame(results[most_variable_first, "std_prize", drop = FALSE])
}
#feed monte carlo results into functions
final_placement<-final_place(orig_results)
prize_fluctuation<-prize_variation(orig_results)
#Bare names so both tables are displayed as the cell output
final_placement
prize_fluctuation

Unnamed: 0_level_0,avg_place
Unnamed: 0_level_1,<dbl>
Manchester City,1.544
Liverpool,1.846
Chelsea,2.952
Arsenal,5.02
Tottenham Hotspur,6.704
Manchester United,7.758
Aston Villa,8.356
Brighton and Hove Albion,9.13
Wolverhampton,9.658
West Ham United,10.064


Unnamed: 0_level_0,std_prize
Unnamed: 0_level_1,<dbl>
Leeds United,28.857323
Everton,28.843917
Watford,27.906649
Leicester City,25.974436
Brentford,25.110945
Burnley,25.024973
Tottenham Hotspur,24.763441
Manchester United,24.000524
Southampton,23.537063
Aston Villa,22.92649


In [15]:
#Marginal prize value of one extra win for `team` in a single simulated season.
#
#Simulates one double round-robin season, ranks it, then re-ranks it after giving `team`
#one more win and taking one win away from a randomly chosen other team.
#
#Args:
#  league_data: data frame with at least the columns `league`, `name`, `off` and `def`.
#  team:        name of a single "Barclays Premier League" team.
#Returns:
#  One-row data frame (row name = team) with columns `orig_prize`, `adj_prize` and
#  `marg_val_win` (= adj_prize - orig_prize), prizes in GB pounds, millions.
#Raises:
#  An error when `team` is not in the league or the league size differs from the prize table.
marg_win_func <- function(league_data, team) {
    #Prize winnings by final place (in GB pounds, millions)
    prize_by_place <- c(215, 210, 195, 185, 165, 152, 145, 130, 125, 122,
                        118, 115, 112, 110, 105, 102, 100, 52, 50, 49)

    #Load in league data and offense and defense scores
    premLeague <- subset(league_data, league == "Barclays Premier League")
    #Team names in file order; the losing team is sampled from this vector further down
    premTeams <- as.character(premLeague$name)
    premLeague <- premLeague[order(premTeams), ]
    teams <- as.character(premLeague$name)
    n_teams <- length(teams)
    if (n_teams != length(prize_by_place)) {
        stop("expected ", length(prize_by_place), " Premier League teams but found ",
             n_teams, call. = FALSE)
    }
    if (length(team) != 1 || !team %in% teams) {
        stop("`team` must be the name of one Premier League team", call. = FALSE)
    }
    focus <- match(team, teams)

    #Create simulation parameters
    alpha <- log(premLeague$off) - log(mean(premLeague$def))
    delta <- log(mean(premLeague$off)) - log(premLeague$def)

    #Every ordered (home, away) pairing of two different teams, as row indices into premLeague
    fixtures <- expand.grid(away = seq_len(n_teams), home = seq_len(n_teams))
    fixtures <- fixtures[fixtures$home != fixtures$away, ]
    home <- fixtures$home
    away <- fixtures$away

    #Draw all scores in one call: row 1 holds the home goals, row 2 the away goals.
    #Goals stay integers so results are compared as numbers; compared as text, "10" sorts below "2".
    goal_rates <- rbind(exp(alpha[home] - delta[away]), exp(alpha[away] - delta[home]))
    goals <- matrix(rpois(length(goal_rates), goal_rates), nrow = 2)
    home_goals <- goals[1, ]
    away_goals <- goals[2, ]

    #Season total per team: what it collected at home plus what it collected away
    season_total <- function(at_home, at_away) {
        as.vector(rowsum(at_home, home)) + as.vector(rowsum(at_away, away))
    }
    drawn <- as.integer(home_goals == away_goals)
    goals_for <- season_total(home_goals, away_goals)
    goals_against <- season_total(away_goals, home_goals)
    total_wins <- season_total(as.integer(home_goals > away_goals),
                               as.integer(away_goals > home_goals))
    total_ties <- season_total(drawn, drawn)
    total_points <- 3L * total_wins + total_ties
    goal_dif <- goals_for - goals_against
    #Random number used as the last tie-breaker (shared by both rankings)
    random_seed <- runif(n_teams, -20, 20)

    #Make an adjusted wins vector with original wins data, but change the team in question's win total and pick a random team to lose
    #NOTE(review): goals and ties are left untouched and the loser may already have zero wins - confirm this is intended
    loser <- sample(premTeams[premTeams != team], 1)
    adj_wins <- total_wins
    adj_wins[focus] <- adj_wins[focus] + 1L
    adj_wins[teams == loser] <- adj_wins[teams == loser] - 1L
    adj_points <- 3L * adj_wins + total_ties

    #Final place of `team` before and after the adjustment
    orig_place <- match(focus, order(-total_points, -goal_dif, -goals_for, -random_seed))
    adj_place <- match(focus, order(-adj_points, -goal_dif, -goals_for, -random_seed))

    orig_prize <- prize_by_place[orig_place]
    adj_prize <- prize_by_place[adj_place]
    data.frame(orig_prize = orig_prize,
               adj_prize = adj_prize,
               marg_val_win = adj_prize - orig_prize,
               row.names = team)
}

In [16]:
#Give it a try: a single simulated season for Aston Villa
marg_win_func(league_data, 'Aston Villa')

New names:
* `` -> ...1
* `` -> ...2



Unnamed: 0_level_0,orig_prize,adj_prize,marg_val_win
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>
Aston Villa,118,122,4


In [17]:
#Monte Carlo func to simulate marg value
#
#Args:
#  fun:     function returning a one-row data frame with a `marg_val_win` column; the row
#           name is the team.
#  fun.arg: list of arguments handed to `fun` through do.call().
#  nSims:   number of simulations (at least 1).
#Returns:
#  One-row data frame (row name = team) with the column `marg_val`, the average of
#  `marg_val_win` over all simulations.
monte.carlo.marg.sim <- function(fun, fun.arg, nSims = 100) {
    stopifnot(nSims >= 1)

    #The first run also tells us which team is being simulated
    first_run <- do.call(fun, fun.arg)
    team <- rownames(first_run)

    draws <- numeric(nSims)
    draws[1] <- first_run[team, "marg_val_win"]
    #seq_len() yields no iterations when nSims is 1
    for (k in seq_len(nSims - 1)) {
        draws[k + 1] <- do.call(fun, fun.arg)[team, "marg_val_win"]
    }

    data.frame(marg_val = sum(draws) / nSims, row.names = team)
}

In [18]:
#Run simulation 3000 times for each team!
set.seed(0)
#Premier League rows and their team names
premLeague <- subset(league_data, league == "Barclays Premier League")
premTeams <- premLeague$name
#One row per team and a single column that the loop below fills in
margMatrix <- matrix(1, nrow = 20, ncol = 1, dimnames = list(premTeams, 'marg_val'))

for (i in premTeams) {
    result <- monte.carlo.marg.sim(marg_win_func, fun.arg = list(league_data, i), nSims = 3000)
    margMatrix[i, 'marg_val'] <- result[i, 'marg_val']
}
#Largest average marginal value first
margMatrix <- arrange(data.frame(margMatrix), desc(marg_val))

New names:
* `` -> ...1
* `` -> ...2

New names:
* `` -> ...1
* `` -> ...2

New names:
* `` -> ...1
* `` -> ...2

New names:
* `` -> ...1
* `` -> ...2

New names:
* `` -> ...1
* `` -> ...2

New names:
* `` -> ...1
* `` -> ...2

New names:
* `` -> ...1
* `` -> ...2

New names:
* `` -> ...1
* `` -> ...2

New names:
* `` -> ...1
* `` -> ...2

New names:
* `` -> ...1
* `` -> ...2

New names:
* `` -> ...1
* `` -> ...2

New names:
* `` -> ...1
* `` -> ...2

New names:
* `` -> ...1
* `` -> ...2

New names:
* `` -> ...1
* `` -> ...2

New names:
* `` -> ...1
* `` -> ...2

New names:
* `` -> ...1
* `` -> ...2

New names:
* `` -> ...1
* `` -> ...2

New names:
* `` -> ...1
* `` -> ...2

New names:
* `` -> ...1
* `` -> ...2

New names:
* `` -> ...1
* `` -> ...2

New names:
* `` -> ...1
* `` -> ...2

New names:
* `` -> ...1
* `` -> ...2

New names:
* `` -> ...1
* `` -> ...2

New names:
* `` -> ...1
* `` -> ...2

New names:
* `` -> ...1
* `` -> ...2

New names:
* `` -> ...1
* `` -> ...2

New names:
*

In [19]:
#margMatrix
#write.table(margMatrix, file = "results_sim_2_marg.csv",
 #           sep = "\t", row.names = T)
#The self-assignment is a no-op; the two bare names display both tables as the cell output
margMatrix <- margMatrix
final_placement
margMatrix

Unnamed: 0_level_0,avg_place
Unnamed: 0_level_1,<dbl>
Manchester City,1.544
Liverpool,1.846
Chelsea,2.952
Arsenal,5.02
Tottenham Hotspur,6.704
Manchester United,7.758
Aston Villa,8.356
Brighton and Hove Albion,9.13
Wolverhampton,9.658
West Ham United,10.064


Unnamed: 0_level_0,marg_val
Unnamed: 0_level_1,<dbl>
Everton,10.538
Watford,9.978
Burnley,8.916
Leicester City,8.764
Leeds United,8.742
Newcastle,8.388
Aston Villa,8.308
Tottenham Hotspur,8.2
Wolverhampton,7.942
Crystal Palace,7.906


In [21]:
#Simulate one season in which `team` has a boosted offense rating and report where it finishes.
#
#Args:
#  league_data: data frame with at least the columns `league`, `name`, `off` and `def`.
#  team:        name of a single "Barclays Premier League" team.
#  boost:       fraction added to the team's `off` rating (default 0.1, i.e. +10 percent).
#Returns:
#  One-row data frame (row name = team) with columns `place`, `prize` (GB pounds, millions)
#  and `total.wins`.
#Raises:
#  An error when `team` is not in the league or the league size differs from the prize table.
off_improve <- function(league_data, team, boost = 0.1) {
    #Prize winnings by final place (in GB pounds, millions)
    prize_by_place <- c(215, 210, 195, 185, 165, 152, 145, 130, 125, 122,
                        118, 115, 112, 110, 105, 102, 100, 52, 50, 49)

    #Load in league data and offense and defense scores, in team-name order
    premLeague <- subset(league_data, league == "Barclays Premier League")
    premLeague <- premLeague[order(as.character(premLeague$name)), ]
    teams <- as.character(premLeague$name)
    n_teams <- length(teams)
    if (n_teams != length(prize_by_place)) {
        stop("expected ", length(prize_by_place), " Premier League teams but found ",
             n_teams, call. = FALSE)
    }
    if (length(team) != 1 || !team %in% teams) {
        stop("`team` must be the name of one Premier League team", call. = FALSE)
    }
    focus <- match(team, teams)

    #Create simulation parameters; the league averages are taken from the unadjusted ratings
    adj_off <- premLeague$off
    adj_off[focus] <- adj_off[focus] + boost * adj_off[focus]
    alpha <- log(adj_off) - log(mean(premLeague$def))
    delta <- log(mean(premLeague$off)) - log(premLeague$def)

    #Every ordered (home, away) pairing of two different teams, as row indices into premLeague
    fixtures <- expand.grid(away = seq_len(n_teams), home = seq_len(n_teams))
    fixtures <- fixtures[fixtures$home != fixtures$away, ]
    home <- fixtures$home
    away <- fixtures$away

    #Draw all scores in one call: row 1 holds the home goals, row 2 the away goals.
    #Goals stay integers so results are compared as numbers; compared as text, "10" sorts below "2".
    goal_rates <- rbind(exp(alpha[home] - delta[away]), exp(alpha[away] - delta[home]))
    goals <- matrix(rpois(length(goal_rates), goal_rates), nrow = 2)
    home_goals <- goals[1, ]
    away_goals <- goals[2, ]

    #Season total per team: what it collected at home plus what it collected away
    season_total <- function(at_home, at_away) {
        as.vector(rowsum(at_home, home)) + as.vector(rowsum(at_away, away))
    }
    drawn <- as.integer(home_goals == away_goals)
    goals_for <- season_total(home_goals, away_goals)
    goals_against <- season_total(away_goals, home_goals)
    total_wins <- season_total(as.integer(home_goals > away_goals),
                               as.integer(away_goals > home_goals))
    #3 points for a win, 1 for a draw
    total_points <- 3L * total_wins + season_total(drawn, drawn)
    goal_dif <- goals_for - goals_against
    #Random number used as the last tie-breaker
    random_seed <- runif(n_teams, -20, 20)

    #Final place of `team` in the ranked table
    place <- match(focus, order(-total_points, -goal_dif, -goals_for, -random_seed))
    data.frame(place = place,
               prize = prize_by_place[place],
               total.wins = total_wins[focus],
               row.names = team)
}
 

In [22]:
#Single simulated season with Manchester United's offense boosted
off_improve(league_data, 'Manchester United')

Unnamed: 0_level_0,place,prize,total.wins
Unnamed: 0_level_1,<int>,<dbl>,<int>
Manchester United,12,115,11


In [23]:
#Monte Carlo wrapper for the boosted-offense simulation: averages one team's prize over many seasons
#
#Args:
#  fun:     function returning a one-row data frame with a `prize` column; the row name
#           is the team.
#  fun.arg: list of arguments handed to `fun` through do.call().
#  nSims:   number of simulations (at least 1).
#Returns:
#  One-row data frame (row name = team) with the column `avg_prize_off`.
monte.carlo.off.sim <- function(fun, fun.arg, nSims = 10) {
    stopifnot(nSims >= 1)

    #The first run also tells us which team is being simulated
    first_run <- do.call(fun, fun.arg)
    team <- rownames(first_run)

    prizes <- numeric(nSims)
    prizes[1] <- first_run[team, "prize"]
    #seq_len() yields no iterations when nSims is 1
    for (k in seq_len(nSims - 1)) {
        prizes[k + 1] <- do.call(fun, fun.arg)[team, "prize"]
    }

    data.frame(avg_prize_off = sum(prizes) / nSims, row.names = team)
}

In [24]:
#Quick check of the offense Monte Carlo wrapper with only 10 simulations
result<-monte.carlo.off.sim(off_improve, fun.arg=list(league_data, 'Manchester United'), nSims=10)
result

Unnamed: 0_level_0,avg_prize_off
Unnamed: 0_level_1,<dbl>
Manchester United,142


In [None]:
#3000 sims of boosted offense for each team
set.seed(0)
#Premier League rows and their team names
premLeague <- subset(league_data, league == "Barclays Premier League")
premTeams <- premLeague$name
#One row per team and a single column that the loop below fills in
offMatrix <- matrix(1, nrow = 20, ncol = 1, dimnames = list(premTeams, 'avg_prize_off'))

for (i in premTeams) {
    result <- monte.carlo.off.sim(off_improve, fun.arg = list(league_data, i), nSims = 3000)
    offMatrix[i, 'avg_prize_off'] <- result[i, 'avg_prize_off']
}
offMatrix <- data.frame(offMatrix)

In [None]:
#write.table(offMatrix, file = "results_sim_3_off3.csv",
 #           sep = "\t", row.names = T)
#Replace the in-memory offMatrix with the table stored on disk (presumably written earlier by the
#commented-out write.table call above - confirm), then display both tables
offMatrix<-read.table('results_sim_3_off3.csv')
orig_results
offMatrix

In [None]:
#Simulate one season in which `team` has a reduced `def` rating and report where it finishes.
#
#Args:
#  league_data: data frame with at least the columns `league`, `name`, `off` and `def`.
#  team:        name of a single "Barclays Premier League" team.
#  reduction:   fraction subtracted from the team's `def` rating (default 0.1, i.e. -10 percent).
#Returns:
#  One-row data frame (row name = team) with columns `place`, `prize` (GB pounds, millions)
#  and `total.wins`.
#Raises:
#  An error when `team` is not in the league or the league size differs from the prize table.
def_improve <- function(league_data, team, reduction = 0.1) {
    #Prize winnings by final place (in GB pounds, millions)
    prize_by_place <- c(215, 210, 195, 185, 165, 152, 145, 130, 125, 122,
                        118, 115, 112, 110, 105, 102, 100, 52, 50, 49)

    #Load in league data and offense and defense scores, in team-name order
    premLeague <- subset(league_data, league == "Barclays Premier League")
    premLeague <- premLeague[order(as.character(premLeague$name)), ]
    teams <- as.character(premLeague$name)
    n_teams <- length(teams)
    if (n_teams != length(prize_by_place)) {
        stop("expected ", length(prize_by_place), " Premier League teams but found ",
             n_teams, call. = FALSE)
    }
    if (length(team) != 1 || !team %in% teams) {
        stop("`team` must be the name of one Premier League team", call. = FALSE)
    }
    focus <- match(team, teams)

    #Create simulation parameters; the league averages are taken from the unadjusted ratings.
    #A lower `def` lowers the scoring rate of every opponent of `team`.
    adj_def <- premLeague$def
    adj_def[focus] <- adj_def[focus] - reduction * adj_def[focus]
    alpha <- log(premLeague$off) - log(mean(premLeague$def))
    delta <- log(mean(premLeague$off)) - log(adj_def)

    #Every ordered (home, away) pairing of two different teams, as row indices into premLeague
    fixtures <- expand.grid(away = seq_len(n_teams), home = seq_len(n_teams))
    fixtures <- fixtures[fixtures$home != fixtures$away, ]
    home <- fixtures$home
    away <- fixtures$away

    #Draw all scores in one call: row 1 holds the home goals, row 2 the away goals.
    #Goals stay integers so results are compared as numbers; compared as text, "10" sorts below "2".
    goal_rates <- rbind(exp(alpha[home] - delta[away]), exp(alpha[away] - delta[home]))
    goals <- matrix(rpois(length(goal_rates), goal_rates), nrow = 2)
    home_goals <- goals[1, ]
    away_goals <- goals[2, ]

    #Season total per team: what it collected at home plus what it collected away
    season_total <- function(at_home, at_away) {
        as.vector(rowsum(at_home, home)) + as.vector(rowsum(at_away, away))
    }
    drawn <- as.integer(home_goals == away_goals)
    goals_for <- season_total(home_goals, away_goals)
    goals_against <- season_total(away_goals, home_goals)
    total_wins <- season_total(as.integer(home_goals > away_goals),
                               as.integer(away_goals > home_goals))
    #3 points for a win, 1 for a draw
    total_points <- 3L * total_wins + season_total(drawn, drawn)
    goal_dif <- goals_for - goals_against
    #Random number used as the last tie-breaker
    random_seed <- runif(n_teams, -20, 20)

    #Final place of `team` in the ranked table
    place <- match(focus, order(-total_points, -goal_dif, -goals_for, -random_seed))
    data.frame(place = place,
               prize = prize_by_place[place],
               total.wins = total_wins[focus],
               row.names = team)
}

In [None]:
#Single simulated season with Watford's def rating reduced
def_improve(league_data, 'Watford')

In [None]:
#Monte Carlo wrapper for the improved-defense simulation: averages one team's prize over many seasons
#
#Args:
#  fun:     function returning a one-row data frame with a `prize` column; the row name
#           is the team.
#  fun.arg: list of arguments handed to `fun` through do.call().
#  nSims:   number of simulations (at least 1).
#Returns:
#  One-row data frame (row name = team) with the column `avg_prize_def`.
monte.carlo.def.sim <- function(fun, fun.arg, nSims = 100) {
    stopifnot(nSims >= 1)

    #The first run also tells us which team is being simulated
    first_run <- do.call(fun, fun.arg)
    team <- rownames(first_run)

    prizes <- numeric(nSims)
    prizes[1] <- first_run[team, "prize"]
    #seq_len() yields no iterations when nSims is 1
    for (k in seq_len(nSims - 1)) {
        prizes[k + 1] <- do.call(fun, fun.arg)[team, "prize"]
    }

    data.frame(avg_prize_def = sum(prizes) / nSims, row.names = team)
}


In [None]:
#Quick check of the defense Monte Carlo wrapper with only 10 simulations.
#Uses def_improve (not off_improve) so the avg_prize_def column really reflects the defense scenario.
result<-monte.carlo.def.sim(def_improve, fun.arg=list(league_data, 'Manchester United'), nSims=10)
result

In [None]:
#3000 simulations of bolstered defense for each team
set.seed(0)
#Premier League rows and their team names
premLeague <- subset(league_data, league == "Barclays Premier League")
premTeams <- premLeague$name
#One row per team and a single column that the loop below fills in
defMatrix <- matrix(1, nrow = 20, ncol = 1, dimnames = list(premTeams, 'avg_prize_def'))

for (i in premTeams) {
    result <- monte.carlo.def.sim(def_improve, fun.arg = list(league_data, i), nSims = 3000)
    defMatrix[i, 'avg_prize_def'] <- result[i, 'avg_prize_def']
}
defMatrix <- data.frame(defMatrix)

In [None]:
#write.table(defMatrix, file = "results_sim_3_def.csv",
 #           sep = "\t", row.names = T)
#Replace the in-memory defMatrix with the table stored on disk (presumably written earlier by the
#commented-out write.table call above - confirm), then display both tables
defMatrix=read.table('results_sim_3_def.csv')
orig_results
defMatrix

In [None]:
#Put the offense and defense results side by side. bind_cols() pairs rows by position,
#so this assumes both tables list the teams in the same order - TODO confirm
spending_df=bind_cols(offMatrix, defMatrix)
#NOTE(review): this variable shares its name with base::names(); calls to names() still
#resolve to the function, but a different name would be clearer
names<-rownames(spending_df)
names

In [None]:
#Add average prize values from first simulation to the data frame.
#Rows of orig_results are looked up by team name (`names` holds the row names of spending_df),
#so the two tables do not need to be in the same order.
spending_df['orig_avg'] <- orig_results[names, 'avg_prize']
spending_df['avg_place'] <- orig_results[names, 'avg_place']

#Gain in average prize from each improvement relative to the unmodified simulation
spending_df['off_improve'] <- spending_df$avg_prize_off - spending_df$orig_avg
spending_df['def_improve'] <- spending_df$avg_prize_def - spending_df$orig_avg
#Recommend whichever improvement gains more; ties go to 'Defense'
spending_df['strategy'] <- ifelse(spending_df$off_improve > spending_df$def_improve, 'Offense', 'Defense')

In [None]:
#Display the combined spending table
spending_df

In [None]:
# Master season simulator: plays one full home-and-away Premier League season
# and outputs the resulting ranking.
#
# Args:
#   league_data: data frame with (at least) the columns `name`, `league`,
#     `off` and `def`. Only rows whose `league` is "Barclays Premier League"
#     are used, and there must be exactly as many distinct teams as there are
#     entries in the prize table below (20).
#
# Returns:
#   A data frame with one row per team, ordered from first to last place.
#   Row names are the team names; columns are `place` (1 = champion),
#   `prize` (prize money for that place) and `total.wins` (matches won).
season_sim <- function(league_data) {
    # Prize winnings by final place (in GB pounds, millions). These numbers
    # were ballparked from the graph Alistair gave us.
    prize_by_place <- c(215, 210, 195, 185, 165, 152, 145, 130, 125, 122,
                        118, 115, 112, 110, 105, 102, 100, 52, 50, 49)
    n_teams <- length(prize_by_place)

    # Load in league data and offense and defense scores
    prem_league <- subset(league_data, league == "Barclays Premier League")
    team_names <- as.character(prem_league$name)
    if (length(team_names) != n_teams || anyDuplicated(team_names) > 0) {
        stop("season_sim() needs exactly ", n_teams,
             " distinct Premier League teams, got ", length(team_names),
             call. = FALSE)
    }

    # Create simulation parameters: alpha is a team's log offense score minus
    # the log of the league-mean defense score, delta is the log of the
    # league-mean offense score minus the team's log defense score.
    alpha <- log(prem_league$off) - log(mean(prem_league$def))
    delta <- log(mean(prem_league$off)) - log(prem_league$def)
    names(alpha) <- team_names
    names(delta) <- team_names

    # Every ordered (home, away) pairing of two different teams, sorted by
    # home team and then by away team.
    teams <- sort(team_names)
    team_idx <- seq_len(n_teams)
    home_idx <- rep(team_idx, each = n_teams - 1L)
    away_idx <- unlist(lapply(team_idx, function(k) team_idx[-k]))
    home <- teams[home_idx]
    away <- teams[away_idx]
    n_matches <- length(home_idx)

    # Simulate every game: goals are Poisson draws. The rates are interleaved
    # so that scores are drawn match by match (home side first, then away).
    home_rate <- exp(alpha[home] - delta[away])
    away_rate <- exp(alpha[away] - delta[home])
    goals <- rpois(2L * n_matches, as.vector(rbind(home_rate, away_rate)))
    is_home_draw <- rep(c(TRUE, FALSE), times = n_matches)
    home_score <- goals[is_home_draw]
    away_score <- goals[!is_home_draw]

    # Scores stay numeric so that results are decided by goal counts; compared
    # as text, a score of "10" would sort below "9". A win is worth 3 points,
    # a draw 1 and a loss 0.
    home_win <- home_score > away_score
    away_win <- away_score > home_score
    drawn <- home_score == away_score
    home_points <- 3 * home_win + drawn
    away_points <- 3 * away_win + drawn

    # Per-team totals of `x`, in the order of `teams`.
    sum_by_team <- function(x, idx) as.vector(tapply(x, idx, sum))

    # Combine each team's home and away records
    goals_for <- sum_by_team(home_score, home_idx) +
        sum_by_team(away_score, away_idx)
    goals_against <- sum_by_team(away_score, home_idx) +
        sum_by_team(home_score, away_idx)
    total_points <- sum_by_team(home_points, home_idx) +
        sum_by_team(away_points, away_idx)
    total_wins <- sum_by_team(home_win, home_idx) +
        sum_by_team(away_win, away_idx)
    goal_dif <- goals_for - goals_against

    # Random draw that settles ties left after points, goal difference and
    # goals scored.
    tie_break <- runif(n_teams, -20, 20)

    # Arrange teams to determine place and prize information.
    standing <- order(-total_points, -goal_dif, -goals_for, -tie_break,
                      -total_wins)

    data.frame(
        place = seq_len(n_teams),
        prize = prize_by_place,
        total.wins = total_wins[standing],
        row.names = teams[standing]
    )
}
    

In [None]:
# Monte Carlo simulation function for the regular season: calls a season
# simulator `nSims` times and collects each team's place, prize and win count
# from every run.
#
# Args:
#   fun: the simulation function (e.g. the season simulator). It must return a
#     data frame whose row names are team names and which has the columns
#     `place`, `prize` and `total.wins`.
#   fun.arg: list of arguments handed to `fun` through do.call().
#   nSims: number of simulations to run; must be at least 1.
#
# Returns:
#   A data frame with one column per simulation (X1, X2, ...) and three
#   stacked blocks of rows, each with one row per team: places first, then
#   prizes, then wins. Teams are listed in the row order of the first run;
#   rbind() makes the repeated team row names unique in the later blocks.
monte.carlo.sim <- function(fun, fun.arg, nSims = 100) {
    stopifnot(is.numeric(nSims), length(nSims) == 1, nSims >= 1)

    # The first run supplies the team names used to label and align all runs.
    season <- do.call(fun, fun.arg)
    teamnames <- rownames(season)

    # One row per team, one column per simulation. Naming the columns up front
    # keeps the X1, X2, ... labels consistent across the three tables even
    # when nSims is 1.
    blank <- matrix(
        1,
        nrow = length(teamnames),
        ncol = nSims,
        dimnames = list(teamnames, paste0("X", seq_len(nSims)))
    )
    placementMatrix <- blank
    prizeMatrix <- blank
    winMatrix <- blank

    for (rep in seq_len(nSims)) {
        # The first column reuses the run made above; later columns re-run.
        if (rep > 1) {
            season <- do.call(fun, fun.arg)
        }
        # Look rows up by team name, since a later run may return its rows in
        # a different order from the first one.
        placementMatrix[, rep] <- season[teamnames, "place"]
        prizeMatrix[, rep] <- season[teamnames, "prize"]
        winMatrix[, rep] <- season[teamnames, "total.wins"]
    }

    rbind(
        data.frame(placementMatrix),
        data.frame(prizeMatrix),
        data.frame(winMatrix)
    )
}

In [None]:
# Run 30,000 simulated seasons, then transpose the result so that every row
# is one simulation and every column is one team/statistic combination.
data <- monte.carlo.sim(season_sim, list(league_data), nSims = 30000)
data <- data.frame(t(data))

In [None]:
# Save the simulation output as a tab-separated file, row names included
write.table(
    data,
    file = "data_from_sim1.csv",
    sep = "\t",
    row.names = TRUE
)

In [None]:
# monte.carlo.sim stacks 20 place rows, then 20 prize rows, then 20 win rows;
# after the transpose those become columns, so columns 21 to 40 are the prizes.
prize_columns <- 21:40
prize_data <- data[, prize_columns]
prize_data