In [1276]:
import pandas as pd
import DataGathering.FileHandling as files
import jellyfish

In [1277]:
def unifyDates(date: str) -> str:
    if len(date) == 0:
        return "0000/00/00"
    if "." in date:
        date = date.split(".")
    elif "/" in date:
        date = date.split("/")
    if len(date[0]) != 4:
        date = date[::-1]
    return "/".join(date)

In [1278]:
def getSeason(unifiedDate: str) -> str:
    ymd = unifiedDate.split("/")
    year = int(ymd[0])
    month = int(ymd[1])
    if month >= 10:
        return str(year)+"/"+str(year+1)
    elif month <= 3:
        return str(year-1)+"/"+str(year)
    else:
        return "Off-Season"

In [1279]:
teamNames = files.readFileToList("Data/teamNames.txt")
def getClosestTeamName(teamName: str) -> str:
    return sorted(teamNames, key=lambda savedName: jellyfish.jaro_similarity(savedName, teamName), reverse=True)[0]

In [1280]:
playerNames = files.readFileToList("Data/playerNames.txt")
def getClosestPlayerName(playerName: str) -> str:
    closest = sorted(playerNames, key=lambda savedName: jellyfish.levenshtein_distance(savedName, playerName))[0]
    similarity = jellyfish.jaro_similarity(closest, playerName)
    if similarity > 0.7:
        return closest
    return "No Match"

In [1281]:
goodShits = ["PointsTotal", "PointsDelta", "ServeTotal", "ServeError", "ServePoints", "ReceptionTotal", "ReceptionError", "AttackTotal", "AttackError", "AttackBlocked", "AttackPoints", "BlockPoints"]
def getPlayersFromTeam(game, teamNumber):
    players = {}
    for playerNumber in range(1,15):
        scores = []
        for goodShit in goodShits:
            value = game["{}Player{}Team{}".format(goodShit, playerNumber, teamNumber)]
            if type(value) == str:
                value = 0
            scores.append(value)
        playerName = game["NamePlayer{}Team{}".format(playerNumber, teamNumber)]
        if playerName != "":
            players[playerName] = scores
    return players

In [1282]:
games = []
for gameCSVPath in files.getAllFileNamesIn("Data/CSVs/"):
    game = files.readGameCSV(gameCSVPath)
    games.append(game)

# GamesOnly

In [1283]:
gamesOnly = pd.DataFrame([], columns = ["Date","GameID","Season","M/F","Team1","Team2","Score1","Score2",
                                        "Player1Team1",
                                        "Player2Team1",
                                        "Player3Team1",
                                        "Player4Team1",
                                        "Player5Team1",
                                        "Player6Team1",
                                        "Player7Team1",
                                        "Player8Team1",
                                        "Player9Team1",
                                        "Player10Team1",
                                        "Player11Team1",
                                        "Player12Team1",
                                        "Player13Team1",
                                        "Player14Team1"
                                        "Player1Team2",
                                        "Player2Team2",
                                        "Player3Team2",
                                        "Player4Team2",
                                        "Player5Team2",
                                        "Player6Team2",
                                        "Player7Team2",
                                        "Player8Team2",
                                        "Player9Team2",
                                        "Player10Team2",
                                        "Player11Team2",
                                        "Player12Team2",
                                        "Player13Team2",
                                        "Player14Team2"])

In [1284]:
gamesOnly

Unnamed: 0,Date,GameID,Season,M/F,Team1,Team2,Score1,Score2,Player1Team1,Player2Team1,...,Player5Team2,Player6Team2,Player7Team2,Player8Team2,Player9Team2,Player10Team2,Player11Team2,Player12Team2,Player13Team2,Player14Team2


In [1285]:
for game in games:
    d = {}
    date = unifyDates(game["Date"])
    d["Date"] = date
    gameID = game["MatchNumber"]
    d["GameID"] = gameID
    season = getSeason(date)
    d["Season"] = season
    team1 = getClosestTeamName(game["NameTeam1"])
    team2 = getClosestTeamName(game["NameTeam2"])
    d["Team1"] = team1
    d["Team2"] = team2
    score1 = int(game["ScoreTeam1"])
    score2 = int(game["ScoreTeam2"])
    d["Score1"] = score1
    d["Score2"] = score2
    mf = "-"
    if "Män" in game["League"]:
        mf = "M"
    elif "Fra" in game["League"]:
        mf = "F"
    d["M/F"] = mf
    playersTeam1 = list(getPlayersFromTeam(game, 1).keys())
    playersTeam2 = list(getPlayersFromTeam(game, 2).keys())
    for i in range(1, 15):
        player = "-"
        if len(playersTeam1) > i:
            player = playersTeam1[i-1]
            player = getClosestPlayerName(player)
        d["Player{}Team1".format(i)] = player
    for i in range(1, 15):
        player = "-"
        if len(playersTeam2) >= i:
            player = playersTeam2[i-1]
            player = getClosestPlayerName(player)
        d["Player{}Team2".format(i)] = player
        
    gamesOnly = gamesOnly.append(d, ignore_index=True)

In [1286]:
gamesOnly

Unnamed: 0,Date,GameID,Season,M/F,Team1,Team2,Score1,Score2,Player1Team1,Player2Team1,...,Player7Team2,Player8Team2,Player9Team2,Player10Team2,Player11Team2,Player12Team2,Player13Team2,Player14Team2,Player14Team1,Player1Team2
0,2020/12/20,1045.0,2020/2021,M,"""United Volleys Frankfurt""","""Volleyball Bisons Bühl""",3,1,Imhoff Facundo,Tsuiki Satoshi,...,Henning Paul,Jurkovics Mathäus,Meier Leon,Ringseis Florian,Kronthaler Niklas,Lopez Tomas,-,-,-,Szabó Alpár
1,2020/11/04,1001.0,2020/2021,M,"""Volleyball Bisons Bühl""","""TSV Unterhaching""",3,0,Thiel Stefan,Lind Jonas,...,Brandt Severin,Brandt Niklas,Petrusic Juro,Thom Benjamin,Gehringer Paul,Friedrich Roy,Paduretu Eric,Chefai Mohamed,-,Suck Fabian
2,2021/01/23,1065.0,2020/2021,M,"""VfB Friedrichshafen""","""TSV Unterhaching""",3,0,Worsley Joseph,Aylsworth Avery,...,Brandt Severin,Brandt Niklas,Petrusic Juro,Thom Benjamin,Gehringer Paul,Friedrich Roy,Paduretu Eric,Chefai Mohamed,-,Suck Fabian
3,2020/10/03,2002.0,2020/2021,F,"""NawaRo Straubing""","""Schwarz-Weiß Erfurt""",3,1,Provaroni Claudia,Ismaili Valbona,...,Lenz Sindy,Michel Tosi Victoria,Gross Jasmine,Barthel Meghan,Maase Rica,Cole Madelyn,-,-,-,Palmer Madeline
4,2020/10/29,2019.0,2020/2021,F,"""Rote Raben Vilsbiburg""","""NawaRo Straubing""",3,0,Dammer Simona,Keller Luisa,...,Schweigmann Janna,Hänle Marie,Dreblow Sophie,Scholten Iris,Gryka Magdalena,-,-,-,-,Provaroni Claudia
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
372,2020/11/14,2035.0,2020/2021,F,"""Allianz MTV Stuttgart""","""NawaRo Straubing""",3,0,Koskelo Roosa,Segura Palleres Maria,...,Dreblow Sophie,Scholten Iris,-,-,-,-,-,-,-,Provaroni Claudia
373,2020/12/05,2052.0,2020/2021,F,"""Allianz MTV Stuttgart""","""Schwarz-Weiß Erfurt""",3,1,Koskelo Roosa,Segura Palleres Maria,...,Michel Tosi Victoria,Gross Jasmine,Barthel Meghan,Maase Rica,Merte Franca,Cole Madelyn,-,-,-,Palmer Madeline
374,2019/11/30,1052.0,2019/2020,M,"""WWK Volleys Herrsching""","""VfB Friedrichshafen""",3,2,Ilic Dorde,Stöhr Tim,...,Janouch Jakub,Juhkami Martti,Malescha Daniel,Steuerwald Markus,Gjorgiev Nikola,Schmidt Brendan,-,-,-,Worsley Joseph
375,2019/10/23,2018.0,2019/2020,F,"""NawaRo Straubing""","""Allianz MTV Stuttgart""",0,3,Steen Knudsen Ragni,Stöhr Tim,...,Rosenthal Jenna,Jegdic Aleksandra,No Match,Rivers Krystal,Janiska Jennifer,Berger Lara,No Match,-,-,Koskelo Roosa


# Players

In [1287]:
playerStatsPerGame = pd.DataFrame([], columns = ["PlayerName","M/F","PlayerTeam","OpposingTeam","Score1","Score2","Date","GameID","Season","PointsTotal", "PointsDelta", "ServeTotal", "ServeError", "ServePoints", "ReceptionTotal", "ReceptionError", "AttackTotal", "AttackError", "AttackBlocked", "AttackPoints", "BlockPoints"])

In [1288]:
for game in games:
    date = unifyDates(game["Date"])
    gameID = game["MatchNumber"]
    season = getSeason(date)
    team1 = getClosestTeamName(game["NameTeam1"])
    team2 = getClosestTeamName(game["NameTeam2"])
    score1 = int(game["ScoreTeam1"])
    score2 = int(game["ScoreTeam2"])
    mf = "-"
    if "Män" in game["League"]:
        mf = "M"
    elif "Fra" in game["League"]:
        mf = "F"
    playersTeam1 = getPlayersFromTeam(game, 1)
    playersTeam2 = getPlayersFromTeam(game, 2)
    for playerName, stats in playersTeam1.items():
        playerName = getClosestPlayerName(playerName)
        row = {"PlayerName":playerName,
               "M/F":mf,
               "PlayerTeam":team1,
               "OpposingTeam":team2,
               "Score1":score1,
               "Score2":score2,
               "Date":date,
               "GameID":gameID,
               "Season":season,
               "PointsTotal":stats[0],
               "PointsDelta":stats[1],
               "ServeTotal":stats[2],
               "ServeError":stats[3],
               "ServePoints":stats[4],
               "ReceptionTotal":stats[5],
               "ReceptionError":stats[6],
               "AttackTotal":stats[7],
               "AttackError":stats[8],
               "AttackBlocked":stats[9],
               "AttackPoints":stats[10],
                "BlockPoints":stats[11]}
        playerStatsPerGame = playerStatsPerGame.append(row, ignore_index=True)
        
    for playerName, stats in playersTeam2.items():
        playerName = getClosestPlayerName(playerName)
        row = {"PlayerName":playerName,
               "M/F":mf,
               "PlayerTeam":team2,
               "OpposingTeam":team1,
               "Score1":score2,
               "Score2":score1,
               "Date":date,
               "GameID":gameID,
               "Season":season,
               "PointsTotal":stats[0],
               "PointsDelta":stats[1],
               "ServeTotal":stats[2],
               "ServeError":stats[3],
               "ServePoints":stats[4],
               "ReceptionTotal":stats[5],
               "ReceptionError":stats[6],
               "AttackTotal":stats[7],
               "AttackError":stats[8],
               "AttackBlocked":stats[9],
               "AttackPoints":stats[10],
                "BlockPoints":stats[11]}
        playerStatsPerGame = playerStatsPerGame.append(row, ignore_index=True)
        

In [1289]:
playerStatsPerGame

Unnamed: 0,PlayerName,M/F,PlayerTeam,OpposingTeam,Score1,Score2,Date,GameID,Season,PointsTotal,...,ServeTotal,ServeError,ServePoints,ReceptionTotal,ReceptionError,AttackTotal,AttackError,AttackBlocked,AttackPoints,BlockPoints
0,Imhoff Facundo,M,"""United Volleys Frankfurt""","""Volleyball Bisons Bühl""",3,1,2020/12/20,1045.0,2020/2021,9.0,...,11.0,0,0,1.0,0,12.0,0,1.0,8.0,1.0
1,Tsuiki Satoshi,M,"""United Volleys Frankfurt""","""Volleyball Bisons Bühl""",3,1,2020/12/20,1045.0,2020/2021,0.0,...,0.0,0,0,22.0,1.0,0.0,0,0.0,0.0,0.0
2,Baxpöhler Noah,M,"""United Volleys Frankfurt""","""Volleyball Bisons Bühl""",3,1,2020/12/20,1045.0,2020/2021,10.0,...,18.0,0,0,0.0,0,12.0,0,0.0,7.0,3.0
3,Reinhardt Jonas,M,"""United Volleys Frankfurt""","""Volleyball Bisons Bühl""",3,1,2020/12/20,1045.0,2020/2021,0.0,...,0.0,0,0,0.0,0,0.0,0,0.0,0.0,0.0
4,Grozer Tim,M,"""United Volleys Frankfurt""","""Volleyball Bisons Bühl""",3,1,2020/12/20,1045.0,2020/2021,20.0,...,16.0,6.0,1.0,17.0,4.0,35.0,1.0,2.0,19.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9112,Brisebois Danielle,F,"""Schwarz-Weiß Erfurt""","""SC Potsdam""",0,3,2020/02/26,2114.0,2019/2020,2.0,...,3.0,1.0,0,0.0,0,12.0,0,1.0,2.0,0.0
9113,Maase Rica,F,"""Schwarz-Weiß Erfurt""","""SC Potsdam""",0,3,2020/02/26,2114.0,2019/2020,0.0,...,0.0,0,0,0.0,0,0.0,0,0.0,0.0,0.0
9114,Pettke Jennifer,F,"""Schwarz-Weiß Erfurt""","""SC Potsdam""",0,3,2020/02/26,2114.0,2019/2020,1.0,...,9.0,0,1.0,0.0,0,4.0,0,0.0,0.0,0.0
9115,Flory Lindsay,F,"""Schwarz-Weiß Erfurt""","""SC Potsdam""",0,3,2020/02/26,2114.0,2019/2020,1.0,...,5.0,0,0,0.0,0,5.0,0,0.0,1.0,0.0


In [1290]:
playerStatsPerGame[playerStatsPerGame["PlayerName"] == "Grankin Sergey"]

Unnamed: 0,PlayerName,M/F,PlayerTeam,OpposingTeam,Score1,Score2,Date,GameID,Season,PointsTotal,...,ServeTotal,ServeError,ServePoints,ReceptionTotal,ReceptionError,AttackTotal,AttackError,AttackBlocked,AttackPoints,BlockPoints
127,Grankin Sergey,M,"""BERLIN RECYCLING Volleys""","""TSV Unterhaching""",3,0,2019/10/27,1021.0,2019/2020,2.0,...,11.0,3.0,1.0,0.0,0.0,2.0,0.0,1.0,1.0,0.0
663,Grankin Sergey,M,"""BERLIN RECYCLING Volleys""","""BERLIN RECYCLING Volleys""",3,2,2019/10/23,1018.0,2019/2020,3.0,...,8.0,1.0,0.0,0.0,0.0,3.0,0.0,0.0,2.0,1.0
748,Grankin Sergey,M,"""BERLIN RECYCLING Volleys""","""SWD powervolleys DÜREN""",3,0,2019/11/16,1039.0,2019/2020,5.0,...,14.0,3.0,1.0,0.0,0.0,3.0,0.0,0.0,2.0,2.0
1099,Grankin Sergey,M,"""BERLIN RECYCLING Volleys""","""Volleyball Bisons Bühl""",1,3,2020/10/31,1011.0,2020/2021,0.0,...,5.0,2.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
1187,Grankin Sergey,M,"""BERLIN RECYCLING Volleys""","""WWK Volleys Herrsching""",3,0,2019/10/30,1109.0,2019/2020,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1422,Grankin Sergey,M,"""BERLIN RECYCLING Volleys""","""VfB Friedrichshafen""",0,3,2020/11/21,1022.0,2020/2021,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1510,Grankin Sergey,M,"""BERLIN RECYCLING Volleys""","""VCO Berlin""",3,0,2020/11/04,1207.0,2020/2021,2.0,...,7.0,3.0,0.0,0.0,0.0,2.0,0.0,0.0,2.0,0.0
2263,Grankin Sergey,M,"""BERLIN RECYCLING Volleys""","""WWK Volleys Herrsching""",3,0,2019/11/09,1028.0,2019/2020,1.0,...,11.0,5.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0
2323,Grankin Sergey,M,"""BERLIN RECYCLING Volleys""","""United Volleys Frankfurt""",3,2,2019/22/12,1063.0,2019/2020,4.0,...,16.0,6.0,2.0,2.0,0.0,1.0,0.0,0.0,1.0,1.0
2522,Grankin Sergey,M,"""BERLIN RECYCLING Volleys""","""United Volleys Frankfurt""",2,3,2021/02/06,1074.0,2020/2021,3.0,...,17.0,1.0,1.0,0.0,0.0,3.0,1.0,0.0,2.0,0.0


In [1291]:
playersStatsPerSet = pd.DataFrame([], columns=["PlayerName", "M/F", "PlayerTeam", "PointsTotal", "PointsDelta", "ServeTotal", "ServeError", "ServePoints", "ReceptionTotal", "ReceptionError", "AttackTotal", "AttackError", "AttackBlocked", "AttackPoints", "BlockPoints"])

In [1292]:
playersStatsPerSet

Unnamed: 0,PlayerName,M/F,PlayerTeam,PointsTotal,PointsDelta,ServeTotal,ServeError,ServePoints,ReceptionTotal,ReceptionError,AttackTotal,AttackError,AttackBlocked,AttackPoints,BlockPoints


In [1293]:
values = ["PointsTotal", "PointsDelta", "ServeTotal", "ServeError", "ServePoints", "ReceptionTotal", "ReceptionError", "AttackTotal", "AttackError", "AttackBlocked", "AttackPoints", "BlockPoints"]

In [1294]:
for playerName in playerNames:
    d = {}
    d["PlayerName"] = playerName
    allStatsOfPlayer = playerStatsPerGame[playerStatsPerGame["PlayerName"] == playerName]
    if allStatsOfPlayer.size == 0:
        continue
    mf = allStatsOfPlayer.iloc[0]["M/F"]
    d["M/F"] = mf
    thisSeasonStats = allStatsOfPlayer[allStatsOfPlayer["Season"] == "2020/2021"]
    if thisSeasonStats.size == 0:
        continue
    team = thisSeasonStats.iloc[0]["PlayerTeam"].replace('"', '')
    d["PlayerTeam"] = team
    playerTeamScoreSum = allStatsOfPlayer["Score1"].sum()
    opposingTeamsScoreSum = allStatsOfPlayer["Score2"].sum()
    for value in values:
        s = allStatsOfPlayer[value].sum()
        d[value] = s / (playerTeamScoreSum+opposingTeamsScoreSum)
    playersStatsPerSet = playersStatsPerSet.append(d, ignore_index=True)

In [1295]:
playersStatsPerSet

Unnamed: 0,PlayerName,M/F,PlayerTeam,PointsTotal,PointsDelta,ServeTotal,ServeError,ServePoints,ReceptionTotal,ReceptionError,AttackTotal,AttackError,AttackBlocked,AttackPoints,BlockPoints
0,Abercrombie Brittany,F,SC Potsdam,4.241935,2.572581,3.532258,0.298387,0.193548,0.000000,0.000000,9.838710,0.709677,0.661290,3.725806,0.322581
1,Agbortabi Vanessa,F,SC Potsdam,1.910448,0.776119,2.641791,0.179104,0.126866,5.194030,0.268657,4.932836,0.395522,0.291045,1.619403,0.164179
2,Agost Taylor,F,VfB Suhl LOTTO Thüringen,1.618321,1.106870,2.396947,0.213740,0.076336,0.045802,0.022901,3.335878,0.145038,0.129771,1.374046,0.167939
3,Alsmeier Lina,F,USC Münster,3.380952,1.285714,3.396825,0.579365,0.333333,6.238095,0.412698,7.325397,0.746032,0.357143,2.619048,0.428571
4,Ambrosius Lea,F,SSC Palmberg Schwerin,0.244094,0.000000,0.748031,0.188976,0.031496,0.000000,0.000000,0.330709,0.031496,0.023622,0.125984,0.086614
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
384,Fach Chiara,F,VC Wiesbaden,0.000000,0.000000,0.250000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
385,Mohwinkel Theo,M,SVG Lüneburg,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
386,Garski Jacqueline,F,VfB Suhl LOTTO Thüringen,0.000000,0.000000,0.181818,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
387,Hirsch Jana,F,VC Wiesbaden,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000


In [1296]:
playersStatsPerSet.to_csv('PlayerPerformances.csv')
playersStatsPerSet.to_excel("PlayerPerformances.xlsx")

# Learning

In [1431]:
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import RandomForestRegressor

In [1432]:
def unison_shuffled_copies(a, b):
    assert len(a) == len(b)
    p = np.random.permutation(len(a))
    return a[p], b[p]

In [1433]:
winOrLose = lambda val: 1 if val > 0.5 else 0

In [1434]:
gamesOnly

Unnamed: 0,Date,GameID,Season,M/F,Team1,Team2,Score1,Score2,Player1Team1,Player2Team1,...,Player7Team2,Player8Team2,Player9Team2,Player10Team2,Player11Team2,Player12Team2,Player13Team2,Player14Team2,Player14Team1,Player1Team2
0,2020/12/20,1045.0,2020/2021,M,"""United Volleys Frankfurt""","""Volleyball Bisons Bühl""",3,1,Imhoff Facundo,Tsuiki Satoshi,...,Henning Paul,Jurkovics Mathäus,Meier Leon,Ringseis Florian,Kronthaler Niklas,Lopez Tomas,-,-,-,Szabó Alpár
1,2020/11/04,1001.0,2020/2021,M,"""Volleyball Bisons Bühl""","""TSV Unterhaching""",3,0,Thiel Stefan,Lind Jonas,...,Brandt Severin,Brandt Niklas,Petrusic Juro,Thom Benjamin,Gehringer Paul,Friedrich Roy,Paduretu Eric,Chefai Mohamed,-,Suck Fabian
2,2021/01/23,1065.0,2020/2021,M,"""VfB Friedrichshafen""","""TSV Unterhaching""",3,0,Worsley Joseph,Aylsworth Avery,...,Brandt Severin,Brandt Niklas,Petrusic Juro,Thom Benjamin,Gehringer Paul,Friedrich Roy,Paduretu Eric,Chefai Mohamed,-,Suck Fabian
3,2020/10/03,2002.0,2020/2021,F,"""NawaRo Straubing""","""Schwarz-Weiß Erfurt""",3,1,Provaroni Claudia,Ismaili Valbona,...,Lenz Sindy,Michel Tosi Victoria,Gross Jasmine,Barthel Meghan,Maase Rica,Cole Madelyn,-,-,-,Palmer Madeline
4,2020/10/29,2019.0,2020/2021,F,"""Rote Raben Vilsbiburg""","""NawaRo Straubing""",3,0,Dammer Simona,Keller Luisa,...,Schweigmann Janna,Hänle Marie,Dreblow Sophie,Scholten Iris,Gryka Magdalena,-,-,-,-,Provaroni Claudia
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
372,2020/11/14,2035.0,2020/2021,F,"""Allianz MTV Stuttgart""","""NawaRo Straubing""",3,0,Koskelo Roosa,Segura Palleres Maria,...,Dreblow Sophie,Scholten Iris,-,-,-,-,-,-,-,Provaroni Claudia
373,2020/12/05,2052.0,2020/2021,F,"""Allianz MTV Stuttgart""","""Schwarz-Weiß Erfurt""",3,1,Koskelo Roosa,Segura Palleres Maria,...,Michel Tosi Victoria,Gross Jasmine,Barthel Meghan,Maase Rica,Merte Franca,Cole Madelyn,-,-,-,Palmer Madeline
374,2019/11/30,1052.0,2019/2020,M,"""WWK Volleys Herrsching""","""VfB Friedrichshafen""",3,2,Ilic Dorde,Stöhr Tim,...,Janouch Jakub,Juhkami Martti,Malescha Daniel,Steuerwald Markus,Gjorgiev Nikola,Schmidt Brendan,-,-,-,Worsley Joseph
375,2019/10/23,2018.0,2019/2020,F,"""NawaRo Straubing""","""Allianz MTV Stuttgart""",0,3,Steen Knudsen Ragni,Stöhr Tim,...,Rosenthal Jenna,Jegdic Aleksandra,No Match,Rivers Krystal,Janiska Jennifer,Berger Lara,No Match,-,-,Koskelo Roosa


In [1435]:
Y = gamesOnly["Score1"].to_numpy() / (gamesOnly["Score1"].to_numpy() + gamesOnly["Score2"].to_numpy())

In [1436]:
X = []
for i in range(0, gamesOnly.shape[0]):
    game = gamesOnly.iloc[i]
    dataPoint = np.zeros((2,12))
    for teamNumber in range(1,3):
        teamStrength = np.zeros((1,12))
        playerCounter = 0
        for playerNumber in range(1,15):
            playerNTeamMName = game["Player{}Team{}".format(playerNumber, teamNumber)]
            if playerNTeamMName == "-":
                continue
            playerStrength = playersStatsPerSet[playersStatsPerSet["PlayerName"] == playerNTeamMName].drop(["PlayerName", "M/F", "PlayerTeam"], axis=1)
            playerStrength = playerStrength.to_numpy()
            if np.size(playerStrength, 0) == 0:
                continue
            playerCounter += 1
            teamStrength += playerStrength
        dataPoint[teamNumber-1] = teamStrength / playerCounter
    X.append(dataPoint.flatten())

In [1437]:
Y.shape

(377,)

In [1438]:
Y = Y.astype(float)

In [1439]:
X = np.array(X)

In [1440]:
X.shape

(377, 24)

In [1441]:
X

array([[1.34879195, 0.65701382, 2.01907092, ..., 0.15304762, 0.89143144,
        0.20092915],
       [1.23288446, 0.54230144, 1.76112006, ..., 0.22085425, 0.71875741,
        0.08546193],
       [1.29989844, 0.67324916, 1.56726012, ..., 0.22085425, 0.71875741,
        0.08546193],
       ...,
       [1.59635298, 0.61538392, 2.20484089, ..., 0.16777447, 1.01184973,
        0.18111656],
       [1.75971294, 0.61819775, 2.19717252, ..., 0.14843525, 1.0312637 ,
        0.1847612 ],
       [1.61238826, 0.79566767, 2.13788251, ..., 0.17425188, 0.85274446,
        0.13048903]])

In [1442]:
X, Y = unison_shuffled_copies(X, Y)

In [1443]:
# needed for classifiers
#Y = np.array([winOrLose(p) for p in Y]).astype(int)

In [1444]:
ind = int(X.shape[0] * 1)

In [1445]:
X_train = X[:ind]
Y_train = Y[:ind]
X_test = X[ind:]
Y_test = Y[ind:]

In [1447]:
if X_test.size == 0:
    X_test = X
    Y_test = Y

In [1448]:
lin_reg = LinearRegression()
#log_reg = LogisticRegression()
#rndF_clf = RandomForestClassifier(max_depth=2)
#rndF_reg = RandomForestRegressor(n_estimators=300, max_depth=2)

model = lin_reg
model.fit(X_train, Y_train) 

LinearRegression()

In [1449]:
def predict(nameTeam1, nameTeam2):
    playersTeam1 = playersStatsPerSet[playersStatsPerSet["PlayerTeam"] == nameTeam1]
    playersTeam2 = playersStatsPerSet[playersStatsPerSet["PlayerTeam"] == nameTeam2]
    teamStrength1 = playersTeam1.drop(["PlayerName" ,"M/F", "PlayerTeam"], axis = 1).sum(axis=0).to_numpy() / playersTeam1.shape[0]
    teamStrength2 = playersTeam2.drop(["PlayerName" ,"M/F", "PlayerTeam"], axis = 1).sum(axis=0).to_numpy() / playersTeam2.shape[0]
    X = np.concatenate((teamStrength1, teamStrength2))
    return model.predict([X])

In [1450]:
predict("BERLIN RECYCLING Volleys", "VfB Friedrichshafen")

array([0.7335006])

In [1451]:
pred = model.predict(X_test)

In [1452]:
pred = np.array([winOrLose(p) for p in pred])
real = np.array([winOrLose(p) for p in Y_test])

# n of theoretical correct predictions

In [1453]:
corr = np.sum(pred == real)
print(corr)

281


# n of false predictions

In [1454]:
fal = real.shape[0] - corr
print(fal)

96


In [1455]:
real.shape[0]

377

# Predictor

In [1461]:
team1 = "SVG Lüneburg"
team2 = "Helios GRIZZLYS Giesen"
oddOnTeam1Win = 1.35
oddOnTeam2Win = 2.45
capital = 100

In [1462]:
prediction = predict(team1, team2)[0]
print(prediction)

0.48702063270096885


In [1463]:
betAmountOnTeam1 = ((prediction*oddOnTeam1Win - (1-prediction))/oddOnTeam1Win)*100
prediction = 1 - prediction
betAmountOnTeam2 = ((prediction*oddOnTeam2Win - (1-prediction))/oddOnTeam2Win)*100

In [None]:
print("Recommended Amount on", team1, betAmountOnTeam1, "€")
print("Recommended Amount on", team2, betAmountOnTeam2, "€")

In [1476]:
expectedOnTeam1 = (betAmountOnTeam1 * oddOnTeam1Win *(1-prediction) - betAmountOnTeam1 * prediction) - 0.6
expectedOnTeam2 = (betAmountOnTeam2 * oddOnTeam2Win *prediction - betAmountOnTeam2 * (1-prediction)) - 0.6

In [1477]:
expectedOnTeam1

2.2899697369455345

In [1478]:
expectedOnTeam2

14.795576343633149