In [1]:
import pandas as pd
import DataGathering.FileHandling as files
import jellyfish

In [2]:
def unifyDates(date: str) -> str:
    if len(date) == 0:
        return "0000/00/00"
    if "." in date:
        date = date.split(".")
    elif "/" in date:
        date = date.split("/")
    if len(date[0]) != 4:
        date = date[::-1]
    return "/".join(date)

In [3]:
def getSeason(unifiedDate: str) -> str:
    ymd = unifiedDate.split("/")
    year = int(ymd[0])
    month = int(ymd[1])
    if month >= 10:
        return str(year)+"/"+str(year+1)
    elif month <= 3:
        return str(year-1)+"/"+str(year)
    else:
        return "Off-Season"

In [4]:
teamNames = files.readFileToList("Data/teamNames.txt")
def getClosestTeamName(teamName: str) -> str:
    return sorted(teamNames, key=lambda savedName: jellyfish.jaro_similarity(savedName, teamName), reverse=True)[0]

In [5]:
playerNames = files.readFileToList("Data/playerNames.txt")
def getClosestPlayerName(playerName: str) -> str:
    closest = sorted(playerNames, key=lambda savedName: jellyfish.levenshtein_distance(savedName, playerName))[0]
    similarity = jellyfish.jaro_similarity(closest, playerName)
    if similarity > 0.7:
        return closest
    return "No Match"

In [6]:
goodShits = ["PointsTotal", "PointsDelta", "ServeTotal", "ServeError", "ServePoints", "ReceptionTotal", "ReceptionError", "AttackTotal", "AttackError", "AttackBlocked", "AttackPoints", "BlockPoints"]
def getPlayersFromTeam(game, teamNumber):
    players = {}
    for playerNumber in range(1,15):
        scores = []
        for goodShit in goodShits:
            value = game["{}Player{}Team{}".format(goodShit, playerNumber, teamNumber)]
            if type(value) == str:
                value = 0
            scores.append(value)
        playerName = game["NamePlayer{}Team{}".format(playerNumber, teamNumber)]
        if playerName != "":
            players[playerName] = scores
    return players

In [7]:
games = []
for gameCSVPath in files.getAllFileNamesIn("Data/CSVs/"):
    game = files.readGameCSV(gameCSVPath)
    games.append(game)

# GamesOnly

In [8]:
gamesOnly = pd.DataFrame([], columns = ["Date","GameID","Season","M/F","Team1","Team2","Score1","Score2",
                                        "Player1Team1",
                                        "Player2Team1",
                                        "Player3Team1",
                                        "Player4Team1",
                                        "Player5Team1",
                                        "Player6Team1",
                                        "Player7Team1",
                                        "Player8Team1",
                                        "Player9Team1",
                                        "Player10Team1",
                                        "Player11Team1",
                                        "Player12Team1",
                                        "Player13Team1",
                                        "Player14Team1"
                                        "Player1Team2",
                                        "Player2Team2",
                                        "Player3Team2",
                                        "Player4Team2",
                                        "Player5Team2",
                                        "Player6Team2",
                                        "Player7Team2",
                                        "Player8Team2",
                                        "Player9Team2",
                                        "Player10Team2",
                                        "Player11Team2",
                                        "Player12Team2",
                                        "Player13Team2",
                                        "Player14Team2"])

In [9]:
gamesOnly

Unnamed: 0,Date,GameID,Season,M/F,Team1,Team2,Score1,Score2,Player1Team1,Player2Team1,...,Player5Team2,Player6Team2,Player7Team2,Player8Team2,Player9Team2,Player10Team2,Player11Team2,Player12Team2,Player13Team2,Player14Team2


In [10]:
for game in games:
    d = {}
    date = unifyDates(game["Date"])
    d["Date"] = date
    gameID = game["MatchNumber"]
    d["GameID"] = gameID
    season = getSeason(date)
    d["Season"] = season
    team1 = getClosestTeamName(game["NameTeam1"])
    team2 = getClosestTeamName(game["NameTeam2"])
    d["Team1"] = team1
    d["Team2"] = team2
    score1 = int(game["ScoreTeam1"])
    score2 = int(game["ScoreTeam2"])
    d["Score1"] = score1
    d["Score2"] = score2
    mf = "-"
    if "Män" in game["League"]:
        mf = "M"
    elif "Fra" in game["League"]:
        mf = "F"
    d["M/F"] = mf
    playersTeam1 = list(getPlayersFromTeam(game, 1).keys())
    playersTeam2 = list(getPlayersFromTeam(game, 2).keys())
    for i in range(1, 15):
        player = "-"
        if len(playersTeam1) > i:
            player = playersTeam1[i-1]
            player = getClosestPlayerName(player)
        d["Player{}Team1".format(i)] = player
    for i in range(1, 15):
        player = "-"
        if len(playersTeam2) >= i:
            player = playersTeam2[i-1]
            player = getClosestPlayerName(player)
        d["Player{}Team2".format(i)] = player
        
    gamesOnly = gamesOnly.append(d, ignore_index=True)

KeyError: 'ServeErrorPlayer1Team2'

In [None]:
gamesOnly

# Players

In [None]:
playerStatsPerGame = pd.DataFrame([], columns = ["PlayerName","M/F","PlayerTeam","OpposingTeam","Score1","Score2","Date","GameID","Season","PointsTotal", "PointsDelta", "ServeTotal", "ServeError", "ServePoints", "ReceptionTotal", "ReceptionError", "AttackTotal", "AttackError", "AttackBlocked", "AttackPoints", "BlockPoints"])

In [None]:
for game in games:
    date = unifyDates(game["Date"])
    gameID = game["MatchNumber"]
    season = getSeason(date)
    team1 = getClosestTeamName(game["NameTeam1"])
    team2 = getClosestTeamName(game["NameTeam2"])
    score1 = int(game["ScoreTeam1"])
    score2 = int(game["ScoreTeam2"])
    mf = "-"
    if "Män" in game["League"]:
        mf = "M"
    elif "Fra" in game["League"]:
        mf = "F"
    playersTeam1 = getPlayersFromTeam(game, 1)
    playersTeam2 = getPlayersFromTeam(game, 2)
    for playerName, stats in playersTeam1.items():
        playerName = getClosestPlayerName(playerName)
        row = {"PlayerName":playerName,
               "M/F":mf,
               "PlayerTeam":team1,
               "OpposingTeam":team2,
               "Score1":score1,
               "Score2":score2,
               "Date":date,
               "GameID":gameID,
               "Season":season,
               "PointsTotal":stats[0],
               "PointsDelta":stats[1],
               "ServeTotal":stats[2],
               "ServeError":stats[3],
               "ServePoints":stats[4],
               "ReceptionTotal":stats[5],
               "ReceptionError":stats[6],
               "AttackTotal":stats[7],
               "AttackError":stats[8],
               "AttackBlocked":stats[9],
               "AttackPoints":stats[10],
                "BlockPoints":stats[11]}
        playerStatsPerGame = playerStatsPerGame.append(row, ignore_index=True)
        
    for playerName, stats in playersTeam2.items():
        playerName = getClosestPlayerName(playerName)
        row = {"PlayerName":playerName,
               "M/F":mf,
               "PlayerTeam":team2,
               "OpposingTeam":team1,
               "Score1":score2,
               "Score2":score1,
               "Date":date,
               "GameID":gameID,
               "Season":season,
               "PointsTotal":stats[0],
               "PointsDelta":stats[1],
               "ServeTotal":stats[2],
               "ServeError":stats[3],
               "ServePoints":stats[4],
               "ReceptionTotal":stats[5],
               "ReceptionError":stats[6],
               "AttackTotal":stats[7],
               "AttackError":stats[8],
               "AttackBlocked":stats[9],
               "AttackPoints":stats[10],
                "BlockPoints":stats[11]}
        playerStatsPerGame = playerStatsPerGame.append(row, ignore_index=True)
        

In [None]:
playerStatsPerGame

In [None]:
playerStatsPerGame[playerStatsPerGame["PlayerName"] == "Grankin Sergey"]

In [None]:
playersStatsPerSet = pd.DataFrame([], columns=["PlayerName", "M/F", "PlayerTeam", "PointsTotal", "PointsDelta", "ServeTotal", "ServeError", "ServePoints", "ReceptionTotal", "ReceptionError", "AttackTotal", "AttackError", "AttackBlocked", "AttackPoints", "BlockPoints"])

In [None]:
playersStatsPerSet

In [None]:
values = ["PointsTotal", "PointsDelta", "ServeTotal", "ServeError", "ServePoints", "ReceptionTotal", "ReceptionError", "AttackTotal", "AttackError", "AttackBlocked", "AttackPoints", "BlockPoints"]

In [None]:
for playerName in playerNames:
    d = {}
    d["PlayerName"] = playerName
    allStatsOfPlayer = playerStatsPerGame[playerStatsPerGame["PlayerName"] == playerName]
    if allStatsOfPlayer.size == 0:
        continue
    mf = allStatsOfPlayer.iloc[0]["M/F"]
    d["M/F"] = mf
    thisSeasonStats = allStatsOfPlayer[allStatsOfPlayer["Season"] == "2020/2021"]
    if thisSeasonStats.size == 0:
        continue
    team = thisSeasonStats.iloc[0]["PlayerTeam"].replace('"', '')
    d["PlayerTeam"] = team
    playerTeamScoreSum = allStatsOfPlayer["Score1"].sum()
    opposingTeamsScoreSum = allStatsOfPlayer["Score2"].sum()
    for value in values:
        s = allStatsOfPlayer[value].sum()
        d[value] = s / (playerTeamScoreSum+opposingTeamsScoreSum)
    playersStatsPerSet = playersStatsPerSet.append(d, ignore_index=True)

In [None]:
playersStatsPerSet

In [None]:
playersStatsPerSet.to_csv('PlayerPerformances.csv')
playersStatsPerSet.to_excel("PlayerPerformances.xlsx")

# Learning

In [None]:
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import RandomForestRegressor

In [None]:
def unison_shuffled_copies(a, b):
    assert len(a) == len(b)
    p = np.random.permutation(len(a))
    return a[p], b[p]

In [None]:
winOrLose = lambda val: 1 if val > 0.5 else 0

In [None]:
gamesOnly

In [None]:
Y = gamesOnly["Score1"].to_numpy() / (gamesOnly["Score1"].to_numpy() + gamesOnly["Score2"].to_numpy())

In [None]:
X = []
for i in range(0, gamesOnly.shape[0]):
    game = gamesOnly.iloc[i]
    dataPoint = np.zeros((2,12))
    for teamNumber in range(1,3):
        teamStrength = np.zeros((1,12))
        playerCounter = 0
        for playerNumber in range(1,15):
            playerNTeamMName = game["Player{}Team{}".format(playerNumber, teamNumber)]
            if playerNTeamMName == "-":
                continue
            playerStrength = playersStatsPerSet[playersStatsPerSet["PlayerName"] == playerNTeamMName].drop(["PlayerName", "M/F", "PlayerTeam"], axis=1)
            playerStrength = playerStrength.to_numpy()
            if np.size(playerStrength, 0) == 0:
                continue
            playerCounter += 1
            teamStrength += playerStrength
        dataPoint[teamNumber-1] = teamStrength / playerCounter
    X.append(dataPoint.flatten())

In [None]:
Y.shape

In [None]:
Y = Y.astype(float)

In [None]:
X = np.array(X)

In [None]:
X.shape

In [None]:
X

In [None]:
X, Y = unison_shuffled_copies(X, Y)

In [None]:
# needed for classifiers
#Y = np.array([winOrLose(p) for p in Y]).astype(int)

In [None]:
ind = int(X.shape[0] * 1)

In [None]:
X_train = X[:ind]
Y_train = Y[:ind]
X_test = X[ind:]
Y_test = Y[ind:]

In [None]:
if X_test.size == 0:
    X_test = X
    Y_test = Y

In [None]:
lin_reg = LinearRegression()
#log_reg = LogisticRegression()
#rndF_clf = RandomForestClassifier(max_depth=2)
#rndF_reg = RandomForestRegressor(n_estimators=300, max_depth=2)

model = lin_reg
model.fit(X_train, Y_train) 

In [None]:
def predict(nameTeam1, nameTeam2):
    playersTeam1 = playersStatsPerSet[playersStatsPerSet["PlayerTeam"] == nameTeam1]
    playersTeam2 = playersStatsPerSet[playersStatsPerSet["PlayerTeam"] == nameTeam2]
    teamStrength1 = playersTeam1.drop(["PlayerName" ,"M/F", "PlayerTeam"], axis = 1).sum(axis=0).to_numpy() / playersTeam1.shape[0]
    teamStrength2 = playersTeam2.drop(["PlayerName" ,"M/F", "PlayerTeam"], axis = 1).sum(axis=0).to_numpy() / playersTeam2.shape[0]
    X = np.concatenate((teamStrength1, teamStrength2))
    return model.predict([X])

In [None]:
predict("BERLIN RECYCLING Volleys", "VfB Friedrichshafen")

In [None]:
pred = model.predict(X_test)

In [None]:
pred = np.array([winOrLose(p) for p in pred])
real = np.array([winOrLose(p) for p in Y_test])

# n of theoretical correct predictions

In [None]:
corr = np.sum(pred == real)
print(corr)

# n of false predictions

In [None]:
fal = real.shape[0] - corr
print(fal)

In [None]:
real.shape[0]

# Predictor

In [None]:
team1 = "Dresdner SC"
team2 = "VC Wiesbaden"
oddOnTeam1Win = 1.01
oddOnTeam2Win = 6.25
capital = 50

In [None]:
prediction = predict(team1, team2)[0]
print(prediction)

In [None]:
betAmountOnTeam1 = ((prediction*oddOnTeam1Win - (1-prediction))/oddOnTeam1Win)*capital
prediction = 1 - prediction
betAmountOnTeam2 = ((prediction*oddOnTeam2Win - (1-prediction))/oddOnTeam2Win)*capital

In [None]:
print("Recommended Amount on", team1, np.round(betAmountOnTeam1,2), "€")
print("Recommended Amount on", team2, np.round(betAmountOnTeam2,2), "€")

In [None]:
expectedOnTeam1 = (betAmountOnTeam1 * oddOnTeam1Win *(1-prediction) - betAmountOnTeam1 * prediction) - 0.6
expectedOnTeam2 = (betAmountOnTeam2 * oddOnTeam2Win *prediction - betAmountOnTeam2 * (1-prediction)) - 0.6

In [None]:
print("Expected Delta on", team1,"is", np.round(expectedOnTeam1, 2), "€")
print("Expected Delta on", team2,"is", np.round(expectedOnTeam2, 2), "€")

# Betting Excel Sheet

In [None]:
gamePlanMale = pd.read_excel("SpielPlanM.xlsx")
gamePlanFemale = pd.read_excel("SpielPlanF.xlsx")

In [None]:
gamePlanMale.columns

In [None]:
toDrop = ['Uhrzeit', 'Wochentag', 'ST',
          'Gastgeber', 'Austragungsort/Ergebnis',
          'Austragungsort', 'Ergebnis', 'Saison', 'Spielrunde', 'Geschlecht',
          'Satzpunkte', 'Ballpunkte', 'Spieldauer', 'Zuschauerzahl',
          'Satzpunkte 1', ':', 'Satzpunkte 2', 'Satz 1 - Ballpunkte 1', ':.1',
          'Satz 1 - Ballpunkte 2', 'Satz 1 - Satzdauer', 'Satz 2 - Ballpunkte 1',
          ':.2', 'Satz 2 - Ballpunkte 2', 'Satz 2 - Satzdauer',
          'Satz 3 - Ballpunkte 1', ':.3', 'Satz 3 - Ballpunkte 2',
          'Satz 3 - Satzdauer', 'Satz 4 - Ballpunkte 1', ':.4',
          'Satz 4 - Ballpunkte 2', 'Satz 4 - Satzdauer', 'Satz 5 - Ballpunkte 1',
          ':.5', 'Satz 5 - Ballpunkte 2', 'Satz 5 - Satzdauer']

In [None]:
gamePlanMale = gamePlanMale.drop(toDrop, axis = 1)
gamePlanFemale = gamePlanFemale.drop(toDrop, axis = 1)

In [None]:
gamePlanMale = gamePlanMale.astype(str)
gamePlanFemale = gamePlanFemale.astype(str)

In [None]:
gamePlanMale["Vorhersage"] = 0
gamePlanFemale["Vorhersage"] = 0

In [None]:
gamePlanMale.columns

In [None]:
for indx in range(0, gamePlanMale.shape[0]):
    row = gamePlanMale.loc[indx]
    print(row)
    team1 = getClosestTeamName(row['Mannschaft 1'])
    team2 = getClosestTeamName(row['Mannschaft 2'])
    prediction = predict(team1, team2)
    gamePlanMale.at[indx, ["Vorhersage"]] = prediction
    
for indx in range(0, gamePlanFemale.shape[0]):
    row = gamePlanFemale.loc[indx]
    team1 = getClosestTeamName(row['Mannschaft 1'])
    team2 = getClosestTeamName(row['Mannschaft 2'])
    prediction = predict(team1, team2)[0]
    gamePlanFemale.at[indx, ["Vorhersage"]] = prediction
    