# Data Preparation

This notebook performs the initial data preparation: it cleans the raw match results and adds the league points each team earned in every game, based on the result.

In [1]:
import pandas as pd
import numpy as np
import csv

In [2]:
# Load the raw match results and clean them in one chained pipeline.
#  - 'Unnamed: 0' is discarded (presumably a leftover index column from an
#    earlier CSV export -- TODO confirm).
#  - The rows labelled 307 and 235 are removed (presumably faulty or
#    duplicate records -- TODO confirm).
#  - reset_index() renumbers the rows from 0 and keeps the old row labels
#    in a new 'index' column.
#  - The half-time goal columns are dropped.
df = (
    pd.read_csv('results.csv', delimiter=",", quoting=csv.QUOTE_NONE, encoding='utf-8')
    .drop(columns='Unnamed: 0')
    .drop(index=[307, 235])
    .reset_index()
    .drop(columns=['home_goals_half', 'away_goals_half'])
)

# Overwrite the matchweek of the row that sits at label 234 *after* the
# renumbering above (presumably a data-entry error in the source file --
# TODO confirm).
df.loc[df.index == 234, 'matchweek'] = 27

In [3]:
# Award points per match: 1 each for a draw, 3 for a win, 0 for a loss.
home_goals, away_goals = df["home_goals"], df["away_goals"]

# Each entry: (row mask, points for the home side, points for the away side).
# The three masks are mutually exclusive, so every row is written at most once
# per column; rows where a comparison is undefined (e.g. missing goals) match
# none of the masks and are left untouched.
outcomes = [
    (home_goals == away_goals, 1, 1),  # draw
    (home_goals > away_goals, 3, 0),   # home win
    (home_goals < away_goals, 0, 3),   # away win
]

for outcome_mask, home_pts, away_pts in outcomes:
    df.loc[outcome_mask, 'home_points'] = home_pts
    df.loc[outcome_mask, 'away_points'] = away_pts

In [5]:
# Split the season into its two rounds and order the games within each.
# First round: matchweek <= 17, ordered by matchweek and home team.
df_hin = (
    df.loc[df['matchweek'].le(17)]
    .sort_values(by=['matchweek', 'home_team'])
)

# Second round: matchweek > 17, ordered by matchweek and *away* team.
df_rück = (
    df.loc[df['matchweek'].gt(17)]
    .sort_values(by=['matchweek', 'away_team'])
)

# Stack both rounds and renumber the rows. reset_index() keeps the previous
# row labels as a column; because df already has an 'index' column, pandas
# names the new one 'level_0'.
result = pd.concat([df_hin, df_rück]).reset_index()

In [6]:
# Give every team a numeric id.
# `teams` maps row label -> team name, using the first row in which each team
# appears as the home side. The row label itself is used as the team's number,
# so the numbers are unique but not necessarily consecutive.
teams = result['home_team'].drop_duplicates().to_dict()

for team_number, team_name in teams.items():
    # A team that never appears in 'home_team' gets no number (stays NaN).
    result.loc[result["home_team"] == team_name, 'home_team_number'] = team_number
    result.loc[result["away_team"] == team_name, 'away_team_number'] = team_number

In [7]:
# Set a running game number for each game.
# 'level_0' holds the pre-concat row labels left behind by reset_index() and is
# not needed any more. errors='ignore' keeps this cell re-runnable: on a second
# execution the column is already gone, and without the flag drop() would
# raise a KeyError.
result = result.drop(columns=['level_0'], errors='ignore')

# The rows are already in their final order, so the row label is the game number.
result['game_number'] = result.index

In [8]:
# Persist the prepared table as CSV with pandas' default settings, so the row
# index is written as the first (unnamed) column. The target file name has no
# extension.
result.to_csv(path_or_buf='results_prepared')