# 1. First, we want to import the necessary modules for the following three steps

In [28]:
# Modules required for Webscraping : beautifulsoup4, lxml, requests
from bs4 import BeautifulSoup
import urllib.request   
import requests

# Modules required for Data-Analysis and Machine Learning
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression

# Modules required for App Interface
from tkinter import *
from tkinter.messagebox import *

# Module required to end script if error occurs
import sys

# 2. Secondly, we want to web-scrape and download the data for the English "Premier League" and the German "Bundesliga"

In [29]:
def _download_league_csvs(page_url, link_indices, file_prefix, timeout=30):
    """Download selected CSV files linked from a football-data.co.uk league page.

    Parameters
    ----------
    page_url : str
        URL of the league overview page whose ``<a>`` tags hold the download links.
    link_indices : iterable of int
        Positions, within the list of all ``<a>`` tags on the page, of the links to download.
        NOTE(review): these positions depend on the page layout — confirm they still
        point at the intended season files.
    file_prefix : str
        Prefix of the local file names; files are written as ``<prefix>1.csv``,
        ``<prefix>2.csv``, ... in the order of ``link_indices``.
    timeout : float, optional
        Seconds to wait for the overview page before giving up (avoids hanging forever).

    Raises
    ------
    requests.RequestException
        If the overview page cannot be fetched or answers with an HTTP error status.
    IndexError
        If the page contains fewer ``<a>`` tags than the requested positions.
    """
    # Fetch the page source and fail early on an HTTP error instead of parsing an error page
    response = requests.get(page_url, timeout=timeout)
    response.raise_for_status()

    # Parse with lxml and collect every "a" tag; the download links are picked by position
    anchors = BeautifulSoup(response.text, 'lxml').find_all('a')

    # Files are numbered 1, 2, 3, ... so later cells can read them by a fixed name
    for file_number, link_index in enumerate(link_indices, start=1):
        download_url = 'http://www.football-data.co.uk/' + anchors[link_index].get('href')
        urllib.request.urlretrieve(download_url, f'{file_prefix}{file_number}.csv')


try:
    # 1. Premier League: links 73 to 83 in steps of 5 --> PL1.csv, PL2.csv, PL3.csv
    _download_league_csvs('http://www.football-data.co.uk/englandm.php', range(73, 84, 5), 'PL')

    # 2. Bundesliga: links 73 to 77 in steps of 2 --> BL1.csv, BL2.csv, BL3.csv
    _download_league_csvs('http://www.football-data.co.uk/germanym.php', range(73, 78, 2), 'BL')

# 3. On any failure (no connection, HTTP error, unexpected page layout): show a
#    notification and terminate the script. `Exception` (not a bare except) keeps
#    KeyboardInterrupt / SystemExit working as usual.
except Exception:
    root = Tk()
    root.title("Connection Error")
    root.geometry('250x100')
    Label(root, text="Please connect to internet, \n that most recent data can be downloaded. \n try again!").grid(row=0, column = 0)
    mainloop()
    sys.exit()

# 3. Thirdly, we want to read the data into pandas DataFrames, prepare it for modelling, and train a model on it

## 3.1. Prediction Model for Premier League

In [30]:
try:
    # PREPARE DATA FOR MODELLING
    # 1. Import csv data; only the full-time goals and the odds of Bet365 (B365)
    #    and BWin (BW) are used
    pl_odds_columns = ['B365H','B365D','B365A','BWH','BWD','BWA']
    pl_csv_columns = ['FTHG','FTAG'] + pl_odds_columns
    pl_df1, pl_df2, pl_df3 = (
        pd.read_csv(f'PL{file_number}.csv', usecols=pl_csv_columns)
        for file_number in (1, 2, 3)
    )

    # 2. Concatenate the three dataframes into one
    pl_df = pd.concat([pl_df1, pl_df2, pl_df3], ignore_index=True)

    # 3. Add column "Goal Difference Home-Away" and drop the raw goal columns
    pl_df['Goal Difference Home-Away'] = pl_df.FTHG - pl_df.FTAG
    pl_df = pl_df.drop(['FTHG','FTAG'], axis = 1)

    # 4. Drop matches with a missing odd or result: LinearRegression cannot be
    #    fitted on NaN values, so a single incomplete row would otherwise make
    #    the whole dataset unusable
    pl_df = pl_df.dropna()

    # TRAIN MODEL ON DATA
    # 1. Predictors (X) are the six odds, target (y) is the goal difference
    X = pl_df[pl_odds_columns]
    y = pl_df['Goal Difference Home-Away']

    # 2. Create the linear regression model and fit it on the data
    lm_pl = LinearRegression()
    lm_pl.fit(X, y)

# ERROR HANDLING: if the data cannot be read or fitted, display a message and end
# the script. `Exception` (not a bare except) lets KeyboardInterrupt / SystemExit through.
except Exception:
    root = Tk()
    root.title("Data Error")
    root.geometry('250x100')
    Label(root, text="Premier League data cannot be worked with!").grid(row=0, column = 0)
    mainloop()
    sys.exit()

## 3.2. Prediction Model for Bundesliga

In [31]:
try:
    # PREPARE DATA FOR MODELLING
    # 1. Import csv data; only the full-time goals and the odds of Bet365 (B365)
    #    and BWin (BW) are used
    bl_odds_columns = ['B365H','B365D','B365A','BWH','BWD','BWA']
    bl_csv_columns = ['FTHG','FTAG'] + bl_odds_columns
    bl_df1, bl_df2, bl_df3 = (
        pd.read_csv(f'BL{file_number}.csv', usecols=bl_csv_columns)
        for file_number in (1, 2, 3)
    )

    # 2. Concatenate the three dataframes into one
    bl_df = pd.concat([bl_df1, bl_df2, bl_df3], ignore_index=True)

    # 3. Add column "Goal Difference Home-Away" and drop the raw goal columns
    bl_df['Goal Difference Home-Away'] = bl_df.FTHG - bl_df.FTAG
    bl_df = bl_df.drop(['FTHG','FTAG'], axis = 1)

    # 4. Drop matches with a missing odd or result: LinearRegression cannot be
    #    fitted on NaN values, so a single incomplete row would otherwise make
    #    the whole dataset unusable
    bl_df = bl_df.dropna()

    # TRAIN MODEL ON DATA
    # 1. Predictors (A) are the six odds, target (b) is the goal difference
    A = bl_df[bl_odds_columns]
    b = bl_df['Goal Difference Home-Away']

    # 2. Create the linear regression model and fit it on the data
    lm_bl = LinearRegression()
    lm_bl.fit(A, b)

# ERROR HANDLING: if the data cannot be read or fitted, display a message and end
# the script. `Exception` (not a bare except) lets KeyboardInterrupt / SystemExit through.
except Exception:
    root = Tk()
    root.title("Data Error")
    root.geometry('250x100')
    Label(root, text="Bundesliga data cannot be worked with!").grid(row=0, column = 0)
    mainloop()
    sys.exit()

# 4. App Interface to interact with user and apply Bundesliga or Premier League prediction model

In [32]:
# 1. Define Function
def show_simulation():
    """Predict a match outcome from the entered odds and show it in the output field.

    Reads the six odds from the entry fields ``num1`` .. ``num6`` (in the order
    B365H, B365D, B365A, BWH, BWD, BWA), applies the model that matches the league
    selected in the option menu (``lm_bl`` or ``lm_pl``) and writes a sentence
    describing the rounded predicted goal difference (home minus away) into ``blank``.

    If any field does not contain a number, an error dialog is shown and the output
    field is left empty.
    """
    # 1.1. Delete previous content
    blank.delete(0, END)

    # 1.2. Get user input from the entry fields; float() raises ValueError on
    #      empty or non-numerical text
    try:
        odds = [float(field.get()) for field in (num1, num2, num3, num4, num5, num6)]
    except ValueError:
        # 1.3. Non-numerical input: notify the user. `showinfo` is the name provided by
        #      `from tkinter.messagebox import *`; the name `messagebox` is never bound.
        showinfo("Error!", "Error! Fill out all fields with numerical values. Syntax: 1 or 1.5!")
        return

    # 1.4. Shape the input as a single-row 2D array, as expected by predict()
    I = np.array([odds])

    # 1.5. Apply the model of the selected league and round to full goals
    if league.get() == "Bundesliga":
        Goals = int(round(lm_bl.predict(I)[0]))
    elif league.get() == "Premier League":
        Goals = int(round(lm_pl.predict(I)[0]))

    # 1.6. Positive difference: home team leads; negative: away team leads
    if Goals > 0:
        Ans = f'Home Team Wins with {Goals} goals difference'
    elif Goals == 0:
        Ans = 'Draw is expected'
    else:
        Ans = f'Away Team Wins with {Goals*-1} goals difference'

    # 1.7. Output the answer in the "blank" field
    blank.insert(0, Ans)
        
     
    
    
# 2. Defining Tkinter app interface

# 2.1. Create the main window --> give it title, icon and window size
main = Tk()
main.title("Football Outcome Predictor (v 1.0)")
main.iconbitmap("football.ico")
main.geometry('475x350')

# 2.2. To make it visually appealing, the first row (title row) gets a minimum height
main.grid_rowconfigure(0, minsize=50)

# 2.3. Defining fields in the grid

# 2.3.1. Option Menu: `league` holds the selected option (read later by
#        show_simulation) with "Premier League" as default. The widget is created
#        and placed in two steps because .grid() returns None; chaining the calls
#        would bind `optionmenu` to None instead of the widget.
league = StringVar(main)
league.set('Premier League')
optionmenu = OptionMenu(main, league, "Bundesliga", "Premier League")
optionmenu.grid(row=8, column=2, sticky=W)

# 2.3.2. Title
# NOTE(review): `(11)` is the plain integer 11, not a tuple — presumably a font size
# was intended; confirm the desired font before changing it.
Label(main, text = "You can type in odds of the Betting \n Houses to generate an outcome forecast!", font=(11)).grid(row=0, column=1, columnspan=3, sticky=W+E)

# 2.3.3. Labels of the grid: bookmaker logos (rows) and outcome headings (columns).
#        The PhotoImage objects stay bound to module-level names so the labels can
#        keep displaying them.
photoB365 = PhotoImage(file="bet365logo.png")
photoBWin = PhotoImage(file="bwinlogo.png")
Label(main, image=photoB365).grid(row=3, column=0, sticky=E)
Label(main, image=photoBWin).grid(row=4, column=0, sticky=E)
Label(main, text="").grid(row=5)
for heading_column, heading_text in enumerate(("Home Team Win", "Draw", "Away Team Win"), start=1):
    Label(main, text=heading_text).grid(row=2, column=heading_column)

# 2.3.4. Entry fields (green background): num1-num3 hold the Bet365 odds (row 3),
#        num4-num6 the BWin odds (row 4); columns 1-3 are home win / draw / away win
num1, num2, num3, num4, num5, num6 = (Entry(main, background="#58d68d") for _ in range(6))

for entry_row, row_entries in ((3, (num1, num2, num3)), (4, (num4, num5, num6))):
    for entry_column, entry in enumerate(row_entries, start=1):
        entry.grid(row=entry_row, column=entry_column)

# 2.3.5. League icon fields
photoBL = PhotoImage(file="bundesligalogo.png")
photoPL = PhotoImage(file="premierleaguelogo.png")
Label(main, image=photoBL).grid(row=0, column=0, sticky=E)
Label(main, image=photoPL).grid(row=0, column=4, sticky=W)

# 2.3.6. Output field of estimated result
blank = Entry(main)
blank.grid(row=6, column=2, columnspan=2, sticky=W+E)

# 2.3.7. Placeholder cell, "League" label and "Simulate Outcome" button
Label(main, text="").grid(row=7, column=1)
Label(main, text="League:").grid(row=8, column=1, pady=10, sticky=E)
Button(main, text='Simulate Outcome', command=show_simulation).grid(row=6, column=1, sticky=E)

# 2.3.8. Copyright row
Label(main, text="© Gp04397 & Aleiandro").grid(row=9, column=0, columnspan=6, sticky=W+E)

# 3. Mainloop: wait for events and update the GUI until the window is closed
mainloop()