In [1]:
# In case iPython does not find our personalized modules and we want to import them manually
# import sys
# sys.path.append('my/path/to/module/folder')
# import module_of_interest

# We can also make sure what's the main directory iPhython consider for running
# import os
# os.getcwd()

import constants
import requests
import pandas as pd
import numpy as np
import psycopg2
import csv
import datetime

pd.set_option('display.max_rows', 100)
pd.set_option('display.max_columns', 50)


In [2]:
website_frame = pd.read_csv(constants.CSV_SOURCE_URL)

In [3]:
website_frame.shape

(380, 106)

In [4]:
# Remove unwanted columns

unwanted_cols = ["Div", "BWH", "BWD", "BWA", "IWH", "IWD", "IWA", "PSH", "PSD", "PSA", "WHH", "WHD", "WHA", "VCH", "VCD", "VCA",
                 "P>2.5", "P<2.5","AHh", "B365AHH", "B365AHA", "PAHH", "PAHA", "MaxAHH", "MaxAHA", "AvgAHH", "AvgAHA", "B365CH",
                 "B365CD", "B365CA", "BWCH", "BWCD", "BWCA", "IWCH", "IWCD", "IWCA", "PSCH", "PSCD", "PSCA", "WHCH", "WHCD", "WHCA",
                 "VCCH", "VCCD", "VCCA", "MaxCH", "MaxCD", "MaxCA", "AvgCH", "AvgCD", "AvgCA", "B365C>2.5", "B365C<2.5", "PC>2.5",
                 "PC<2.5", "MaxC>2.5", "MaxC<2.5", "AvgC>2.5", "AvgC<2.5", "AHCh", "B365CAHH", "B365CAHA", "PCAHH", "PCAHA", "MaxCAHH",
                 "MaxCAHA", "AvgCAHH", "AvgCAHA"]

website_frame.drop(columns = unwanted_cols, inplace = True)

In [5]:
# Rename columns
website_frame.rename(columns = {"FTHG": "FullTimeHomeTeamGoals",
                               "FTAG": "FullTimeAwayTeamGoals",
                               "FTR": "FullTimeResult",
                               "HTHG": "HalfTimeHomeTeamGoals",
                               "HTAG": "HalfTimeAwayTeamGoals",
                               "HTR": "HalfTimeResult",
                               "HS": "HomeTeamShots",
                               "AS": "AwayTeamShots",
                               "HST": "HomeTeamShotsOnTarget",
                               "AST": "AwayTeamShotsOnTarget",
                               "HF": "HomeTeamFouls",
                               "AF": "AwayTeamFouls",
                               "HC": "HomeTeamCorners",
                               "AC": "AwayTeamCorners",
                               "HY": "HomeTeamYellowCards",
                               "AY": "AwayTeamYellowCards",
                               "HR": "HomeTeamRedCards",
                               "AR": "AwayTeamRedCards",
                               "B365H": "B365HomeTeam",
                               "B365D": "B365Draw",
                               "B365A": "B365AwayTeam",
                               "MaxH": "MarketMaxHomeTeam",
                               "MaxD": "MarketMaxDraw",
                               "MaxA": "MarketMaxAwayTeam",
                               "AvgH": "MarketAvgHomeTeam",
                               "AvgD": "MarketAvgDraw",
                               "AvgA": "MarketAvgAwayTeam",
                               "B365>2.5": "B365Over2.5Goals",
                               "B365<2.5": "B365Under2.5Goals",
                               "Max>2.5": "MarketMaxOver2.5Goals",
                               "Max<2.5": "MarketMaxUnder2.5Goals",
                               "Avg>2.5": "MarketAvgOver2.5Goals",
                               "Avg<2.5": "MarketAvgUnder2.5Goals"},
                   inplace = True)


In [6]:
# Add season column

website_frame.insert(0, "Season", constants.CURRENT_SEASON_TAG)

In [7]:
# Add MatchWeek value

website_frame.insert(1, "MatchWeek", constants.DEFAULT_MATCHWEEK)

In [11]:
# Add Points columns

conditions = [
     website_frame["FullTimeResult"] == 'H',
     website_frame["FullTimeResult"] == 'D',
     website_frame["FullTimeResult"] == 'A'
]

home_points = [ 3, 1, 0]
away_points = [ 0, 1, 3]

website_frame["HomeTeamPoints"] = np.select(conditions, home_points)
website_frame["AwayTeamPoints"] = np.select(conditions, away_points)

In [15]:
website_frame.shape

(380, 42)

In [12]:
# Stablish a connection to Database data source and fetch all matches stored from current season

try:
    connection = psycopg2.connect(
        host = "localhost",
        port = "5432",
        user = "admin",
        password = "root",
        database = "premier_league"
    )
except psycopg2.Error as e:
    print ('Can not connect to the postgress database "premier_league". Make sure database server is running')
    print (e)
else:
    print ('Connection to database "premier_league" stablished. Listening at port 5432')

season_query = f"SELECT * FROM public.match_history WHERE \"Season\" = '{constants.CURRENT_SEASON_TAG}'"

cursor = connection.cursor()
cursor.execute(season_query)
matches_in_db = cursor.fetchall()

Connection to database "premier_league" stablished. Listening at port 5432


In [13]:
# Copy cursor into a dataframe
postgres_frame = pd.DataFrame(data = matches_in_db, columns = website_frame.columns)

In [26]:
website_frame.compare(postgres_frame)

ValueError: Can only compare identically-labeled (both index and columns) DataFrame objects

In [27]:
# Copy depurated matchweeks from

website_frame.index


RangeIndex(start=0, stop=380, step=1)

In [28]:
postgres_frame.index

RangeIndex(start=0, stop=370, step=1)

In [None]:
# Now let's get only new match entries by contrasting what is comming from website datasource vs what DataBase has


In [None]:
type(matches_in_db)

In [None]:
website_frame.head()