In [12]:
import pandas as pd
import yaml
from datetime import date, timedelta

### User defined
import variables_n_functions as vnf

# Load the run configuration. The keys read from it in this notebook are
# 'leagues', 'sports_token' and 'connection'.
# The context manager closes the file handle as soon as the YAML is parsed
# (the bare open() used previously left it open for the kernel's lifetime).
with open('config.yaml', 'r') as config_file:
    config = yaml.safe_load(config_file)

leagues = config['leagues']

# Query window: the seven days ending today.
end = date.today()
start = end - timedelta(days=7)

# From here on both bounds are plain 'YYYY-MM-DD' strings.
end = end.strftime("%Y-%m-%d")
start = start.strftime("%Y-%m-%d")

# Comma-separated "start,end" value passed to vnf.head2head as the date range.
between = f'{start},{end}'

### We initialize the df with the appropriate column names
df = pd.DataFrame(columns = vnf.columnas_df)

### Single-row frames (one per match) are collected here and concatenated once.
### Concatenating onto df inside the loop re-copies every previously stored row
### for each team pair, which grows quadratically with the number of matches.
match_frames = []

try:
    for league in leagues.keys():

        print('----------------------------------------------------')
        print('------------- ', leagues[league]['name'] , ' -------------')
        print('----------------------------------------------------\n')

        print('----------------------------------------------------')
        print('------------- Period: ', start, ' to ', end , ' -------------')
        print('----------------------------------------------------\n')

        teams = leagues[league]['teams']

        ### Auxiliary list used to avoid duplicate requests: once a team has been
        ### processed as team_1 it is removed, so each unordered pair is requested once
        teams_aux = list(teams.keys())

        ### We recover the match history between every unique team - team combination
        for team_1 in teams.keys():

            teams_aux.remove(team_1)

            for team_2 in teams_aux:

                print(f'Retreiving historical matches between {teams[team_1]} and {teams[team_2]}...')
                h2h = vnf.head2head(team_1, team_2, config['sports_token'], between)

                if h2h is not None:

                    ### Each match record becomes a one-row frame (Series -> column -> transposed).
                    ### NOTE(review): assumes h2h is an integer-indexable sequence of records - confirm
                    h2h = [pd.DataFrame(pd.Series(h2h[k])).transpose() for k in range(len(h2h))]
                    match_frames.extend(h2h)
                    print(f'Total Matches: {len(h2h)}\n')

                else:

                    print('Total Matches: 0\n')
finally:
    ### Runs even when the download is interrupted (e.g. KeyboardInterrupt), so the
    ### matches fetched so far still end up in df, as with the per-pair concat
    df = pd.concat([df] + match_frames)

----------------------------------------------------
-------------  Premier League  -------------
----------------------------------------------------

----------------------------------------------------
------------- Period:  2022-04-27  to  2022-05-04  -------------
----------------------------------------------------

Retreiving historical matches between West Ham United and Tottenham Hotspur...
Total Matches: 0

Retreiving historical matches between West Ham United and Liverpool...
Total Matches: 0

Retreiving historical matches between West Ham United and Manchester City...
Total Matches: 0

Retreiving historical matches between West Ham United and Everton...
Total Matches: 0

Retreiving historical matches between West Ham United and Manchester United...
Total Matches: 0

Retreiving historical matches between West Ham United and Aston Villa...
Total Matches: 0

Retreiving historical matches between West Ham United and Chelsea...
Total Matches: 0

Retreiving historical matches bet

KeyboardInterrupt: 

In [8]:
# The 'time' column is renamed to 'time_data' because 'time' is a reserved keyword
# (NOTE(review): presumably reserved in the target SQL database - confirm).
df.rename(columns={'time': 'time_data'}, inplace=True)


def _starting_date(time_info):
    """Return the nested ``starting_at -> date`` value of one ``time_data`` entry."""
    return time_info["starting_at"]["date"]


# Derive each row's match day from its nested time information.
df["match_day"] = df["time_data"].apply(_starting_date)

In [9]:
import mysql.connector

# Open the MySQL connection with the settings stored under config['connection'].
client = mysql.connector.connect(**config['connection'])
cursor = client.cursor()

#### Create DB and Tables
# Each DDL script is read in full and sent to the server in one execute() call:
# the database script first, then the source-table script.
for ddl_path in ('sql/create_h2h_db.sql', 'sql/create_h2h_source.sql'):
    with open(ddl_path) as ddl:
        cursor.execute(ddl.read())


In [16]:
def list_of_tuples(df):
    """Convert every row of ``df`` into a tuple of strings.

    Rows are taken positionally with ``iloc``; each row is cast to ``str``
    as a whole before being turned into a tuple.

    Returns:
        A list with one tuple per row, in row order (an empty list when
        ``df`` has no rows).
    """
    return [tuple(df.iloc[row].astype(str)) for row in range(df.shape[0])]

# Flatten the frame into one tuple of strings per row, to be bound as query parameters.
source_values = list_of_tuples(df)

# The INSERT statement is identical for every row, so it is read from disk once
# instead of re-opening the file on each iteration. The context manager closes
# the file, so no explicit close() is needed.
with open('sql/insert_h2h_source.sql') as dml:
    insert_sql = dml.read()

for value in source_values:
    try:
        cursor.execute(insert_sql, value)
    except mysql.connector.IntegrityError as err:
        # Best effort: report the failing row's error and carry on with the rest.
        print("Something went wrong: {}".format(err))

# Persist all successfully executed inserts.
client.commit()

In [20]:
# Read the whole h2h.model2 table through the open connection; the call is the
# cell's last expression, so the resulting frame is rendered as the cell output.
model2_query = 'SELECT * FROM h2h.model2'
pd.read_sql(model2_query, con=client)

Unnamed: 0,id,Y,league_id,season_id,venue_id,referee_id,localteam_id,visitorteam_id,localteam_position,visitorteam_position,match_day
0,18138879,1.0,8,18378,281313,14532,6,1,7.0,6.0,2022-03-20
1,18138692,1.0,8,18378,214,15294,1,6,7.0,5.0,2021-10-24
2,16943055,1.0,8,17420,214,15293,1,6,5.0,9.0,2021-02-21
3,16924643,0.0,8,17420,281313,15294,6,1,7.0,13.0,2020-10-18
4,11867534,1.0,8,16036,281313,15293,6,1,7.0,17.0,2020-06-23
...,...,...,...,...,...,...,...,...,...,...,...
3372,18138793,1.0,8,18378,338817,15273,236,25,13.0,17.0,2021-12-10
3373,16953819,1.0,9,17428,338817,15270,236,25,3.0,2.0,2021-05-01
3374,16953507,0.0,9,17428,19,14805,25,236,4.0,6.0,2020-12-15
3375,16797365,0.0,9,24,14267,,236,25,-1.0,-1.0,2015-02-10
