__Author:__ Marijse

__Aim:__ loading the team match records in event in all time into the local postgres instance. 

__Prerequisites:__
- Have the tournament id dictionary loaded into _0_original_data;

In [1]:
# import the necessary packages
import xml.etree.ElementTree as ET
import pandas as pd
import numpy as np
import psycopg2
import sqlalchemy
from sqlalchemy import create_engine
import glob
from datetime import datetime
import urllib.request
import requests
import io


In [2]:
# Set up the database connection wiht Psycopg2
db = psycopg2.connect(dbname='Rugby', user='postgres', host='localhost', password='password')
cursor=db.cursor()

# Set up a database connection using sqlalchemy
engine = create_engine('postgres://postgres:password@localhost:5432/Rugby')

In [3]:
# Define variables

# Define the input and output strings
input_string = '../_4_data_extracts/squad/'
output_strig = '../_6_data_clean/'

# Define schemas
schema1 = '_0_original_data'
schema2 = '_1_data_views'

In [4]:
cursor.execute("CREATE SCHEMA IF NOT EXISTS " + schema1)
cursor.execute("CREATE SCHEMA IF NOT EXISTS " + schema2)
db.commit()

### Reading Match Fixtures into datafrmae

In [5]:
# Step 2: we run the function
def xml2df(xml_data):
    root = ET.XML(xml_data) # element tree
    all_records = []
    for i, child in enumerate(root):
        record = {}
        for subchild in child:
            record[subchild.tag] = subchild.text
            all_records.append(record)
    return pd.DataFrame(all_records)

In [6]:
# List all event id's of the past two years
eventid = pd.read_sql_query('select eventid from _0_original_data.tournament_id_dictionary',db)
eventid_list = list(eventid.eventid.unique())

# List the methods available for this option
terminology_list = ['Try', 'Con', 'Pen', 'DG', 'Points', 'LPoints', 'WMargin', 'LMargin', 'Red', 'Yellow']
terminology_list

['Try',
 'Con',
 'Pen',
 'DG',
 'Points',
 'LPoints',
 'WMargin',
 'LMargin',
 'Red',
 'Yellow']

In [7]:
# Define an empty dataframe to append the different xml files to
team_match_records_in_event_all_time = pd.DataFrame()

for k in eventid_list:
    
    print(k)
    
    for n in terminology_list:    
        url = 'http://webservices.irb.com/EventInformation.asmx/TeamMatchRecordsInEventAllTime?uid=e9656db8-ffb5-4115-ac0b-cbd5688e6648&eventid='+k+'&method='+n
        response = urllib.request.urlopen(url)
        data = response.read()      
        text = data.decode('utf-8') 
        
        print(n)
    
        # Create a df which equals the text object
        df = xml2df(text)
        
        # Skip all empty Event ID's
        if df.empty == True:
            continue
        else:
            pass
        
        print('we got this far')

        # Extract the tournament id from the file name
        tournament_id =  k
        method = n
        
        # Set a column equal to the file name
        df['tournament_id'] = tournament_id
        df['method'] = method
    
        # Clean the column headers 
        dict_columns={}
        for x in (df.columns.values):
            dict_columns[x] = x.lower().replace('{http://webservices.irb.com/}','')
        df_clean = df.rename(columns=dict_columns)
    
        # remove duplicates from the data
        df_clean = df_clean.drop_duplicates(['teamfor','record'], keep='first')
        
        # Append each individual dataframe to the full_fixtures df
        frames =(team_match_records_in_event_all_time,df_clean)
        team_match_records_in_event_all_time = pd.concat(frames)

1382
Try
we got this far
Con
we got this far
Pen
we got this far
DG
we got this far
Points
we got this far
LPoints
we got this far
WMargin
we got this far
LMargin
we got this far
Red
we got this far
Yellow
we got this far
1386
Try
we got this far
Con
we got this far
Pen
we got this far
DG
we got this far
Points
we got this far
LPoints
we got this far
WMargin
we got this far
LMargin
we got this far
Red
we got this far
Yellow
we got this far
1388
Try
we got this far
Con
we got this far
Pen
we got this far
DG
we got this far
Points
we got this far
LPoints
we got this far
WMargin
we got this far
LMargin
we got this far
Red
we got this far
Yellow
we got this far
1387
Try
we got this far
Con
we got this far
Pen
we got this far
DG
we got this far
Points
we got this far
LPoints
we got this far
WMargin
we got this far
LMargin
we got this far
Red
we got this far
Yellow
we got this far
1389
Try
we got this far
Con
we got this far
Pen
we got this far
DG
we got this far
Points
we got this far
LPoin

Try
we got this far
Con
we got this far
Pen
we got this far
DG
we got this far
Points
we got this far
LPoints
we got this far
WMargin
we got this far
LMargin
we got this far
Red
we got this far
Yellow
we got this far
1422
Try
we got this far
Con
we got this far
Pen
we got this far
DG
we got this far
Points
we got this far
LPoints
we got this far
WMargin
we got this far
LMargin
we got this far
Red
we got this far
Yellow
we got this far
1426
Try
we got this far
Con
we got this far
Pen
we got this far
DG
we got this far
Points
we got this far
LPoints
we got this far
WMargin
we got this far
LMargin
we got this far
Red
we got this far
Yellow
we got this far
1419
Try
we got this far
Con
we got this far
Pen
we got this far
DG
we got this far
Points
we got this far
LPoints
we got this far
WMargin
we got this far
LMargin
we got this far
Red
we got this far
Yellow
we got this far
1423
Try
we got this far
Con
we got this far
Pen
we got this far
DG
we got this far
Points
we got this far
LPoints
we

Try
we got this far
Con
we got this far
Pen
we got this far
DG
we got this far
Points
we got this far
LPoints
we got this far
WMargin
we got this far
LMargin
we got this far
Red
we got this far
Yellow
we got this far
1464
Try
we got this far
Con
we got this far
Pen
we got this far
DG
we got this far
Points
we got this far
LPoints
we got this far
WMargin
we got this far
LMargin
we got this far
Red
we got this far
Yellow
we got this far
1463
Try
we got this far
Con
we got this far
Pen
we got this far
DG
we got this far
Points
we got this far
LPoints
we got this far
WMargin
we got this far
LMargin
we got this far
Red
we got this far
Yellow
we got this far
1466
Try
we got this far
Con
we got this far
Pen
we got this far
DG
we got this far
Points
we got this far
LPoints
we got this far
WMargin
we got this far
LMargin
we got this far
Red
we got this far
Yellow
we got this far
1459
Try
we got this far
Con
we got this far
Pen
we got this far
DG
we got this far
Points
we got this far
LPoints
we

Try
we got this far
Con
we got this far
Pen
we got this far
DG
we got this far
Points
we got this far
LPoints
we got this far
WMargin
we got this far
LMargin
we got this far
Red
we got this far
Yellow
we got this far
1522
Try
we got this far
Con
we got this far
Pen
we got this far
DG
we got this far
Points
we got this far
LPoints
we got this far
WMargin
we got this far
LMargin
we got this far
Red
we got this far
Yellow
we got this far
1499
Try
we got this far
Con
we got this far
Pen
we got this far
DG
we got this far
Points
we got this far
LPoints
we got this far
WMargin
we got this far
LMargin
we got this far
Red
we got this far
Yellow
we got this far
1494
Try
we got this far
Con
we got this far
Pen
we got this far
DG
we got this far
Points
we got this far
LPoints
we got this far
WMargin
we got this far
LMargin
we got this far
Red
we got this far
Yellow
we got this far
1496
Try
we got this far
Con
we got this far
Pen
we got this far
DG
we got this far
Points
we got this far
LPoints
we

Try
we got this far
Con
we got this far
Pen
we got this far
DG
we got this far
Points
we got this far
LPoints
we got this far
WMargin
we got this far
LMargin
we got this far
Red
we got this far
Yellow
we got this far
1629
Try
we got this far
Con
we got this far
Pen
we got this far
DG
we got this far
Points
we got this far
LPoints
we got this far
WMargin
we got this far
LMargin
we got this far
Red
we got this far
Yellow
we got this far
1611
Try
we got this far
Con
we got this far
Pen
we got this far
DG
we got this far
Points
we got this far
LPoints
we got this far
WMargin
we got this far
LMargin
we got this far
Red
we got this far
Yellow
we got this far
1612
Try
we got this far
Con
we got this far
Pen
we got this far
DG
we got this far
Points
we got this far
LPoints
we got this far
WMargin
we got this far
LMargin
we got this far
Red
we got this far
Yellow
we got this far
1613
Try
we got this far
Con
we got this far
Pen
we got this far
DG
we got this far
Points
we got this far
LPoints
we

Try
we got this far
Con
we got this far
Pen
we got this far
DG
we got this far
Points
we got this far
LPoints
we got this far
WMargin
we got this far
LMargin
we got this far
Red
we got this far
Yellow
we got this far
1790
Try
we got this far
Con
we got this far
Pen
we got this far
DG
we got this far
Points
we got this far
LPoints
we got this far
WMargin
we got this far
LMargin
we got this far
Red
we got this far
Yellow
we got this far
1791
Try
we got this far
Con
we got this far
Pen
we got this far
DG
we got this far
Points
we got this far
LPoints
we got this far
WMargin
we got this far
LMargin
we got this far
Red
we got this far
Yellow
we got this far
1792
Try
we got this far
Con
we got this far
Pen
we got this far
DG
we got this far
Points
we got this far
LPoints
we got this far
WMargin
we got this far
LMargin
we got this far
Red
we got this far
Yellow
we got this far
1811
Try
we got this far
Con
we got this far
Pen
we got this far
DG
we got this far
Points
we got this far
LPoints
we

In [8]:
team_match_records_in_event_all_time.head()

Unnamed: 0,matchdate,opponent,record,result,teamfor,tournament_id,method
0,2000-03-24T18:00:00,China 7s,12,80 - 0,Fiji 7s,1382,Try
5,2000-04-01T17:40:00,Malaysia 7s,12,84 - 0,South Africa 7s,1382,Try
10,2002-02-09T14:33:00,Wales 7s,12,78 - 0,New Zealand 7s,1382,Try
20,2006-02-11T16:12:00,Mexico 7s,12,78 - 7,Canada 7s,1382,Try
25,2006-04-01T11:36:00,Singapore 7s,12,82 - 0,Scotland 7s,1382,Try


In [9]:
# The final table includes 20 tournaments and has 2680 records associated with 8 methods over the 20 different tournaments. 
team_match_records_in_event_all_time.tournament_id.nunique()
team_match_records_in_event_all_time.method.nunique()
len(team_match_records_in_event_all_time)

27495

In [10]:
# Extract to CSV
team_match_records_in_event_all_time.to_csv('../_6_data_clean/team_match_records_in_event_all_time.csv')

In [11]:
# Extract to SQL
table_name = 'team_match_records_in_event_all_time'
team_match_records_in_event_all_time.to_sql(schema=schema1, con=engine, if_exists='replace', name=table_name)
db.commit
db.close()