**Module that imports data for the Spanish La Liga (1st and 2nd divisions).<br>Source: https://www.football-data.co.uk/**

## Libraries

In [1]:
import urllib3 # URL request library
import certifi # Certifications library for secure url requests

from pathlib import Path # Path manipulation
import shutil # high-level operations on files and collections of files

from IPython.display import Markdown, display # Style output display in jupyter notebook

import os # OS library
import zipfile # zip manipulation library

import pandas as pd # Data import, manipulation and processing 

from data_functions import * # Private library of functions related to La Liga Dataset
from data_download import * # Private library of functions for downloading the data

## Variables

In [2]:
# Root folder for everything this notebook reads or writes, relative to the notebook.
data_folder = Path("../data")
# Sub-folder handed to get_data / get_df for the individual match files.
matches_folder = data_folder / 'matches'

# Page the data is fetched from. HTTPS is used so the request is encrypted and
# matches the source URL cited in the notebook header (certifi is imported above
# for exactly this kind of verified request).
url = 'https://www.football-data.co.uk/spainm.php'

## Execution

In [3]:
# Fetch the data behind `url` into `matches_folder`; the helper prints one
# "Getting <file> data" line per file (see the output below).
# NOTE(review): the meaning of the third positional flag (True) is not visible
# in this notebook — confirm against the get_data definition.
get_data(url, matches_folder, True)

Getting 19-20_SP1.csv data
Getting 19-20_SP2.csv data
Getting 18-19_SP1.csv data
Getting 18-19_SP2.csv data
Getting 17-18_SP1.csv data
Getting 17-18_SP2.csv data
Getting 16-17_SP1.csv data
Getting 16-17_SP2.csv data
Getting 15-16_SP1.csv data
Getting 15-16_SP2.csv data
Getting 14-15_SP1.csv data
Getting 14-15_SP2.csv data
Getting 13-14_SP1.csv data
Getting 13-14_SP2.csv data
Getting 12-13_SP1.csv data
Getting 12-13_SP2.csv data
Getting 11-12_SP1.csv data
Getting 11-12_SP2.csv data
Getting 10-11_SP1.csv data
Getting 10-11_SP2.csv data
Getting 09-10_SP1.csv data
Getting 09-10_SP2.csv data
Getting 08-09_SP1.csv data
Getting 08-09_SP2.csv data
Getting 07-08_SP1.csv data
Getting 07-08_SP2.csv data
Getting 06-07_SP1.csv data
Getting 06-07_SP2.csv data
Getting 05-06_SP1.csv data
Getting 05-06_SP2.csv data
Getting 04-05_SP1.csv data
Getting 04-05_SP2.csv data
Getting 03-04_SP1.csv data
Getting 03-04_SP2.csv data


## Save File

In [4]:
# Build one DataFrame from the contents of matches_folder and persist it as a
# single CSV directly under data_folder (row index is not written).
df = get_df(matches_folder)
filename = '03_20_SP.csv'
file_path = data_folder.joinpath(filename)
df.to_csv(path_or_buf=file_path, index=False)
# Bare trailing expression so the notebook renders the frame.
df

Unnamed: 0,Date,HomeTeam,AwayTeam,FTHG,FTAG,FTR,HTHG,HTAG,HTR,division,...,AST,HF,AF,HC,AC,HY,AY,HR,AR,Time
0,2016-08-19,Almeria,Cadiz,1.0,1.0,D,0.0,0.0,D,2,...,,,,,,,,,,
1,2016-08-19,Mirandes,Getafe,1.0,1.0,D,0.0,0.0,D,2,...,,,,,,,,,,
2,2016-08-20,Alcorcon,Huesca,0.0,0.0,D,0.0,0.0,D,2,...,,,,,,,,,,
3,2016-08-20,Cordoba,Tenerife,1.0,0.0,H,1.0,0.0,H,2,...,,,,,,,,,,
4,2016-08-20,Elche,Vallecano,2.0,1.0,H,2.0,1.0,H,2,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14080,2006-06-17,Sp Gijon,Murcia,1.0,0.0,H,0.0,0.0,D,2,...,,,,,,,,,,
14081,2006-06-17,Tenerife,Albacete,1.0,1.0,D,0.0,0.0,D,2,...,,,,,,,,,,
14082,2006-06-17,Valladolid,Elche,2.0,2.0,D,0.0,2.0,A,2,...,,,,,,,,,,
14083,2006-06-17,Xerez,Malaga B,4.0,2.0,H,4.0,1.0,H,2,...,,,,,,,,,,


## Save Modified Data (Without NaN)

In [7]:
# Re-read the consolidated file written by the "Save File" step. The path is
# rebuilt here instead of being taken from `file_path`, because `file_path` is
# re-pointed at the modified CSV further down; re-running this cell after that
# would load a file that no longer has a 'Time' column, and the drop below
# would raise KeyError.
raw_file_path = data_folder / '03_20_SP.csv'
df = read_data(raw_file_path)
# NOTE(review): the meaning of the second argument (2) to drop_na is not visible
# in this notebook — confirm against its definition.
df = drop_na(df, 2).drop(columns=['Time'])
# Add the `season` and `jornada` columns shown in the output below.
df = add_season(df)
df = add_jornada(df)
df

Unnamed: 0,Date,HomeTeam,AwayTeam,FTHG,FTAG,FTR,HTHG,HTAG,HTR,division,...,HF,AF,HC,AC,HY,AY,HR,AR,season,jornada
10335,2005-08-27,Alaves,Barcelona,0.0,0.0,D,0.0,0.0,D,1,...,17.0,19.0,3.0,7.0,0.0,1.0,0.0,0.0,05-06,1
10336,2005-08-27,Ath Bilbao,Sociedad,3.0,0.0,H,0.0,0.0,D,1,...,13.0,19.0,3.0,4.0,0.0,1.0,0.0,0.0,05-06,1
10337,2005-08-27,Valencia,Betis,1.0,0.0,H,0.0,0.0,D,1,...,18.0,14.0,8.0,5.0,2.0,3.0,0.0,0.0,05-06,1
10338,2005-08-28,Ath Madrid,Zaragoza,0.0,0.0,D,0.0,0.0,D,1,...,16.0,22.0,8.0,4.0,2.0,7.0,0.0,0.0,05-06,1
10339,2005-08-28,Cadiz,Real Madrid,1.0,2.0,A,0.0,1.0,A,1,...,19.0,25.0,8.0,8.0,2.0,2.0,0.0,0.0,05-06,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9108,2020-03-08,Vallecano,Elche,2.0,3.0,A,1.0,0.0,H,2,...,20.0,13.0,3.0,4.0,3.0,3.0,1.0,0.0,19-20,30
9109,2020-03-08,Malaga,Zaragoza,0.0,1.0,A,0.0,0.0,D,2,...,14.0,10.0,11.0,4.0,2.0,3.0,0.0,0.0,19-20,31
9110,2020-03-08,Alcorcon,Mirandes,1.0,2.0,A,0.0,0.0,D,2,...,15.0,13.0,3.0,4.0,6.0,1.0,1.0,0.0,19-20,31
9111,2020-03-08,Sp Gijon,Las Palmas,4.0,0.0,H,0.0,0.0,D,2,...,18.0,12.0,7.0,4.0,1.0,2.0,0.0,1.0,19-20,31


In [8]:
# Write the current frame to its own CSV under data_folder, without the row
# index. Note that `filename` and `file_path` now refer to this modified file.
filename = "05-20_modified.csv"
file_path = data_folder.joinpath(filename)
df.to_csv(path_or_buf=file_path, index=False)

In [9]:
# Load whatever `file_path` currently points at back into `df` and display it,
# so the saved CSV can be inspected as it reads from disk.
df = read_data(file_path)
df

Unnamed: 0,Date,HomeTeam,AwayTeam,FTHG,FTAG,FTR,HTHG,HTAG,HTR,division,...,HF,AF,HC,AC,HY,AY,HR,AR,season,jornada
0,2005-08-27,Alaves,Barcelona,0.0,0.0,D,0.0,0.0,D,1,...,17.0,19.0,3.0,7.0,0.0,1.0,0.0,0.0,05-06,1
1,2005-08-27,Ath Bilbao,Sociedad,3.0,0.0,H,0.0,0.0,D,1,...,13.0,19.0,3.0,4.0,0.0,1.0,0.0,0.0,05-06,1
2,2005-08-27,Valencia,Betis,1.0,0.0,H,0.0,0.0,D,1,...,18.0,14.0,8.0,5.0,2.0,3.0,0.0,0.0,05-06,1
3,2005-08-28,Ath Madrid,Zaragoza,0.0,0.0,D,0.0,0.0,D,1,...,16.0,22.0,8.0,4.0,2.0,7.0,0.0,0.0,05-06,1
4,2005-08-28,Cadiz,Real Madrid,1.0,2.0,A,0.0,1.0,A,1,...,19.0,25.0,8.0,8.0,2.0,2.0,0.0,0.0,05-06,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6489,2020-03-08,Osasuna,Espanol,1.0,0.0,H,0.0,0.0,D,1,...,17.0,12.0,6.0,2.0,3.0,3.0,0.0,1.0,19-20,27
6488,2020-03-08,Valladolid,Ath Bilbao,1.0,4.0,A,0.0,2.0,A,1,...,9.0,13.0,8.0,1.0,1.0,2.0,0.0,0.0,19-20,27
6487,2020-03-08,Levante,Granada,1.0,1.0,D,1.0,0.0,H,1,...,21.0,13.0,6.0,4.0,3.0,3.0,0.0,0.0,19-20,27
6832,2020-03-08,Tenerife,Ponferradina,1.0,0.0,H,0.0,0.0,D,2,...,13.0,12.0,7.0,3.0,2.0,0.0,0.0,0.0,19-20,31
