# FIFA DataSet Analysis


* Importing Libraries

In [283]:
#Numpy
import numpy as np

# Pandas
import pandas as pd

# BeautifulSoup
from bs4 import BeautifulSoup

# Requests
import requests

# Reges
import regex as re 


* Web Scrapping


In [284]:
req=requests.get("https://sofifa.com/players?offset=1")
req

<Response [200]>

* BeautifulSoup

In [285]:
soup=BeautifulSoup(req.content)

In [286]:
# Adding the attributes to the array
arr=[]
for i in soup.findAll('td'):
    arr.append(str(i))

* Cleaning Image URL

In [287]:
clean_img=re.sub('<td.*\n.*data-src=.|".*',"",arr[0])
clean_img

'https://cdn.sofifa.net/players/253/537/22_60.png'

In [288]:
# The row is from 0-8
arr[0]

'<td class="col-avatar" data-balloon="Click here!" data-balloon-pos="up" data-balloon-visible=""><figure class="avatar">\n<img alt="" class="player-check" data-root="https://cdn.sofifa.net/players/" data-src="https://cdn.sofifa.net/players/253/537/22_60.png" data-srcset="https://cdn.sofifa.net/players/253/537/22_120.png 2x, https://cdn.sofifa.net/players/253/537/22_180.png 3x" data-type="player" id="253537" src="https://cdn.sofifa.net/players/notfound_0_60.png"/></figure></td>'

* Cleaning Name

In [289]:
clean_name=re.sub('<td.*\n.*s">|<\/div.*>',"",arr[1])
clean_name

'B. Castillo'

* Cleaning Age

In [290]:
clean_age=re.sub('<td.*e">|<.*>',"",arr[11])
clean_age

'21'

* Cleaning OVA

In [291]:
clean_ova=re.sub('<td.*p.*">|<.*>',"",arr[3])
clean_ova

'76'

* Cleaning POT

In [316]:
clean_pot=re.sub('<td.*p.*">|<.*>',"",arr[4])
clean_pot

'81'

* Cleaning Team & Contract

In [293]:
clean_team=re.sub('<td.*\n<d.*\n<i.*\n</f.*\n.*/">|<.*\n.*>\n.*>\n.*>',"",arr[5])
clean_team

'Barcelona Sporting Club'

* Cleaning Value

In [294]:
clean_value=re.sub('<td.*">|<.*>',"",arr[6])
clean_value

'€11M'

* Cleaning Wage

In [295]:
clean_wage=re.sub('<td.*">|<.*>',"",arr[7])
clean_wage

'€1K'

* Cleaning Total

In [296]:
clean_total=re.sub('<td.*">|<.*>',"",arr[8])
clean_total

'1898'

* Looping through the data

In [323]:
#0-8
img=[]
name=[]
age=[]
ova=[]
pot=[]
team=[]
value=[]
wage=[]
total=[]

count=0
for j in range(1,20):
    url=f"https://sofifa.com/players?offset=1{j}"
    req=requests.get(url)
    soup=BeautifulSoup(req.content)
    
    for i in soup.findAll('td'):
        clean_img=re.sub('<td.*\n.*data-src=.|".*',"",str(i))
        clean_name=re.sub('<td.*\n.*s">|<\/div.*>',"",str(i))
        clean_age=re.sub('<td.*e">|<.*>',"",str(i))
        clean_ova=re.sub('<td.*p.*">|<.*>',"",str(i))
        clean_pot=re.sub('<td.*p.*">|<.*>',"",str(i))
        clean_team=re.sub('<td.*\n<d.*\n<i.*\n</f.*\n.*/">|<.*\n.*>\n.*>\n.*>',"",str(i))
        clean_value=re.sub('<td.*">.|.<.*>',"",str(i))
        clean_wage=re.sub('<td.*">.|.<.*>',"",str(i))
        clean_total=re.sub('<td.*">|<.*>',"",str(i))
        
        if count==0:
            img.append(clean_img)
            count+=1
        elif count==1:
            name.append(clean_name)
            count+=1
        elif count==2:
            age.append(clean_age)
            count+=1
        elif count==3:
            ova.append(clean_ova)
            count+=1
        elif count==4:
            pot.append(clean_pot)
            count+=1
        elif count==5:
            team.append(clean_team)
            count+=1
        elif count==6:
            value.append(clean_value)
            count+=1
        elif count==7:
            wage.append(clean_wage)
            count+=1
        else:
            count=0
            total.append(clean_total)
        


In [331]:
fifa=pd.DataFrame({'Image':img,'Name':name,'Age':age,'OVA':ova,'POT':pot,'Team':team,'Value (M$)':value,'Wage (K$)':wage,'Total':total})
fifa['Age']=pd.to_numeric(fifa['Age'])
fifa['OVA']=pd.to_numeric(fifa['OVA'])
fifa['POT']=pd.to_numeric(fifa['POT'])
fifa['Value (M$)']=pd.to_numeric(fifa['Value (M$)'])
fifa['Wage (K$)']=pd.to_numeric(fifa['Wage (K$)'])
fifa['Total']=pd.to_numeric(fifa['Total'])

In [328]:
fifa.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1140 entries, 0 to 1139
Data columns (total 9 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Image   1140 non-null   object 
 1   Name    1140 non-null   object 
 2   Age     1140 non-null   int64  
 3   OVA     1140 non-null   int64  
 4   POT     1140 non-null   int64  
 5   Team    1140 non-null   object 
 6   Value   1140 non-null   float64
 7   Wage    1140 non-null   int64  
 8   Total   1140 non-null   int64  
dtypes: float64(1), int64(5), object(3)
memory usage: 80.3+ KB


In [332]:
fifa

Unnamed: 0,Image,Name,Age,OVA,POT,Team,Value (M$),Wage (K$),Total
0,https://cdn.sofifa.net/players/158/023/22_60.png,L. Messi,34,92,92,Paris Saint-Germain,69.5,290,2206
1,https://cdn.sofifa.net/players/237/692/22_60.png,P. Foden,21,84,92,Manchester City,94.5,140,2073
2,https://cdn.sofifa.net/players/251/805/22_60.png,J. Timber,20,79,88,Ajax,36.0,13,1935
3,https://cdn.sofifa.net/players/243/630/22_60.png,J. David,21,80,86,LOSC Lille,34.0,36,1930
4,https://cdn.sofifa.net/players/254/796/22_60.png,N. Madueke,19,77,88,PSV,23.0,11,1897
...,...,...,...,...,...,...,...,...,...
1135,https://cdn.sofifa.net/players/241/852/22_60.png,M. Diaby,21,83,88,Bayer 04 Leverkusen,56.0,55,1948
1136,https://cdn.sofifa.net/players/244/778/22_60.png,Trincão,21,76,83,Wolverhampton Wanderers,15.0,84,1879
1137,https://cdn.sofifa.net/players/245/367/22_60.png,X. Simons,18,69,82,Paris Saint-Germain,3.3,13,1865
1138,https://cdn.sofifa.net/players/246/172/22_60.png,S. Chukwueze,22,77,84,Villarreal CF,21.5,27,1865
