In [1]:
import pandas as pd

In [2]:
import requests
from io import StringIO

In [3]:
# Raw GitHub location of the MLB salaries CSV that the notebook downloads.
url = (
    'https://raw.githubusercontent.com/'
    'escola-de-dados/notebooks-python-pandas/master/mlb.csv'
)

### LENDO UM ARQUIVO .CSV E ARMAZENANDO/TRANSFORMANDO EM UM DATAFRAME

In [4]:
# Download the CSV over HTTP and parse it into the DataFrame `df`.
try:
    # requests applies no timeout by default, so without one this cell could
    # hang forever on a stalled connection. A timeout raises
    # requests.exceptions.Timeout, which the handler below already covers.
    response = requests.get(url, timeout=30)
    response.raise_for_status()         # raise an exception on an HTTP error status

    # Wrap the response text in StringIO so pandas can read it like a file
    df = pd.read_csv(StringIO(response.text))

    # Show the first rows/records of the DataFrame
    print('Primeiras linhas do Dataframe: ')
    print(df.head())
except requests.exceptions.RequestException as error:
    # Network/HTTP failure: report it. Note that `df` is not assigned by this
    # cell in that case.
    print(f'Erro ao ler o arquivo .csv: {error}')

Primeiras linhas do Dataframe: 
               NAME TEAM POS    SALARY  START_YEAR  END_YEAR  YEARS
0   Clayton Kershaw  LAD  SP  33000000        2014      2020      7
1      Zack Greinke  ARI  SP  31876966        2016      2021      6
2       David Price  BOS  SP  30000000        2016      2022      7
3    Miguel Cabrera  DET  1B  28000000        2014      2023     10
4  Justin Verlander  DET  SP  28000000        2013      2019      7


In [5]:
# Rich display of the first five rows.
df.head(n=5)

Unnamed: 0,NAME,TEAM,POS,SALARY,START_YEAR,END_YEAR,YEARS
0,Clayton Kershaw,LAD,SP,33000000,2014,2020,7
1,Zack Greinke,ARI,SP,31876966,2016,2021,6
2,David Price,BOS,SP,30000000,2016,2022,7
3,Miguel Cabrera,DET,1B,28000000,2014,2023,10
4,Justin Verlander,DET,SP,28000000,2013,2019,7


In [6]:
# Summary statistics (count, mean, std, quartiles, min/max) of the numeric columns.
df.describe()           # use after the data has been cleaned

Unnamed: 0,SALARY,START_YEAR,END_YEAR,YEARS
count,868.0,868.0,868.0,868.0
mean,4468069.0,2016.486175,2017.430876,1.9447
std,5948459.0,1.205923,1.163087,1.916764
min,535000.0,2008.0,2015.0,1.0
25%,545500.0,2017.0,2017.0,1.0
50%,1562500.0,2017.0,2017.0,1.0
75%,6000000.0,2017.0,2017.0,2.0
max,33000000.0,2017.0,2027.0,13.0


In [7]:
# Rich display of the last five rows.
df.tail(n=5)

Unnamed: 0,NAME,TEAM,POS,SALARY,START_YEAR,END_YEAR,YEARS
863,Steve Selsky,BOS,RF,535000,2017,2017,1
864,Stuart Turner,CIN,C,535000,2017,2017,1
865,Vicente Campos,LAA,RP,535000,2017,2017,1
866,Wandy Peralta,CIN,RP,535000,2017,2017,1
867,Yandy Diaz,CLE,3B,535000,2017,2017,1


In [8]:
# Show 7 randomly chosen rows. The fixed seed makes the selection reproducible,
# so Restart & Run All displays the same rows every time.
df.sample(7, random_state=42)

Unnamed: 0,NAME,TEAM,POS,SALARY,START_YEAR,END_YEAR,YEARS
170,Lance Lynn,STL,SP,7500000,2015,2017,3
296,Mike Minor,KC,SP,4000000,2016,2017,2
34,Troy Tulowitzki,TOR,SS,20000000,2011,2020,10
581,Hunter Strickland,SF,RP,555000,2017,2017,1
285,Jeanmar Gomez,PHI,RP,4200000,2017,2017,1
693,Michael Tonkin,MIN,RP,542500,2017,2017,1
662,Greg Bird,NYY,1B,545000,2017,2017,1


In [9]:
# (number of rows, number of columns) of the DataFrame.
df.shape

(868, 7)

In [10]:
# Data type of each column.
df.dtypes

NAME          object
TEAM          object
POS           object
SALARY         int64
START_YEAR     int64
END_YEAR       int64
YEARS          int64
dtype: object

### ORDENANDO OS DADOS

In [11]:
# Rows ordered by salary, lowest first (returns a new frame; df is untouched).
df.sort_values(by='SALARY', ascending=True)

Unnamed: 0,NAME,TEAM,POS,SALARY,START_YEAR,END_YEAR,YEARS
867,Yandy Diaz,CLE,3B,535000,2017,2017,1
839,Jacob May,CWS,CF,535000,2017,2017,1
838,Glenn Sparkman,TOR,RP,535000,2017,2017,1
837,Dylan Covey,CWS,RP,535000,2017,2017,1
836,Drew Robinson,TEX,OF,535000,2017,2017,1
...,...,...,...,...,...,...,...
4,Justin Verlander,DET,SP,28000000,2013,2019,7
3,Miguel Cabrera,DET,1B,28000000,2014,2023,10
2,David Price,BOS,SP,30000000,2016,2022,7
1,Zack Greinke,ARI,SP,31876966,2016,2021,6


In [12]:
# Rows ordered by salary, highest first (returns a new frame; df is untouched).
df.sort_values(by='SALARY', ascending=False)

Unnamed: 0,NAME,TEAM,POS,SALARY,START_YEAR,END_YEAR,YEARS
0,Clayton Kershaw,LAD,SP,33000000,2014,2020,7
1,Zack Greinke,ARI,SP,31876966,2016,2021,6
2,David Price,BOS,SP,30000000,2016,2022,7
3,Miguel Cabrera,DET,1B,28000000,2014,2023,10
4,Justin Verlander,DET,SP,28000000,2013,2019,7
...,...,...,...,...,...,...,...
836,Drew Robinson,TEX,OF,535000,2017,2017,1
837,Dylan Covey,CWS,RP,535000,2017,2017,1
838,Glenn Sparkman,TOR,RP,535000,2017,2017,1
839,Jacob May,CWS,CF,535000,2017,2017,1


In [13]:
# Five highest salaries: sort descending, then keep the first five rows.
df.sort_values(by='SALARY', ascending=False).head(n=5)

Unnamed: 0,NAME,TEAM,POS,SALARY,START_YEAR,END_YEAR,YEARS
0,Clayton Kershaw,LAD,SP,33000000,2014,2020,7
1,Zack Greinke,ARI,SP,31876966,2016,2021,6
2,David Price,BOS,SP,30000000,2016,2022,7
3,Miguel Cabrera,DET,1B,28000000,2014,2023,10
4,Justin Verlander,DET,SP,28000000,2013,2019,7


In [15]:
# Keep only the rows whose salary is above 10 million.
df.loc[df['SALARY'] > 10_000_000]


Unnamed: 0,NAME,TEAM,POS,SALARY,START_YEAR,END_YEAR,YEARS
0,Clayton Kershaw,LAD,SP,33000000,2014,2020,7
1,Zack Greinke,ARI,SP,31876966,2016,2021,6
2,David Price,BOS,SP,30000000,2016,2022,7
3,Miguel Cabrera,DET,1B,28000000,2014,2023,10
4,Justin Verlander,DET,SP,28000000,2013,2019,7
...,...,...,...,...,...,...,...
126,Kendrys Morales,TOR,DH,11000000,2015,2016,2
127,Yovani Gallardo,SEA,SP,10888877,2016,2017,2
128,Kenley Jansen,LAD,RP,10800000,2017,2021,5
129,Matt Wieters,WSH,C,10500000,2017,2017,1
