## Pandas overview

##### Series and DataFrames are the pandas data structures

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Build a Series from a plain Python list; the output shows the default 0..4 integer index.
num = list(range(1, 6))
pd.Series(data=num)

0    1
1    2
2    3
3    4
4    5
dtype: int64

In [3]:
# Series with explicit labels: the letters 'a'..'e' are used as the index
# instead of the default integer positions.
week_days = ['Mon', 'Tues', 'Wed', 'Thur', 'Fri']
pd.Series(data=week_days, index=list('abcde'))

a     Mon
b    Tues
c     Wed
d    Thur
e     Fri
dtype: object

In [4]:
# Creating a Series from a dictionary:
# the keys become the index labels and the values become the data.
players = dict(
    Neymar='PSG',
    Messi='PSG',
    Luisito='Gremio',
    Lewandowski='Barcelona',
)
pd.Series(data=players)

Neymar               PSG
Messi                PSG
Luisito           Gremio
Lewandowski    Barcelona
dtype: object

In [5]:
# Creating a Series from a NumPy array.
# The values are wrapped in np.array() so the input really is an ndarray:
# a list literal inside parentheses, ([1, 2, 3, 4, 5]), is still a plain list.
np_array = np.array([1, 2, 3, 4, 5])
pd.Series(np_array)

0    1
1    2
2    3
3    4
4    5
dtype: int64

In [6]:
# Creating DataFrames

# From a dictionary: each key becomes a column name and each list holds
# that column's values, one entry per row.
players2 = {
    'Name': ['Tiago Cardoso', 'Gabigol', 'Bruno Henrique', 'Cassio'],
    'Times': ['Santa Cruz', 'Flamengo', 'Flamengo', 'Corinthians'],
}

pd.DataFrame(data=players2)

Unnamed: 0,Name,Times
0,Tiago Cardoso,Santa Cruz
1,Gabigol,Flamengo
2,Bruno Henrique,Flamengo
3,Cassio,Corinthians


In [7]:
# From a 2-D array: each inner list is one row of the frame,
# and `columns` supplies the three column labels.
array_2d = np.array([
    [1, 2, 3],
    [3, 2, 1],
    [2, 1, 3],
])
pd.DataFrame(data=array_2d, columns=list('abc'))

Unnamed: 0,a,b,c
0,1,2,3
1,3,2,1
2,2,1,3


In [11]:
# From a pandas Series: the Series index (the player names) becomes the row
# index and the values fill a single column named 'Team'.
players_series = pd.Series(players)
df = players_series.to_frame(name='Team')
df

Unnamed: 0,Team
Neymar,PSG
Messi,PSG
Luisito,Gremio
Lewandowski,Barcelona


In [13]:
# Add a column
# Heights are stored as integers rather than strings so the column gets a
# numeric dtype and supports arithmetic and aggregation (e.g. .mean()).
# The list is matched to the rows by position, so it needs one value per row.
df['Height cm'] = [175, 169, 182, 185]
df

Unnamed: 0,Team,Height cm
Neymar,PSG,175
Messi,PSG,169
Luisito,Gremio,182
Lewandowski,Barcelona,185


In [19]:
# Drop a column
# drop() hands back a new frame and leaves `df` untouched: df.columns still
# lists 'Height cm' afterwards.
print(df.drop(columns=['Height cm']))

                  Team
Neymar             PSG
Messi              PSG
Luisito         Gremio
Lewandowski  Barcelona


In [20]:
# Column labels of the frame, returned as a pandas Index.
df.columns

Index(['Team', 'Height cm'], dtype='object')

In [21]:
# Row labels of the frame (here the player names), returned as a pandas Index.
df.index

Index(['Neymar', 'Messi', 'Luisito', 'Lewandowski'], dtype='object')

## Data indexing, selection and iteration

In [24]:
# Sample frame used by the indexing and selection examples in this section.
players2 = {
    'Name': ['Tiago Cardoso', 'Gabigol', 'Bruno Henrique', 'Cassio'],
    'Times': ['Santa Cruz', 'Flamengo', 'Flamengo', 'Corinthians'],
}

players_df = pd.DataFrame(data=players2)
players_df

Unnamed: 0,Name,Times
0,Tiago Cardoso,Santa Cruz
1,Gabigol,Flamengo
2,Bruno Henrique,Flamengo
3,Cassio,Corinthians


In [25]:
# Bracket access with a single column label returns that column as a Series.
players_df['Name']

0     Tiago Cardoso
1           Gabigol
2    Bruno Henrique
3            Cassio
Name: Name, dtype: object

In [26]:
# Attribute-style access to the same column; yields the same Series as players_df['Name'].
players_df.Name

0     Tiago Cardoso
1           Gabigol
2    Bruno Henrique
3            Cassio
Name: Name, dtype: object

In [27]:
# Select several columns at once by passing a list of labels; the result is a DataFrame.
players_df[['Name', 'Times']]

Unnamed: 0,Name,Times
0,Tiago Cardoso,Santa Cruz
1,Gabigol,Flamengo
2,Bruno Henrique,Flamengo
3,Cassio,Corinthians


In [31]:
# Get just some rows: a slice inside [] selects rows by position.
# The stop position is excluded, so 0:2 returns rows 0 and 1.
players_df[0:2]

Unnamed: 0,Name,Times
0,Tiago Cardoso,Santa Cruz
1,Gabigol,Flamengo


In [32]:
# Open-ended slice: every row from position 2 through the end of the frame.
players_df[2:]

Unnamed: 0,Name,Times
2,Bruno Henrique,Flamengo
3,Cassio,Corinthians


In [35]:
# .loc selects by index label; a single label returns that row as a Series
# whose index is the frame's column names.
players_df.loc[3]

Name          Cassio
Times    Corinthians
Name: 3, dtype: object

In [36]:
# Label slices with .loc include the stop label, so 2:3 returns rows 2 and 3.
players_df.loc[2:3]

Unnamed: 0,Name,Times
2,Bruno Henrique,Flamengo
3,Cassio,Corinthians


In [38]:
# .iloc selects by integer position; a single position returns that row as a Series.
players_df.iloc[2]

Name     Bruno Henrique
Times          Flamengo
Name: 2, dtype: object

In [41]:
# Position slices with .iloc exclude the stop position, so 1:3 returns rows 1 and 2.
players_df.iloc[1:3]

Unnamed: 0,Name,Times
1,Gabigol,Flamengo
2,Bruno Henrique,Flamengo


In [47]:
# Element-wise membership test that returns a boolean frame of the same shape.
# With a dict, each key names a column and its list holds the values to look
# for in that column; columns missing from the dict ('Name' here) are all False.
sample = {'Times': ['Santa Cruz']}
players_df.isin(sample)

Unnamed: 0,Name,Times
0,False,True
1,False,False
2,False,False
3,False,False
