# PYTHON Pandas BASICS

- We must install in our Python environment Pandas: **conda install pandas** (using Anaconda)
- Pandas is built on NumPy, and allows us manipulate data and arrays more intuitively.

In [1]:
import pandas as pd
#help(pd.Series.loc)

#### Series  and DataFrame
- Serie has 1-dimension, DataFrame has 2-dimension
- Both of them can cointain any type of data (objects, numbers, strings, ...)
- We can access their items the same way we do it with lists or NumPy arrays, as well as with specific methods

In [14]:
print('--- Serie (1D) ---')
serie = pd.Series([0,1,'Tururu',3], index=['a','b','c','d']) # 1D
print(serie)
print(serie.shape)
print(serie.ndim)
print(serie.size)
print(serie[:2])
serie[1] = True
print(serie)
serie['b'] = False
print(serie)
print(serie.drop(['b'])) # .crop() returns a new Series object without dropped elements (.drop[1] doesn't work!)

print('\n--- DataFrame (2D) ---')
data = {'Name': ['Pizca','Viruta','Confeti','Mopa'],
       'Color': ['Grey','Grey','Grey','Black'],
       'Age': [5,4,3,13]}
data_frame = pd.DataFrame(data, columns=['Name','Color','Age']) # 2D
print(data_frame)
print(data_frame.head(2)) # First 2 items
print(data_frame.tail(2)) # Last 2 items
print(data_frame.shape)
print(data_frame.ndim)
print(data_frame.size)
print(data_frame.columns)
print(data_frame.columns[1])
print(data_frame.index) # What's its funcionallity?
print(data_frame[1:]['Name']) # ['Name'] as index: column name isn't showed, 1D
print(data_frame[1:]['Name'].shape) 
print(data_frame[1:][['Name']]) # [['Name']] as list: column name is showed, 2D
print(data_frame[1:][['Name']].shape) 
print(data_frame[1:][['Name','Age']]) # Ccolumn names are showed, 2D
print(data_frame[1:][['Name','Age']].shape)

print('\nSPECIFIC METHODS')
print(data_frame.iloc[1]) # Accessing row by index
print(data_frame*2)
print(data_frame+data_frame)
print(data_frame.iloc[1::-1,1]) # Accessing columns by index
print(data_frame.loc[1::-1,'Color']) # Accessing columns by column label
print(data_frame[data_frame['Color'] == 'Grey'])
data_frame[:2]['Color'] = 'Blonde' # It throws a warning message, but .loc() doesn't work!
print(data_frame)
print(data_frame.drop([1,2])) # Drop rows
print(data_frame.drop([1,2], axis = 0)) # Drop rows
print(data_frame.drop('Age', axis = 1)) # Drop columns

--- Serie (1D) ---
a         0
b         1
c    Tururu
d         3
dtype: object
(4,)
1
4
a    0
b    1
dtype: object
a         0
b      True
c    Tururu
d         3
dtype: object
a         0
b     False
c    Tururu
d         3
dtype: object
a         0
c    Tururu
d         3
dtype: object

--- DataFrame (2D) ---
      Name  Color  Age
0    Pizca   Grey    5
1   Viruta   Grey    4
2  Confeti   Grey    3
3     Mopa  Black   13
     Name Color  Age
0   Pizca  Grey    5
1  Viruta  Grey    4
      Name  Color  Age
2  Confeti   Grey    3
3     Mopa  Black   13
(4, 3)
2
12
Index(['Name', 'Color', 'Age'], dtype='object')
Color
RangeIndex(start=0, stop=4, step=1)
1     Viruta
2    Confeti
3       Mopa
Name: Name, dtype: object
(3,)
      Name
1   Viruta
2  Confeti
3     Mopa
(3, 1)
      Name  Age
1   Viruta    4
2  Confeti    3
3     Mopa   13
(3, 2)

SPECIFIC METHODS
Name     Viruta
Color      Grey
Age           4
Name: 1, dtype: object
             Name       Color  Age
0      PizcaPizca  

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


#### Filtering and subsetting dataframes

In [48]:
print('\n--- DataFrame (2D) ---')
data = {'Name': ['Pizca','Viruta','Confeti','Mopa'],
       'Color': ['Grey','Grey','Grey','Black'],
       'Age': [5,4,3,13]}
data_frame = pd.DataFrame(data, columns = ['Name','Color','Age']) # 2D
print(data_frame)

# filter() method
print(data_frame.filter(['Name','Age'])) # Filter by columns

# Filter rows based on certain criteria:
print(data_frame[data_frame['Name']>'N'])
print(data_frame[data_frame['Name'].str.contains('a', na = False)]) # na = False removes NA/NaN values (otherwise: ValueError)
print(data_frame.set_index('Name').filter(regex='[mM]',axis = 0)) # New DF with 'Name' values as indexes


--- DataFrame (2D) ---
      Name  Color  Age
0    Pizca   Grey    5
1   Viruta   Grey    4
2  Confeti   Grey    3
3     Mopa  Black   13
      Name  Age
0    Pizca    5
1   Viruta    4
2  Confeti    3
3     Mopa   13
     Name Color  Age
0   Pizca  Grey    5
1  Viruta  Grey    4
     Name  Color  Age
0   Pizca   Grey    5
1  Viruta   Grey    4
3    Mopa  Black   13
      Color  Age
Name            
Mopa  Black   13


#### Making transformations

In [47]:
print('--- Serie (1D) ---')
serie = pd.Series([0,1,'Tururu',3], index = ['a','b','c','d']) # 1D
print(serie)
print('\n--- DataFrame (2D) ---')
data = {'Name': ['Pizca','Viruta','Confeti','Mopa'],
       'Color': ['Grey','Grey','Grey','Black'],
       'Age': [5,4,3,13]}
data_frame = pd.DataFrame(data, columns = ['Name','Color','Age']) # 2D
print(data_frame)

print('\n--- Applying lambda functions ---')
function = lambda x: 'Printing ' + str(x)

print(serie.apply(function)) # Returns new Serie, doesn't modify the original
print(serie)

print(data_frame.apply(function))

# apply(function) -> applied to specific items
print(data_frame.apply(lambda x: x['Color'] + ' :)', axis = 1))
data_frame['Happy color'] = data_frame.apply(lambda x: x['Color'] + ' :)', axis = 1) # Add new column with lambda function
print(data_frame)
data_frame['Happy color'] = data_frame.apply(lambda x: x['Happy color'] + ' :D', axis = 1) # Update column with lambda function
print(data_frame)

# applymap(function) -> applied to all items
print(data_frame.applymap(function))
data_frame = data_frame.applymap(lambda x: str(x) + ' :(')
print(data_frame)

--- Serie (1D) ---
a         0
b         1
c    Tururu
d         3
dtype: object

--- DataFrame (2D) ---
      Name  Color  Age
0    Pizca   Grey    5
1   Viruta   Grey    4
2  Confeti   Grey    3
3     Mopa  Black   13

--- Applying lambda functions ---
a         Printing 0
b         Printing 1
c    Printing Tururu
d         Printing 3
dtype: object
a         0
b         1
c    Tururu
d         3
dtype: object
Name     Printing 0      Pizca\n1     Viruta\n2    Conf...
Color    Printing 0     Grey\n1     Grey\n2     Grey\n3...
Age      Printing 0     5\n1     4\n2     3\n3    13\nN...
dtype: object
0     Grey :)
1     Grey :)
2     Grey :)
3    Black :)
dtype: object
      Name  Color  Age Happy color
0    Pizca   Grey    5     Grey :)
1   Viruta   Grey    4     Grey :)
2  Confeti   Grey    3     Grey :)
3     Mopa  Black   13    Black :)
      Name  Color  Age  Happy color
0    Pizca   Grey    5   Grey :) :D
1   Viruta   Grey    4   Grey :) :D
2  Confeti   Grey    3   Grey :) :D
3    

#### Operations between series/dataframes

In [64]:
print('--- Series (1D) ---')
serie = pd.Series([0,1,2,3], index = ['a','b','c','d']) # 1D
serie2 = pd.Series([9,8,7], index = ['a','b','x'])
print(serie)
print(serie2)

print('\n--- DataFrames (2D) ---')
data = {'Name': ['Pizca','Viruta','Confeti','Mopa'],
       'Color': ['Grey','Grey','Grey','Black'],
       'Age': [5,4,3,13]}
data_frame = pd.DataFrame(data, columns = ['Name','Color','Age']) # 2D
data2 = {'Name': ['Pizca2','Viruta2','Confeti2','Mopa2'],
       'Color': ['Grey2','Grey2','Grey2','Black2'],
       'Age': [25,24,23,213]}
data_frame2 = pd.DataFrame(data2, columns = ['Name','Color','Age'])
print(data_frame)
print(data_frame2)

print('\n--- Operations ---')
print(serie + serie2) # Items with indexes that don't match can't be added, so the result is NaN
print(serie.add(serie2, fill_value = 1000)) # Assign value=1000 to missing items, also works with sub(),div(),mul()

print(data_frame + data_frame2) # Data sets labels and column names must match
print(data_frame.add(data_frame2))

--- Series (1D) ---
a    0
b    1
c    2
d    3
dtype: int64
a    9
b    8
x    7
dtype: int64

--- DataFrames (2D) ---
      Name  Color  Age
0    Pizca   Grey    5
1   Viruta   Grey    4
2  Confeti   Grey    3
3     Mopa  Black   13
       Name   Color  Age
0    Pizca2   Grey2   25
1   Viruta2   Grey2   24
2  Confeti2   Grey2   23
3     Mopa2  Black2  213

--- Operations ---
a    9.0
b    9.0
c    NaN
d    NaN
x    NaN
dtype: float64
a       9.0
b       9.0
c    1002.0
d    1003.0
x    1007.0
dtype: float64
              Name        Color  Age
0      PizcaPizca2    GreyGrey2   30
1    VirutaViruta2    GreyGrey2   28
2  ConfetiConfeti2    GreyGrey2   26
3        MopaMopa2  BlackBlack2  226
              Name        Color  Age
0      PizcaPizca2    GreyGrey2   30
1    VirutaViruta2    GreyGrey2   28
2  ConfetiConfeti2    GreyGrey2   26
3        MopaMopa2  BlackBlack2  226
