## Manipulate data with Numpy and Pandas

### Create a Series of 4 rows with a labelled row index (a Series is one-dimensional: index labels plus one column of values)

In [1]:
import pandas as pd
import numpy as np

from pandas import Series, DataFrame

# Four consecutive integers (0..3), each one labelled with its own row name.
serie = Series(
    data=np.arange(4),
    index=['row {}'.format(n) for n in range(1, 5)],
)
print(serie)

row 1    0
row 2    1
row 3    2
row 4    3
dtype: int64


### Create a DataFrame of 6 rows and 6 columns with rows and cols as labels

In [2]:
# Fix the seed so every run of this cell produces the same random values.
np.random.seed(25)

# 36 uniform random numbers laid out as a 6x6 grid, with labelled rows and columns.
df = DataFrame(
    np.random.rand(36).reshape(6, 6),
    index=['row {}'.format(r) for r in range(1, 7)],
    columns=['col {}'.format(c) for c in range(1, 7)],
)
print(df)

          col 1     col 2     col 3     col 4     col 5     col 6
row 1  0.870124  0.582277  0.278839  0.185911  0.411100  0.117376
row 2  0.684969  0.437611  0.556229  0.367080  0.402366  0.113041
row 3  0.447031  0.585445  0.161985  0.520719  0.326051  0.699186
row 4  0.366395  0.836375  0.481343  0.516502  0.383048  0.997541
row 5  0.514244  0.559053  0.034450  0.719930  0.421004  0.436935
row 6  0.281701  0.900274  0.669612  0.456069  0.289804  0.525819


### Get a slice of the DataFrame specified by the columns and rows that you want to get

In [3]:
# Label-based selection with .loc: first the wanted row labels, then the column labels.
slice_df = df.loc[
    ['row 1', 'row 2'],  # rows to keep
    ['col 1', 'col 2'],  # columns to keep
]
print(slice_df)

          col 1     col 2
row 1  0.870124  0.582277
row 2  0.684969  0.437611


### Get a slice of the DataFrame by setting a start row and an end row and get all rows in between (both end labels are included)

In [4]:
# Label slice over the rows; unlike positional slicing, the end label is included.
between_df = df.loc['row 2':'row 5']
print(between_df)

          col 1     col 2     col 3     col 4     col 5     col 6
row 2  0.684969  0.437611  0.556229  0.367080  0.402366  0.113041
row 3  0.447031  0.585445  0.161985  0.520719  0.326051  0.699186
row 4  0.366395  0.836375  0.481343  0.516502  0.383048  0.997541
row 5  0.514244  0.559053  0.034450  0.719930  0.421004  0.436935


### Use numerical value to compare DataFrame and get DataFrame of result

In [5]:
# Element-wise "less than" comparison; the result is a boolean DataFrame
# with the same row and column labels as df.
compare_df = df.lt(0.2)
print(compare_df)

       col 1  col 2  col 3  col 4  col 5  col 6
row 1  False  False  False   True  False   True
row 2  False  False  False  False  False   True
row 3  False  False   True  False  False  False
row 4  False  False  False  False  False  False
row 5  False  False   True  False  False  False
row 6  False  False  False  False  False  False


### Filter DataFrame with numerical value

In [6]:
# Keep only the cells below 0.2; every other cell is replaced by NaN,
# so the shape of the frame is preserved.
compare_filter_df = df.where(df < 0.2)
print(compare_filter_df)

       col 1  col 2     col 3     col 4  col 5     col 6
row 1    NaN    NaN       NaN  0.185911    NaN  0.117376
row 2    NaN    NaN       NaN       NaN    NaN  0.113041
row 3    NaN    NaN  0.161985       NaN    NaN       NaN
row 4    NaN    NaN       NaN       NaN    NaN       NaN
row 5    NaN    NaN  0.034450       NaN    NaN       NaN
row 6    NaN    NaN       NaN       NaN    NaN       NaN


### Filter Series with numerical value

In [7]:
# Boolean mask selection: rows where the mask is False are dropped from the result.
compare_filter_serie = serie.loc[serie.lt(4)]
print(compare_filter_serie)

row 1    0
row 2    1
row 3    2
row 4    3
dtype: int64


### Set Series rows equal to numerical value or string

In [8]:
# The Series holds integers, so cast it to object explicitly before storing
# strings in it instead of relying on pandas' silent upcasting (deprecated).
serie = serie.astype(object)

# Select several rows with a *list* of labels through .loc; a tuple key is
# MultiIndex-style indexing and is not a reliable way to address a flat index.
serie.loc[['row 2', 'row 1', 'row 3']] = 'Serban'

# new_serie refers to the updated Series. The previous chained assignment
# bound it to the plain string 'Serban' instead. It is an alias, not a copy.
new_serie = serie
print(serie)

row 1    Serban
row 2    Serban
row 3    Serban
row 4         3
dtype: object


### Clone DataFrame (use `.copy()`; plain assignment to a new variable only creates a second reference to the same data)

In [9]:
# Deep copy: new_df gets its own data, so later edits to it leave df untouched.
new_df = df.copy(deep=True)
print(new_df)

          col 1     col 2     col 3     col 4     col 5     col 6
row 1  0.870124  0.582277  0.278839  0.185911  0.411100  0.117376
row 2  0.684969  0.437611  0.556229  0.367080  0.402366  0.113041
row 3  0.447031  0.585445  0.161985  0.520719  0.326051  0.699186
row 4  0.366395  0.836375  0.481343  0.516502  0.383048  0.997541
row 5  0.514244  0.559053  0.034450  0.719930  0.421004  0.436935
row 6  0.281701  0.900274  0.669612  0.456069  0.289804  0.525819


### Set DataFrame cells equal to numerical value or string

In [10]:
# Both target columns hold floats; cast them to object explicitly so they can
# store strings without relying on pandas' silent upcasting (deprecated).
new_df = new_df.astype({'col 1': object, 'col 2': object})

# .loc is label-based, so address rows by label rather than by integer position.
# Label slices include the end label: this covers 'row 1', 'row 2' and 'row 3'.
new_df.loc['row 1':'row 3', 'col 2'] = 'Serban'
new_df.loc['row 1':'row 4', 'col 1'] = 'Mihai'
print(new_df)

          col 1     col 2     col 3     col 4     col 5     col 6
row 1     Mihai    Serban  0.278839  0.185911  0.411100  0.117376
row 2     Mihai    Serban  0.556229  0.367080  0.402366  0.113041
row 3     Mihai    Serban  0.161985  0.520719  0.326051  0.699186
row 4     Mihai  0.836375  0.481343  0.516502  0.383048  0.997541
row 5  0.514244  0.559053  0.034450  0.719930  0.421004  0.436935
row 6  0.281701  0.900274  0.669612  0.456069  0.289804  0.525819


### Transform Series to DataFrame

In [11]:
# Wrap the Series in a one-column DataFrame; the index is kept and the
# single column is named 'col 1'.
transformed_serie = serie.to_frame('col 1')
print(transformed_serie)

        col 1
row 1  Serban
row 2  Serban
row 3  Serban
row 4       3


### Transform DataFrame to Series (select a single row; the column labels become the Series index)

In [12]:
# Positional selection of the first row across all columns; the result is a
# Series indexed by the column labels and named after the row label.
transformed_df = df.iloc[0, :]
print(transformed_df)

col 1    0.870124
col 2    0.582277
col 3    0.278839
col 4    0.185911
col 5    0.411100
col 6    0.117376
Name: row 1, dtype: float64
