In [1]:
import numpy as np
import pandas as pd

## Series

In [7]:
random_series = pd.Series(
    np.random.randint(20, 31, size=5),
    index=['raz', 'dwa', 'trzy', 'cztery', 'pięć'],
    name="SOME_SERIES"
)
random_series

raz       24
dwa       26
trzy      30
cztery    29
pięć      28
Name: SOME_SERIES, dtype: int32

## DataFrame

In [3]:
random_df = pd.DataFrame(
    np.random.randint(1, 11, size=(4, 4)),
    index=[f"Dzień {n}" for n in range(1, 5)],
    columns=[f"Column {l}" for l in "ABCD"],
)
random_df

Unnamed: 0,Column A,Column B,Column C,Column D
Dzień 1,8,1,8,7
Dzień 2,9,4,3,2
Dzień 3,9,7,1,4
Dzień 4,8,3,4,3


#### xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx

#### xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx

In [4]:
random_df.columns

Index(['Column A', 'Column B', 'Column C', 'Column D'], dtype='object')

In [5]:
random_df.index

Index(['Dzień 1', 'Dzień 2', 'Dzień 3', 'Dzień 4'], dtype='object')

In [6]:
random_df.sort_values(by="Column A")

random_df.sort_values(by="Dzień 1", axis=1)

Unnamed: 0,Column C,Column B,Column D,Column A
Dzień 1,5,7,9,10
Dzień 2,6,8,1,8
Dzień 3,2,3,7,7
Dzień 4,8,4,5,10


## Convert pandas structure to numpy ndarray

In [7]:
random_df_numpy = random_df.to_numpy()
random_df_numpy
# type(random_df_numpy)

random_series_numpy = random_series.to_numpy()
random_series_numpy
# type(random_series_numpy)

array([21, 28, 23, 27, 20])

In [8]:
df2 = pd.DataFrame(
    {
        "A": 1.0,
        "B": "abcd",
        "C": ['raz', 'dwa', 'trzy', 'cztery']
    }
)
df2

Unnamed: 0,A,B,C
0,1.0,abcd,raz
1,1.0,abcd,dwa
2,1.0,abcd,trzy
3,1.0,abcd,cztery


## Selecting and indexing

In [9]:
random_df

Unnamed: 0,Column A,Column B,Column C,Column D
Dzień 1,10,7,5,9
Dzień 2,8,8,6,1
Dzień 3,7,3,2,7
Dzień 4,10,4,8,5


### By column(s)

In [10]:
# random_df["Column B"]

In [11]:
# random_df[['Column A', "Column B"]]

### By row(s)

In [12]:
# random_df[1:3]

### Multi-axis indexing

#### Syntax:
 - df.loc[row, column]
 - df.loc[[rows], [columns]]

In [13]:
random_df.loc["Dzień 1", ["Column A", "Column B"]]

Column A    10
Column B     7
Name: Dzień 1, dtype: int32

In [14]:
random_df.loc[["Dzień 1", "Dzień 2"], :]

Unnamed: 0,Column A,Column B,Column C,Column D
Dzień 1,10,7,5,9
Dzień 2,8,8,6,1


In [15]:
random_df.loc["Dzień 3", "Column B"]

3

### Lamda functions recap


In [16]:
names = ["Alice", "Bob", "Charlie", "David", "Eve", "Frank", "Grace"]
names.sort(key=lambda x: len(x))
names

# names = [name.replace('a', 'XYZ') for name in names]
# names

['Bob', 'Eve', 'Alice', 'David', 'Frank', 'Grace', 'Charlie']

### Selecting using .loc and boolean series (masking)

In [17]:
# Boolean series
boolean_series = random_df['Column A'] > 5
boolean_series

Dzień 1    True
Dzień 2    True
Dzień 3    True
Dzień 4    True
Name: Column A, dtype: bool

In [18]:
# Boolean series as numpy array
boolean_array = (random_df['Column A'] > 5).to_numpy()
boolean_array

array([ True,  True,  True,  True])

In [19]:
# Mask - boolean array
random_df.loc[boolean_array, :]

# Mask - boolean series
random_df.loc[boolean_series, :]

Unnamed: 0,Column A,Column B,Column C,Column D
Dzień 1,10,7,5,9
Dzień 2,8,8,6,1
Dzień 3,7,3,2,7
Dzień 4,10,4,8,5


In [20]:
random_df[random_df["Column A"] > 5]
# or
random_df.loc[random_df["Column A"] > 5]

Unnamed: 0,Column A,Column B,Column C,Column D
Dzień 1,10,7,5,9
Dzień 2,8,8,6,1
Dzień 3,7,3,2,7
Dzień 4,10,4,8,5


In [21]:
df3 = pd.DataFrame({'a': ['one', 'one', 'two', 'three', 'two', 'one', 'six'],
                    'b': ['x', 'y', 'y', 'x', 'y', 'x', 'x'],
                    'c': np.random.randn(7)})

# only want 'two' or 'three'
df3["a"][df3["a"] != 'one']
df3["a"][lambda x: x != 'one']

2      two
3    three
4      two
6      six
Name: a, dtype: object

# UDEMY TUTORIAL

In [22]:
5+4


9