# Summaries

## Libraries

In [1]:
import numpy as np
import pandas as pd

## Functions

In [65]:
def create_pandas_dataframe():
    """Build the five-person sample DataFrame used by the pandas examples.

    A new frame is constructed on every call, so each example cell starts
    from the same unmodified data.

    Returns:
        pd.DataFrame: five rows with the columns ``name``, ``age``,
        ``height``, ``has_linux`` and ``grade``.
    """
    column_names = ["name", "age", "height", "has_linux", "grade"]
    # One inner list per person, with values in the same order as column_names.
    records = [
        ['Ana', 28, 1.72, True, 8],
        ['Bruno', 33, 1.69, False, 7],
        ['César', 34, 1.77, False, 8],
        ['Diego', 23, 1.79, True, 10],
        ['Erika', 25, 1.69, True, 7],
    ]
    return pd.DataFrame(records, columns=column_names)

## Python

### $2^{3^2}$

In [2]:
2**3**2

512

### $\sqrt[2]{25}$

In [3]:
print(25 ** (1/2))

5.0


### $\sqrt[3]{64}$

In [4]:
# 1/3 cannot be represented exactly in binary floating point, so the result
# printed here is slightly below 4 instead of exactly 4.0.
# NOTE(review): np.cbrt(64) is expected to give 4.0 — confirm before relying on it.
print(64 ** (1/3))

3.9999999999999996


### Quotient and remainder

In [5]:
22 // 6

3

In [6]:
22 % 6

4

### Tuple as dict key

In [7]:
tuple_as_dict_key = {
    (1, 2, 3): ['gold', 'silver coins'],
    (2, 4, 5.6): 'ship'
}

tuple_as_dict_key

{(1, 2, 3): ['gold', 'silver coins'], (2, 4, 5.6): 'ship'}

## Numpy

### Create arrays

In [8]:
np.array([1, 4, 5])

array([1, 4, 5])

In [9]:
np.array([[1, 4, 5], [2, 5, 7]])

array([[1, 4, 5],
       [2, 5, 7]])

In [10]:
np.array([(1, 4, 5), (2, 5, 7)])

array([[1, 4, 5],
       [2, 5, 7]])

In [11]:
np.zeros(4)

array([0., 0., 0., 0.])

In [12]:
np.zeros((2, 5))

array([[0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.]])

In [13]:
np.ones(4)

array([1., 1., 1., 1.])

In [14]:
np.full((3, 2), 34)

array([[34, 34],
       [34, 34],
       [34, 34]])

In [15]:
# np.arange(start, stop, step): stop is exclusive, so 20 itself is never produced
np.arange(10, 20, 2)

array([10, 12, 14, 16, 18])

In [16]:
# np.linspace(start, stop, num): num evenly spaced values, both endpoints included
np.linspace(1, 10, 5)

array([ 1.  ,  3.25,  5.5 ,  7.75, 10.  ])

### Arrays info

#### Shape

In [17]:
np.array([1, 2, 3]).shape

(3,)

In [18]:
np.array([
    [1, 2, 3], 
    [4, 5, 6]
]).shape

(2, 3)

In [19]:
np.array([
    [[1, 2], [3, 4]], 
    [[5, 6], [7, 8]]
]).shape

(2, 2, 2)

#### Len

In [20]:
len(np.array([1, 2, 3]))

3

In [21]:
len(np.array([
    [1, 2, 3], 
    [4, 5, 6]
]))

2

In [22]:
len(np.array([
    [[1, 2], [3, 4]], 
    [[5, 6], [7, 8]]
]))

2

#### Ndim

In [23]:
np.array([1, 2, 3]).ndim

1

In [24]:
np.array([
    [1, 2, 3], 
    [4, 5, 6]
]).ndim

2

In [25]:
np.array([
    [[1, 2], [3, 4]], 
    [[5, 6], [7, 8]]
]).ndim

3

#### Size

In [26]:
np.array([1, 2, 3]).size

3

In [27]:
np.array([
    [1, 2, 3], 
    [4, 5, 6]
]).size

6

In [28]:
np.array([
    [[1, 2], [3, 4]], 
    [[5, 6], [7, 8]]
]).size

8

### Operations

#### Sum

In [29]:
np.array([1, 1, 1]) + np.array([2, 2, 2])

array([3, 3, 3])

In [30]:
np.add(np.array([1, 1, 1]), np.array([2, 2, 2]))

array([3, 3, 3])

In [31]:
np.array([1, 2, 3]).sum()

6

#### Subtract

In [32]:
np.array([1, 1, 1]) - np.array([2, 2, 2])

array([-1, -1, -1])

In [33]:
np.subtract(np.array([1, 1, 1]), np.array([2, 2, 2]))

array([-1, -1, -1])

#### Stats

In [34]:
np.array([1, 2, 3]).max()

3

In [35]:
np.array([1, 2, 3]).min()

1

In [36]:
# (1 + 2 + 3 + 4) / 4 = 10 / 4 = 2.5
np.array([1, 2, 3, 4]).mean()

2.5

In [37]:
# median(1, 2, 8, 3, 4) 
# order 1, 2, 3, 4, 8
# select the middle element 3
np.median(np.array([1, 2, 8, 3, 4]))

3.0

In [38]:
# median(1, 2, 8, 3, 4, 3.5) 
# order 1, 2, 3, 3.5, 4, 8
# calculate the mean of the elements in the middle (3 + 3.5) / 2 = 3.25
np.median(np.array([1, 2, 8, 3, 4, 3.5]))

3.25

## Pandas

### Create DataFrame

In [39]:
# Dict form: each key is a column name and each list holds that column's values.
pd.DataFrame({
    'name': ['Ana', 'Bruno', 'César', 'Diego', 'Erika'],
    'age': [28, 33, 34, 23, 25],
    'height': [1.72, 1.69, 1.77, 1.79, 1.69],
    'has_linux': [True, False, False, True, True],
    'grade': [8, 7, 8, 10, 7]
})

Unnamed: 0,name,age,height,has_linux,grade
0,Ana,28,1.72,True,8
1,Bruno,33,1.69,False,7
2,César,34,1.77,False,8
3,Diego,23,1.79,True,10
4,Erika,25,1.69,True,7


In [67]:
pd.DataFrame(
        [
            ['Ana', 28, 1.72, True, 8],
            ['Bruno', 33, 1.69, False, 7],
            ['César', 34, 1.77, False, 8],
            ['Diego', 23, 1.79, True, 10],
            ['Erika', 25, 1.69, True, 7]
        ],
        columns=["name", "age", "height", "has_linux", "grade"]
)

Unnamed: 0,name,age,height,has_linux,grade
0,Ana,28,1.72,True,8
1,Bruno,33,1.69,False,7
2,César,34,1.77,False,8
3,Diego,23,1.79,True,10
4,Erika,25,1.69,True,7


In [43]:
pd.read_csv("https://raw.githubusercontent.com/000paradox000/ucamp-ds-examples/main/sample_data/informacion/informacion.csv").head(5)

Unnamed: 0,Nombre,Edad,Estatura,¿Beca?,Calificación
0,Theodosia,33,182,VERDADERO,5
1,Genia,29,165,VERDADERO,7
2,Oliver,36,178,FALSO,8
3,Clint,19,176,FALSO,8
4,Nefen,22,176,FALSO,5


In [44]:
pd.read_excel("https://github.com/000paradox000/ucamp-ds-examples/raw/main/sample_data/informacion/informacion.xlsx").head(5)

Unnamed: 0,Nombre,Edad,Estatura,¿Beca?,Calificación
0,Theodosia,33,182,True,5
1,Genia,29,165,True,7
2,Oliver,36,178,False,8
3,Clint,19,176,False,8
4,Nefen,22,176,False,5


### DataFrame info

In [68]:
create_pandas_dataframe().columns

Index(['name', 'age', 'height', 'has_linux', 'grade'], dtype='object')

In [69]:
create_pandas_dataframe().shape

(5, 5)

In [70]:
create_pandas_dataframe().age

0    28
1    33
2    34
3    23
4    25
Name: age, dtype: int64

In [71]:
create_pandas_dataframe()["age"]

0    28
1    33
2    34
3    23
4    25
Name: age, dtype: int64

In [72]:
create_pandas_dataframe()[["age", "has_linux"]]

Unnamed: 0,age,has_linux
0,28,True
1,33,False
2,34,False
3,23,True
4,25,True


In [73]:
type(create_pandas_dataframe()[["age", "has_linux"]])

pandas.core.frame.DataFrame

In [74]:
type(create_pandas_dataframe())

pandas.core.frame.DataFrame

In [75]:
create_pandas_dataframe().dtypes

name          object
age            int64
height       float64
has_linux       bool
grade          int64
dtype: object

### Slicing / Filtering

In [76]:
create_pandas_dataframe()[1:3]

Unnamed: 0,name,age,height,has_linux,grade
1,Bruno,33,1.69,False,7
2,César,34,1.77,False,8


In [77]:
create_pandas_dataframe().loc[1]

name         Bruno
age             33
height        1.69
has_linux    False
grade            7
Name: 1, dtype: object

In [79]:
# Boolean-mask filtering: build the frame once so the mask and the rows it
# selects come from the same object.
people = create_pandas_dataframe()
people[people["grade"] > 7]

Unnamed: 0,name,age,height,has_linux,grade
0,Ana,28,1.72,True,8
2,César,34,1.77,False,8
3,Diego,23,1.79,True,10


In [81]:
# has_linux is already a boolean column, so it can be used as the mask
# directly; comparing it with True is redundant.
people = create_pandas_dataframe()
people[people["has_linux"]]

Unnamed: 0,name,age,height,has_linux,grade
0,Ana,28,1.72,True,8
3,Diego,23,1.79,True,10
4,Erika,25,1.69,True,7


In [82]:
# Combine conditions with & (element-wise AND). The comparison needs its own
# parentheses because & binds more tightly than >.
people = create_pandas_dataframe()
people[(people["grade"] > 7) & people["has_linux"]]

Unnamed: 0,name,age,height,has_linux,grade
0,Ana,28,1.72,True,8
3,Diego,23,1.79,True,10


### Modify a DataFrame

create_pandas_dataframe().sort_values("grade")

In [84]:
create_pandas_dataframe().sort_values("grade", ascending=False)

Unnamed: 0,name,age,height,has_linux,grade
3,Diego,23,1.79,True,10
0,Ana,28,1.72,True,8
2,César,34,1.77,False,8
1,Bruno,33,1.69,False,7
4,Erika,25,1.69,True,7


In [85]:
create_pandas_dataframe().rename(columns={
    "name": "first_name"
})

Unnamed: 0,first_name,age,height,has_linux,grade
0,Ana,28,1.72,True,8
1,Bruno,33,1.69,False,7
2,César,34,1.77,False,8
3,Diego,23,1.79,True,10
4,Erika,25,1.69,True,7


In [86]:
create_pandas_dataframe().drop(columns=["height"])

Unnamed: 0,name,age,has_linux,grade
0,Ana,28,True,8
1,Bruno,33,False,7
2,César,34,False,8
3,Diego,23,True,10
4,Erika,25,True,7


### Group by

In [89]:
create_pandas_dataframe().groupby(by="age")["grade"].count()

age
23    1
25    1
28    1
33    1
34    1
Name: grade, dtype: int64

In [90]:
create_pandas_dataframe().groupby(by="age")["grade"].max()

age
23    10
25     7
28     8
33     7
34     8
Name: grade, dtype: int64

In [91]:
create_pandas_dataframe().groupby(by="age")["grade"].mean()

age
23    10.0
25     7.0
28     8.0
33     7.0
34     8.0
Name: grade, dtype: float64