# Pandas tests


[pandas API docs](https://pandas.pydata.org/docs/reference/index.html)

In [63]:
import pandas as pd
import os
from pathlib import Path
import numpy as np

In [2]:
# Directory (under the current working directory) that holds the CSV files
# written and read back later in this notebook.
csv_dir_path = Path.cwd() / "csv_test"
# exist_ok=True makes this a no-op when the directory is already there, so no
# separate "check, then create" step is needed and the cell can be re-run.
csv_dir_path.mkdir(exist_ok=True)

## create new dataframe from dict

In [71]:
# Column-oriented input: every key becomes a column and every list holds that
# column's values in row order.
data_dict = dict(
    name=["Albert", "Jean", "François"],
    age=[35, 10, 51],
    pob=["Paris", "Nancy", "Toulouse"],
)
df = pd.DataFrame(data_dict)
df

Unnamed: 0,name,age,pob
0,Albert,35,Paris
1,Jean,10,Nancy
2,François,51,Toulouse


In [62]:
# Structural summary: index range, per-column non-null counts and dtypes, memory usage
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   name    3 non-null      object
 1   age     3 non-null      int64 
 2   pob     3 non-null      object
dtypes: int64(1), object(2)
memory usage: 204.0+ bytes


In [22]:
# Single value by row label and column name. One .loc lookup replaces the
# chained df["name"][1] form, which performs two successive indexing operations.
df.loc[1, "name"]

'Jean'

In [21]:
# Row labels of the frame as a plain Python list
df.index.tolist()

[0, 1, 2]

## write to csv

In [25]:
# index=False leaves the row index out of the file, so only the data columns are written
df.to_csv(csv_dir_path.joinpath("csv_test.csv"), index=False)

## read from csv

In [5]:
# Load the file back into a DataFrame; df now refers to the data read from disk
df = pd.read_csv(csv_dir_path.joinpath("csv_test.csv"))
df

Unnamed: 0,name,age,pob
0,Albert,35,Paris
1,Jean,10,Nancy
2,François,51,Toulouse


## use column as index

In [13]:
# Promote the "name" column to the row index (it stops being a regular column).
# Note: the result is assigned back to df, so re-running this cell on the
# already-indexed frame is expected to fail because "name" is no longer a column.
df = df.set_index("name")
df

Unnamed: 0_level_0,age,pob
name,Unnamed: 1_level_1,Unnamed: 2_level_1
Albert,35,Paris
Jean,10,Nancy
François,51,Toulouse


In [15]:
# undo: reset_index() turns the "name" index back into a regular column and
# restores the default 0..n-1 integer index
df = df.reset_index()
df

Unnamed: 0,name,age,pob
0,Albert,35,Paris
1,Jean,10,Nancy
2,François,51,Toulouse


## access a single value (at, iat, loc, iloc)

In [20]:
# .at reads one scalar by label: row label 1, column "pob"
df.at[1, "pob"]

'Nancy'

In [22]:
# set a single value: .at also writes one cell in place (row label 1, column "pob")
df.at[1, "pob"] = "Grenoble"
df

Unnamed: 0,name,age,pob
0,Albert,35,Paris
1,Jean,10,Grenoble
2,François,51,Toulouse


In [27]:
# .iat reads one scalar by integer position: row 0, column 1
df.iat[0, 1]

32

In [28]:
# .iat writes one cell by integer position: row 0, column 1 (here the "age" column)
df.iat[0, 1] = 32
df

Unnamed: 0,name,age,pob
0,Albert,32,Paris
1,Jean,10,Grenoble
2,François,51,Toulouse


In [30]:
# .loc with two scalars (row label 0, column "age") returns a single value
df.loc[0, "age"]

32

In [31]:
# same cell addressed by integer position: row 0, column 1
df.iloc[0, 1]

32

In [32]:
# .loc can also assign: set the "age" cell of the row labelled 0
df.loc[0, "age"] = 30
df

Unnamed: 0,name,age,pob
0,Albert,30,Paris
1,Jean,10,Grenoble
2,François,51,Toulouse


## read columns and rows (loc, iloc)

In [38]:
# read column: bracket selection with one column name returns a Series
df["age"]

0    30
1    10
2    51
Name: age, dtype: int64

In [39]:
# same column with .loc: all rows (:), column label "age"
df.loc[:, "age"]

0    30
1    10
2    51
Name: age, dtype: int64

In [44]:
# same column with .iloc: all rows (:), column at integer position 1
df.iloc[:, 1]

0    30
1    10
2    51
Name: age, dtype: int64

In [35]:
# read columns (subtable): a list of column names returns a DataFrame
df[["name", "pob"]]

Unnamed: 0,name,pob
0,Albert,Paris
1,Jean,Grenoble
2,François,Toulouse


In [41]:
# same subtable with .loc: all rows, columns selected by label
df.loc[:, ["name", "pob"]]

Unnamed: 0,name,pob
0,Albert,Paris
1,Jean,Grenoble
2,François,Toulouse


In [45]:
# same subtable with .iloc: all rows, columns at integer positions 0 and 2
df.iloc[:, [0, 2]]

Unnamed: 0,name,pob
0,Albert,Paris
1,Jean,Grenoble
2,François,Toulouse


In [48]:
# read row: a single row label returns that row as a Series indexed by column name
df.loc[0]

name    Albert
age         30
pob      Paris
Name: 0, dtype: object

In [47]:
# equivalent explicit form: row label 0, all columns (:)
df.loc[0, :]

name    Albert
age         30
pob      Paris
Name: 0, dtype: object

In [49]:
# read rows: a list of row labels returns a DataFrame
df.loc[[0, 1], :]

Unnamed: 0,name,age,pob
0,Albert,30,Paris
1,Jean,10,Grenoble


## add a row

In [73]:
# with loc: assigning to a row label that does not exist yet appends the row.
# len(df) is the next free label only while the index is the default 0..n-1
# range; the values must follow the column order (name, age, pob).
# Re-running this cell appends another "Pierre" row.
df.loc[len(df)] = ["Pierre", 15, "Lyon"]
df

Unnamed: 0,name,age,pob
0,Albert,35,Paris
1,Jean,10,Nancy
2,François,51,Toulouse
3,Pierre,15,Lyon


In [93]:
# with concat
# Build the new row as a one-row DataFrame from a record (dict) so each column
# gets its own inferred dtype. Going through a Series and .to_frame().T would
# first store all three values in one object-dtype Series, and the
# concatenated "age" column would then come out as object instead of int64.
new_row = pd.DataFrame([{"name": "Jacques", "age": 27, "pob": "Brest"}])
# ignore_index=True renumbers the result 0..n-1 instead of keeping the input labels
df = pd.concat([df, new_row], ignore_index=True)
df

Unnamed: 0,name,age,pob
0,Albert,35,Paris
1,Jean,10,Nancy
2,François,51,Toulouse
3,Pierre,15,Lyon
4,Jacques,27,Brest


## apply, map

In [95]:
# add a column "is_adult"
def is_adult(row, adult_age=18):
    """Tell whether the person described by ``row`` has reached adulthood.

    Args:
        row: Mapping-like record (for example a DataFrame row) with an
            "age" entry that ``int()`` can convert.
        adult_age: Minimum age counted as adult; defaults to 18.

    Returns:
        True if ``int(row["age"]) >= adult_age``, otherwise False.
    """
    # The comparison already yields a bool, so no if/else branch is needed.
    return int(row["age"]) >= adult_age

# axis=1 makes apply pass each row (as a Series) to is_adult; the returned
# booleans become the new "is_adult" column.
df["is_adult"] = df.apply(is_adult, axis=1)
df

Unnamed: 0,name,age,pob,is_adult
0,Albert,35,Paris,True
1,Jean,10,Nancy,False
2,François,51,Toulouse,True
3,Pierre,15,Lyon,False
4,Jacques,27,Brest,True


In [103]:
# map: call a function on every element of one column; the result is a new
# Series and df itself is not modified
df["age"].map(lambda age: age * 2)

0     70
1     20
2    102
3     30
4     54
Name: age, dtype: int64

## select rows based on condition

In [104]:
# loc with a boolean mask: keep only the rows where the condition is True
df.loc[df["age"] >= 18]

Unnamed: 0,name,age,pob,is_adult
0,Albert,35,Paris,True
2,François,51,Toulouse,True
4,Jacques,27,Brest,True


In [105]:
# a boolean column can be used directly as the row mask
df.loc[df["is_adult"]]

Unnamed: 0,name,age,pob,is_adult
0,Albert,35,Paris,True
2,François,51,Toulouse,True
4,Jacques,27,Brest,True


# DUMP

In [111]:
# Column names of the frame as a plain Python list
df.columns.tolist()

['name', 'age', 'pob', 'is_adult']