# Referenties

Altijd beginnen met 'getting started':

https://pandas.pydata.org/docs/getting_started/index.html#getting-started

Dan een snel overzicht:

https://pandas.pydata.org/docs/user_guide/10min.html

Daarna kijken naar de API-beschrijving:

https://pandas.pydata.org/docs/reference/index.html

# Installatie

Run `pip3 install pandas matplotlib` in the terminal to install the pandas and Matplotlib libraries that this notebook imports.

# Imports

In [1]:
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd

# Schetsen

In [2]:
# Build a small three-row DataFrame from a dict mapping column name -> values
df = pd.DataFrame(
    data={
        "Name": ["Braund, Mr. Owen Harris", "Allen, Mr. William Henry", "Bonnell, Miss. Elizabeth"],
        "Age": [22, 35, 58],
        "Sex": ["male", "male", "female"],
    }
)

# Plain-text rendering through print()
print(df)

# Compare with the rendering of a bare last expression in the cell
df

                       Name  Age     Sex
0   Braund, Mr. Owen Harris   22    male
1  Allen, Mr. William Henry   35    male
2  Bonnell, Miss. Elizabeth   58  female


Unnamed: 0,Name,Age,Sex
0,"Braund, Mr. Owen Harris",22,male
1,"Allen, Mr. William Henry",35,male
2,"Bonnell, Miss. Elizabeth",58,female


In [3]:
# Save the data.
# to_csv does not create missing directories, so make sure ./output exists
# first; otherwise this cell fails on a fresh checkout.
Path("./output").mkdir(parents=True, exist_ok=True)

# index=False keeps the row index out of the file. The variant that also
# writes the index would be:
# df.to_csv("./output/persons.csv")
df.to_csv("./output/persons.csv", index=False)

In [4]:
# Read the CSV file written by the save cell back into a new DataFrame
persons = pd.read_csv("./output/persons.csv")

# print() renders the frame as plain text
print(persons)

                       Name  Age     Sex
0   Braund, Mr. Owen Harris   22    male
1  Allen, Mr. William Henry   35    male
2  Bonnell, Miss. Elizabeth   58  female


In [5]:
# Read a single column by its name
persons["Name"]

0     Braund, Mr. Owen Harris
1    Allen, Mr. William Henry
2    Bonnell, Miss. Elizabeth
Name: Name, dtype: object

In [6]:
# Look up a column name by position (position 1 is the second column)
persons.columns[1]

'Age'

In [7]:
# Removing a column: pop() would drop the column with the given name.
# The call is left disabled here; presumably an "Unnamed: 0" column only
# appears when the CSV was written without index=False — verify before enabling.
# persons.pop("Unnamed: 0")

persons

Unnamed: 0,Name,Age,Sex
0,"Braund, Mr. Owen Harris",22,male
1,"Allen, Mr. William Henry",35,male
2,"Bonnell, Miss. Elizabeth",58,female


In [8]:
# Read a single row by its index label
persons.loc[2]

Name    Bonnell, Miss. Elizabeth
Age                           58
Sex                       female
Name: 2, dtype: object

In [9]:
# Helper getRow(): look up a single row by its index label
def getRow(df, index):
    """Return the entry of ``df`` stored under index label ``index``.

    The lookup is delegated to ``df.loc``, i.e. it is label-based rather
    than position-based.
    """
    selected = df.loc[index]
    return selected


# Test: should print the same row as persons.loc[2]
print(getRow(persons, 2))

Name    Bonnell, Miss. Elizabeth
Age                           58
Sex                       female
Name: 2, dtype: object


In [10]:
# Add a row: assigning to an index label that does not exist yet appends a
# new row. Note that this modifies `persons` in place.
persons.loc[3] = ["Ruud", 43, "male"]

persons

Unnamed: 0,Name,Age,Sex
0,"Braund, Mr. Owen Harris",22,male
1,"Allen, Mr. William Henry",35,male
2,"Bonnell, Miss. Elizabeth",58,female
3,Ruud,43,male


In [11]:
# Function insertAt(df, row, index)
def insertAt(df, row, index):
    """Return a new DataFrame with ``row`` inserted directly after ``index``.

    The new row is given the temporary label ``index + 0.5`` so that sorting
    the index places it between ``index`` and ``index + 1``; afterwards the
    index is renumbered from 0.

    Parameters
    ----------
    df : the frame to insert into. It is not modified.
    row : the values for the new row, one per column of ``df``.
    index : numeric index label after which the row is placed
        (``index + 0.5`` must be computable).

    Returns
    -------
    A new DataFrame containing the extra row, with a fresh 0..n-1 index.
    """
    # Work on a copy so the caller's frame is not left with a stray
    # fractional index label.
    result = df.copy()

    # A label halfway between `index` and `index + 1` sorts between the two.
    result.loc[index + .5] = row

    # Sort the new label into place, then renumber the rows
    return result.sort_index().reset_index(drop=True)

# Test: insertAt returns the re-indexed frame, so rebind `persons` to it.
# NOTE: re-running this cell inserts the row again.
persons = insertAt(persons, ["Kevin", 40, "male"], 2)

persons

Unnamed: 0,Name,Age,Sex
0,"Braund, Mr. Owen Harris",22,male
1,"Allen, Mr. William Henry",35,male
2,"Bonnell, Miss. Elizabeth",58,female
3,Kevin,40,male
4,Ruud,43,male


In [12]:
# Explore: head() shows the first rows of the frame
persons.head()

Unnamed: 0,Name,Age,Sex
0,"Braund, Mr. Owen Harris",22,male
1,"Allen, Mr. William Henry",35,male
2,"Bonnell, Miss. Elizabeth",58,female
3,Kevin,40,male
4,Ruud,43,male


In [13]:
# Filter: the comparison builds a boolean mask, which keeps only the rows
# where Age is below 40
persons[persons["Age"] < 40]

Unnamed: 0,Name,Age,Sex
0,"Braund, Mr. Owen Harris",22,male
1,"Allen, Mr. William Henry",35,male


In [38]:
# Group the rows by the "Sex" column; the identity lambda hands every group
# back unchanged, so nothing is aggregated.
# NOTE(review): presumably a placeholder for a real aggregation — confirm intent.
persons.groupby("Sex", group_keys=True).apply(lambda x: x)

Unnamed: 0,Name,Age,Sex
0,"Braund, Mr. Owen Harris",22,male
1,"Allen, Mr. William Henry",35,male
2,"Bonnell, Miss. Elizabeth",58,female
3,Kevin,40,male
4,Ruud,43,male


In [55]:
# Extra: applying OOP and building your own library
class MyDataFrame:
    """Thin wrapper around a pandas DataFrame with a few convenience methods.

    The wrapped frame is available as the ``df`` attribute.
    """

    # Here we assume that df is of type pd.DataFrame
    def __init__(self, df):
        """Wrap ``df``. The frame is stored by reference, not copied."""
        self.df = df

    def getRow(self, index):
        """Return the row of the wrapped frame stored under label ``index``."""
        return self.df.loc[index]

    def insertAt(self, row, index):
        """Insert ``row`` directly after the row labelled ``index``.

        The wrapped frame is replaced by a new, re-indexed (0..n-1) frame;
        the frame originally passed to the constructor is not modified.
        ``index`` must be numeric because the new row temporarily gets the
        label ``index + 0.5``. Returns ``None``.
        """
        # Work on a copy so the caller's frame does not end up with a
        # fractional index label.
        updated = self.df.copy()

        # A label halfway between `index` and `index + 1` sorts between the two
        updated.loc[index + .5] = row

        # Reindex. The result must be stored on the instance: rebinding the
        # local name `self` would have no effect outside this method.
        self.df = updated.sort_index().reset_index(drop=True)

    # This builtin enables the [] operator
    def __getitem__(self, key):
        """Delegate ``obj[key]`` to the wrapped frame."""
        return self.df[key]

    # This builtin enables us to print the dataframe
    def __str__(self):
        """Return the plain-text rendering of the wrapped frame."""
        # __str__ must return a string; returning None makes print() raise
        return str(self.df)


        
# Test: wrap the existing frame. MyDataFrame stores a reference to `persons`,
# so despite the name `copy` no data is duplicated.
copy = MyDataFrame(persons)

# Only the last expression of a cell is displayed, so the result of the
# column lookup below is discarded.
copy["Name"]
copy.getRow(2)

Name    Bonnell, Miss. Elizabeth
Age                           58
Sex                       female
Name: 2, dtype: object