# Referenties

Altijd beginnen met 'getting started':

https://pandas.pydata.org/docs/getting_started/index.html#getting-started

Dan een snel overzicht:

https://pandas.pydata.org/docs/user_guide/10min.html

Daarna kijken naar de API beschrijving:

https://pandas.pydata.org/docs/reference/index.html

# Installatie

Run `pip3 install pandas` in the terminal to install the Pandas library.

# Imports

In [9]:
import pandas as pd

import matplotlib.pyplot as plt

# Schetsen

In [10]:
# Dataframe maken
df = pd.DataFrame(
    {
        "Name": [
            "Braund, Mr. Owen Harris",
            "Allen, Mr. William Henry",
            "Bonnell, Miss. Elizabeth",
        ],
        "Age": [
            22,
            35,
            58
        ],
        "Sex": [
            "male",
            "male",
            "female"
        ],
    }
)

# Standaard print
print(df)

# Vergelijk met
df

                       Name  Age     Sex
0   Braund, Mr. Owen Harris   22    male
1  Allen, Mr. William Henry   35    male
2  Bonnell, Miss. Elizabeth   58  female


Unnamed: 0,Name,Age,Sex
0,"Braund, Mr. Owen Harris",22,male
1,"Allen, Mr. William Henry",35,male
2,"Bonnell, Miss. Elizabeth",58,female


In [11]:
# Data opslaan
# df.to_csv("./output/persons.csv")
df.to_csv("./persons.csv", index=False)

In [12]:
# Data inlezen
persons = pd.read_csv("./persons.csv")

print(persons)

                       Name  Age     Sex
0   Braund, Mr. Owen Harris   22    male
1  Allen, Mr. William Henry   35    male
2  Bonnell, Miss. Elizabeth   58  female


In [13]:
# Een kolom lezen
persons["Name"]

0     Braund, Mr. Owen Harris
1    Allen, Mr. William Henry
2    Bonnell, Miss. Elizabeth
Name: Name, dtype: object

In [14]:
# De kolomnaam
persons.columns[1]

'Age'

In [15]:
# Een kolom verwijderen
persons.pop("Name")

persons

Unnamed: 0,Age,Sex
0,22,male
1,35,male
2,58,female


In [16]:
# Een rij lezen
persons.iloc[2]

Age        58
Sex    female
Name: 2, dtype: object

In [17]:
# Functie getRow() maken
def getRow(df, index):

    return df.iloc[index]


# Testen
print(getRow(persons, 2))

Age        58
Sex    female
Name: 2, dtype: object


In [18]:
# Een rij toevoegen
persons.loc[3] = ["Ruud", 44, "male"]

persons

ValueError: cannot set a row with mismatched columns

In [19]:
# Functie insertAt(row, index)
def insertAt(df, row, index):
    
    df.loc[index - .5] = row
    
    # Reindex
    return df.sort_index().reset_index(drop=True)

# Testen
persons = insertAt(persons, ["Kevin", 40, "male"], 2)

persons

ValueError: cannot set a row with mismatched columns

In [20]:
# Verkennen
persons.head()

Unnamed: 0,Age,Sex
0,22,male
1,35,male
2,58,female


In [21]:
# Filteren
persons[persons["Age"] < 40]

Unnamed: 0,Age,Sex
0,22,male
1,35,male


In [None]:
# Groeperen
persons.groupby("Sex", group_keys=True).apply(lambda x: x)

In [None]:
# Extra: toepassing OOP en het bouwen van je eigen library

# Methode 1: DataFrame "wrappen"
class MyDataFrame:
    
    # Here we assume that df is of type pd.DataFrame
    def __init__(self, df):
        
        self.df = df


    def getRow(self, index):

        return self.df.iloc[index]


    def insertAt(self, row, index):
    
        self.df.loc[index - .5] = row

        # Reindex
        self =  self.sort_index().reset_index(drop=True)


    # This builtin enables the [] operator
    def __getitem__(self, key):
        
        return self.df[key]


    # This builtin enables us to print the dataframe
    def __str__(self):
        
        print(self.df)


        
# Testen
copy = MyDataFrame(persons)

copy["Name"]
copy.getRow(2)

copy

In [None]:
# Methode 2: DataFrame "extend"
class MyDataFrame(pd.DataFrame):
    
    # Constructor for class
    def __init__(self, data=None, index=None, columns=None, dtype=None, copy=None):
        
        # Initialize the parent (the DataFrame)
        super().__init__(data, index, columns, dtype, copy)


    def getRow(self, index):

        return self.iloc[index]


    def insertAt(self, row, index):
    
        self.loc[index - .5] = row

        # Reindex
        self = self.sort_index().reset_index(drop=True)
        
# Testen
# mydf = MyDataFrame(persons)
mydf = MyDataFrame(
    {
        "Name": [
            "Braund, Mr. Owen Harris",
            "Allen, Mr. William Henry",
            "Bonnell, Miss. Elizabeth",
        ],
        "Age": [
            22,
            35,
            58
        ],
        "Sex": [
            "male",
            "male",
            "female"
        ],
    }
)

row = mydf.getRow(1)

row