In [1]:
import pandas as pd

In [2]:
# initial_data = [
#     {'id': 1, 'firstname': 'Manuel', 'lastname': 'Wiedenmann', 'balance': 50.0},
# ]
accounts = pd.DataFrame(columns=['id', 'firstname', 'lastname', 'balance'])

data = [
    {'id': 1, 'firstname': 'Manuel', 'lastname': 'Wiedenmann', 'balance': 50.0},
    {'id': 2, 'firstname': 'Matthias', 'lastname': 'Rettenmeier', 'balance': 100.0},
    {'id': 3, 'firstname': 'Matthias', 'lastname': 'Rettenmeier', 'balance': 20.0},
]

for value in data:
    accounts = accounts.append(value, ignore_index=True)

accounts.to_csv('../data/bank/accounts.csv')

In [3]:
accounts.head()

Unnamed: 0,id,firstname,lastname,balance
0,1,Manuel,Wiedenmann,50.0
1,2,Matthias,Rettenmeier,100.0
2,3,Matthias,Rettenmeier,20.0


In [4]:
accounts.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 4 columns):
id           3 non-null object
firstname    3 non-null object
lastname     3 non-null object
balance      3 non-null float64
dtypes: float64(1), object(3)
memory usage: 176.0+ bytes


In [5]:
accounts.index

RangeIndex(start=0, stop=3, step=1)

In [6]:
accounts.shape

(3, 4)

In [7]:
accounts.columns

Index(['id', 'firstname', 'lastname', 'balance'], dtype='object')

In [None]:
accounts.dtypes

In [7]:
accounts['id']

0    1
1    2
2    3
Name: id, dtype: int64

In [8]:
accounts['firstname']

0      Manuel
1    Matthias
2    Matthias
Name: firstname, dtype: object

In [9]:
accounts['lastname']

0     Wiedenmann
1    Rettenmeier
2    Rettenmeier
Name: lastname, dtype: object

In [None]:
accounts['balance']

In [5]:
accounts.describe()

Unnamed: 0,id,balance
count,3.0,3.0
mean,2.0,56.666667
std,1.0,40.414519
min,1.0,20.0
25%,1.5,35.0
50%,2.0,50.0
75%,2.5,75.0
max,3.0,100.0


In [6]:
accounts['id'] = accounts['id'].astype('int')

In [None]:
accounts.dtypes

In [None]:
accounts.info()

In [None]:
accounts.describe()

In [None]:
accounts

## Transactions

In [None]:
import datetime
import random
import time
import uuid

In [None]:
def random_timestamp():
    days = random.randint(1,365)
    now = datetime.datetime.utcnow()
    delta = datetime.timedelta(days=days)
    return now - delta

def short_uuid():
    return str(uuid.uuid4())[:8]

In [None]:
initial_data = [
    {'id': short_uuid(), 'sender_id': 3, 'recipient_id': 2, 'amount': 15.0, 'subject': '', 'category': '', 'timestamp': random_timestamp()},
]
    
transactions = pd.DataFrame(data=initial_data, columns=['id', 'sender_id', 'recipient_id', 'amount', 'subject', 'category', 'timestamp'])

data = [
    {'id': short_uuid(), 'sender_id': 1, 'recipient_id': 2, 'amount': 10.0, 'subject': '', 'category': '', 'timestamp': random_timestamp()},
    {'id': short_uuid(), 'sender_id': 1, 'recipient_id': 3, 'amount': 5.0, 'subject': '', 'category': '', 'timestamp': random_timestamp()},
    {'id': short_uuid(), 'sender_id': 2, 'recipient_id': 1, 'amount': 20.0, 'subject': '', 'category': '', 'timestamp': random_timestamp()},
    {'id': short_uuid(), 'sender_id': 3, 'recipient_id': 1, 'amount': 20.0, 'subject': '', 'category': '', 'timestamp': random_timestamp()},
]

for value in data:
    transactions = transactions.append(value, ignore_index=True)
    
accounts.to_csv('../data/bank/transactions.csv')

In [None]:
transactions

In [None]:
transactions.info()

In [None]:
transactions.describe()

In [None]:
transactions.sort_values('amount')

In [None]:
transactions

### .loc[]

http://pandas.pydata.org/pandas-docs/version/0.24/reference/api/pandas.DataFrame.loc.html

> **Warning** Note that contrary to usual python slices, both the start and the stop are included

In [None]:
# Gibt uns die Liste aller amounts
transactions.loc[:, 'amount'] # äquivalent zu transactions['amount']

In [None]:
# Gibt uns die Liste der ersten zwei amounts
transactions.loc[0:1, 'amount']

In [None]:
# Gibt uns die Liste der letzten zwei amounts
transactions.loc[3:4, 'amount']

In [None]:
# Gibt uns die Liste ab dem 2. bis zum 4. amount
transactions.loc[1:3, 'amount']

In [None]:
# Gibt uns die Liste ab dem 3. amount bis zum Ende
transactions.loc[2:, 'amount']

In [None]:
# Gibt uns die Liste vom Anfang bis zum 4. amount
transactions.loc[:3, 'amount']

## .loc[] als Filter

In [None]:
# Gibt uns die Anzahl aller Transaktionen mit dem amount == 20
len(transactions.loc[transactions['amount'] == 20])

In [None]:
# Gibt uns die rows/Transaktionen mit dem amount == 20
transactions.loc[transactions['amount'] == 20]

In [None]:
# Gibt uns die Anzahl aller Transactionen mit dem amount < 20
len(transactions.loc[transactions['amount'] < 20])

In [None]:
transactions.loc[transactions['amount'] < 20]

In [None]:
# Gibt uns die Anzahl aller Transactionen mit dem amount == 7
len(transactions.loc[transactions['amount'] == 7])

In [None]:
transactions.loc[transactions['amount'] == 7]