## Import Modules
- Module yang digunakan adalah pandas dan numpy
- Menampilkan versi pandas dan numpy yang sedang digunakan

In [1]:
import pandas as pd
import numpy as np

# Report the pandas and numpy versions this section was executed with (one per line).
print(pd.__version__, np.__version__, sep='\n')

1.1.3
1.19.2


# #17: Resampling pada data deret waktu (time series data)
Resampling data dalam data time series adalah pengelompokan ulang data berdasarkan interval waktu tertentu

### `Persiapan Data Frame`

In [4]:
# Dummy time series: one row per hour for 365 days, two integer columns.
n_rows = 365 * 24
n_cols = 2
cols = ['col1', 'col2']

# Seeded generator so the random values (and every table derived from them)
# are reproducible after Restart Kernel -> Run All.
rng = np.random.default_rng(42)
df = pd.DataFrame(rng.integers(1, 20, size=(n_rows, n_cols)),
                  columns=cols)

# pd.date_range is the public equivalent of the deprecated
# pd.util.testing.makeDateIndex helper: an hourly index starting 2000-01-01.
df.index = pd.date_range('2000-01-01', periods=n_rows, freq='h')
df

Unnamed: 0,col1,col2
2000-01-01 00:00:00,16,18
2000-01-01 01:00:00,7,10
2000-01-01 02:00:00,18,13
2000-01-01 03:00:00,18,9
2000-01-01 04:00:00,9,7
...,...,...
2000-12-30 19:00:00,17,14
2000-12-30 20:00:00,19,9
2000-12-30 21:00:00,9,5
2000-12-30 22:00:00,9,16


# Resampling data dengan interval monthly
- `('M')` Resampling berbasis bulan
- Fungsi agregasi `sum()`
- `to_frame()` agar tampilannya lebih bagus dan informatif

In [6]:
# Monthly totals of col1: pick the column first, then bucket by month-end and sum.
df['col1'].resample('M').sum().to_frame()

Unnamed: 0,col1
2000-01-31,7312
2000-02-29,6883
2000-03-31,7635
2000-04-30,7211
2000-05-31,7440
2000-06-30,7291
2000-07-31,7502
2000-08-31,7298
2000-09-30,7248
2000-10-31,7290


# Resampling data dengan interval daily
`('D')` Resampling berbasis harian

In [7]:
# Daily totals of col1: pick the column first, then bucket by calendar day and sum.
df['col1'].resample('D').sum().to_frame()

Unnamed: 0,col1
2000-01-01,263
2000-01-02,228
2000-01-03,255
2000-01-04,274
2000-01-05,200
...,...
2000-12-26,226
2000-12-27,209
2000-12-28,268
2000-12-29,241


## Import Modules
- Module yang digunakan adalah pandas dan numpy
- Menampilkan versi pandas dan numpy yang sedang digunakan

In [12]:
import pandas as pd
import numpy as np

# Report the pandas and numpy versions this section was executed with (one per line).
print(pd.__version__, np.__version__, sep='\n')

1.1.3
1.19.2


# #18: Membentuk dummy Data Frame
- Memanfaatkan bentuk data Dictionary
- Memanfaatkan bentuk data Numpy Array

## Membentuk Data Frame dari Dictionary

In [13]:
# Each dictionary key becomes a column name; each list becomes that column's values.
pd.DataFrame(dict(col1=[1, 2, 3, 4],
                  col2=[5, 6, 7, 8]))

Unnamed: 0,col1,col2
0,1,5
1,2,6
2,3,7
3,4,8


## Membentuk Data Frame dari Numpy Array

In [14]:
# Small random integer matrix (values 1..19) used as the Data Frame source below.
n_rows = 5
n_cols = 3

# Seeded generator so the array is identical on every Restart Kernel -> Run All.
rng = np.random.default_rng(42)
arr = rng.integers(1, 20, size=(n_rows, n_cols))
arr


array([[15, 19, 19],
       [ 3, 12, 18],
       [13, 16, 19],
       [ 2, 11, 18],
       [17, 16, 18]])

In [15]:
# Wrap the array in a Data Frame, labelling its three columns A, B and C.
pd.DataFrame(data=arr, columns=list('ABC'))

Unnamed: 0,A,B,C
0,15,19,19
1,3,12,18
2,13,16,19
3,2,11,18
4,17,16,18


## Membentuk Data Frame dengan memanfaatkan `pandas.util.testing`
- `head()` efeknya hanya menampilkan 5 baris pertama

In [16]:
# Dummy frame from pandas' test helpers; .head() limits the display to the first rows.
# NOTE(review): pandas.util.testing is deprecated since pandas 1.0 and, as far as I know,
# gone in 2.0 with no public equivalent for the make* helpers; confirm before upgrading.
pd.util.testing.makeDataFrame().head()

Unnamed: 0,A,B,C,D
Q8QdGfcKKy,0.460346,-1.213116,-0.425235,0.333899
llS9FQtnBT,1.043294,-1.093364,-1.773757,1.467885
ww8VAnx0EV,0.876649,-0.529566,0.266037,-0.045169
ipHuJTRyEa,0.718759,-0.143324,0.115981,0.751609
Kr5iNYLSwf,-2.016919,0.078531,0.235939,-1.002402


In [17]:
# Dummy frame with mixed column types (numbers, strings, dates) from pandas' test helpers.
# NOTE(review): pandas.util.testing is deprecated since pandas 1.0 and, as far as I know,
# gone in 2.0 with no public equivalent for the make* helpers; confirm before upgrading.
pd.util.testing.makeMixedDataFrame().head()

Unnamed: 0,A,B,C,D
0,0.0,0.0,foo1,2009-01-01
1,1.0,1.0,foo2,2009-01-02
2,2.0,0.0,foo3,2009-01-05
3,3.0,1.0,foo4,2009-01-06
4,4.0,0.0,foo5,2009-01-07


In [18]:
# Dummy frame indexed by dates from pandas' test helpers.
# NOTE(review): pandas.util.testing is deprecated since pandas 1.0 and, as far as I know,
# gone in 2.0 with no public equivalent for the make* helpers; confirm before upgrading.
pd.util.testing.makeTimeDataFrame().head()

Unnamed: 0,A,B,C,D
2000-01-03,0.027601,0.551496,-0.605236,0.06094
2000-01-04,0.432126,-0.393559,-1.824456,0.812758
2000-01-05,-0.853371,-0.601547,0.998682,0.680254
2000-01-06,0.802158,-0.72619,-0.725625,-0.247566
2000-01-07,-0.611567,0.762307,-0.210727,-1.250067


In [21]:
# Dummy frame containing some missing values from pandas' test helpers.
# NOTE(review): pandas.util.testing is deprecated since pandas 1.0 and, as far as I know,
# gone in 2.0 with no public equivalent for the make* helpers; confirm before upgrading.
pd.util.testing.makeMissingDataframe().head()

Unnamed: 0,A,B,C,D
4gI8jwOS1S,-0.158704,1.279188,1.394031,0.809576
HjFoQwqx2Z,-0.52666,-1.647664,0.345796,
NL4DDfkFdp,0.075901,0.67861,1.389069,
MvxWSdfwkO,-0.192689,-0.434147,2.991289,0.391216
Zdfieqvlm3,0.626328,0.311492,-1.082461,-0.404922


## Import Modules
- Module yang digunakan adalah pandas dan numpy
- Menampilkan versi pandas dan numpy yang sedang digunakan

In [42]:
import pandas as pd
import numpy as np

# Report the pandas and numpy versions this section was executed with (one per line).
print(pd.__version__, np.__version__, sep='\n')

1.1.3
1.19.2


# #19: Formatting tampilan Data Frame

### `Persiapan Data Frame`

In [52]:
# Dummy revenue ('omset') and operating-cost ('operasional') figures, five rows.
n_rows = 5
n_cols = 2
cols = ['omset', 'operasional']

# Seeded generator so the report values are identical on every
# Restart Kernel -> Run All.
rng = np.random.default_rng(42)
df = pd.DataFrame(rng.integers(1, 20, size=(n_rows, n_cols)),
                  columns=cols)
df

Unnamed: 0,omset,operasional
0,10,3
1,10,11
2,11,12
3,15,18
4,19,8


In [53]:
# Scale the small dummy integers up to currency-sized amounts.
# Note: re-running this cell multiplies the columns again.
df['omset'] = df['omset'].mul(100_000)
df['operasional'] = df['operasional'].mul(10_000)
df

Unnamed: 0,omset,operasional
0,1000000,30000
1,1000000,110000
2,1100000,120000
3,1500000,180000
4,1900000,80000


In [54]:
# Attach one calendar date per row, then move it from the index into a regular
# 'tanggal' column. pd.date_range is the public equivalent of the deprecated
# pd.util.testing.makeDateIndex helper (daily dates starting 2000-01-01).
df.index = pd.date_range('2000-01-01', periods=n_rows, freq='D')

# reset_index() turns the unnamed index into a column called 'index'.
df = df.reset_index().rename(columns={'index': 'tanggal'})
df

Unnamed: 0,tanggal,omset,operasional
0,2000-01-01,1000000,30000
1,2000-01-02,1000000,110000
2,2000-01-03,1100000,120000
3,2000-01-04,1500000,180000
4,2000-01-05,1900000,80000


## Melakukan formatting tampilan Data Frame

In [55]:
# Per-column display formats: day/month/year for the date column and a
# two-decimal 'Rp' amount for both money columns.
formatku = {
    'tanggal': '{:%d/%m/%y}',
    **dict.fromkeys(('operasional', 'omset'), 'Rp {:.2f}'),
}

# Build the Styler that renders df using the formats above.
laporan = df.style.format(formatter=formatku)
laporan

Unnamed: 0,tanggal,omset,operasional
0,01/01/00,Rp 1000000.00,Rp 30000.00
1,02/01/00,Rp 1000000.00,Rp 110000.00
2,03/01/00,Rp 1100000.00,Rp 120000.00
3,04/01/00,Rp 1500000.00,Rp 180000.00
4,05/01/00,Rp 1900000.00,Rp 80000.00


In [56]:
# Inspect the type of the object produced by df.style.format(...).
type(laporan)

pandas.io.formats.style.Styler

In [57]:
# Hide the row index in the rendered report.
# Styler.hide_index() was deprecated in pandas 1.4 and removed in 2.0 in favour of
# Styler.hide(axis='index'); use whichever method this pandas version provides.
laporan.hide(axis='index') if hasattr(laporan, 'hide') else laporan.hide_index()

tanggal,omset,operasional
01/01/00,Rp 1000000.00,Rp 30000.00
02/01/00,Rp 1000000.00,Rp 110000.00
03/01/00,Rp 1100000.00,Rp 120000.00
04/01/00,Rp 1500000.00,Rp 180000.00
05/01/00,Rp 1900000.00,Rp 80000.00


In [58]:
# Add a caption (title) to the styled report.
laporan.set_caption('Data Omset dan Operasional')

tanggal,omset,operasional
01/01/00,Rp 1000000.00,Rp 30000.00
02/01/00,Rp 1000000.00,Rp 110000.00
03/01/00,Rp 1100000.00,Rp 120000.00
04/01/00,Rp 1500000.00,Rp 180000.00
05/01/00,Rp 1900000.00,Rp 80000.00


In [59]:
# Colour the smallest and largest value in each money column.
# The calls are chained in the same order as before; the final return value is
# what the cell displays.
(
    laporan
    .highlight_min(subset='omset', color='pink')
    .highlight_max(subset='omset', color='lightgreen')
    .highlight_min(subset='operasional', color='lightblue')
    .highlight_max(subset='operasional', color='grey')
)

tanggal,omset,operasional
01/01/00,Rp 1000000.00,Rp 30000.00
02/01/00,Rp 1000000.00,Rp 110000.00
03/01/00,Rp 1100000.00,Rp 120000.00
04/01/00,Rp 1500000.00,Rp 180000.00
05/01/00,Rp 1900000.00,Rp 80000.00


## Import Modules
- Module yang digunakan adalah pandas
- Menampilkan versi pandas yang sedang digunakan

In [61]:
import pandas as pd

# Show the pandas version used for this section.
print(pd.__version__)

1.1.3


# #20: Menggabungkan (merge) dua Data Frame secara berdampingan

### `Persiapan Data Frame`

In [62]:
# Left-hand frame: two numeric columns on the default integer index.
d1 = dict(col1=[1, 2, 3],
          col2=[10, 20, 30])
df1 = pd.DataFrame(data=d1)
df1

Unnamed: 0,col1,col2
0,1,10
1,2,20
2,3,30


In [63]:
# Right-hand frame: two more numeric columns on the same default integer index.
d2 = dict(col3=[4, 5, 6],
          col4=[40, 50, 60])
df2 = pd.DataFrame(data=d2)
df2

Unnamed: 0,col3,col4
0,4,40
1,5,50
2,6,60


## Menggabungkan (merge) dua Data Frame secara berdampingan berdasarkan index

In [65]:
# Place df2's columns next to df1's by matching rows on their index labels.
df = df1.merge(df2, left_index=True, right_index=True)
df

Unnamed: 0,col1,col2,col3,col4
0,1,10,4,40
1,2,20,5,50
2,3,30,6,60
