# **Import Modules**

---



In [1]:
import pandas as pd
import numpy as np

print(pd.__version__)
print(np.__version__)

1.3.5
1.21.6




---

# Pandas 17 | Resampling pada data deret waktu

---





## Persiapan Data Frame

In [5]:
n_rows = 365 * 24 # 365 hari x 24 jam
n_cols = 2
cols = ['col_1', 'col_2']

df = pd.DataFrame(np.random.randint(1, 20, size=(n_rows, n_cols)), columns=cols)
df.index = pd.util.testing.makeDateIndex(n_rows, freq='H')

df

Unnamed: 0,col_1,col_2
2000-01-01 00:00:00,9,12
2000-01-01 01:00:00,3,1
2000-01-01 02:00:00,4,16
2000-01-01 03:00:00,13,4
2000-01-01 04:00:00,7,18
...,...,...
2000-12-30 19:00:00,16,11
2000-12-30 20:00:00,6,15
2000-12-30 21:00:00,3,17
2000-12-30 22:00:00,12,4



## Resampling data dengan interval monthly





In [9]:
# resample('M') --> M = resampling interval tiap bulan
df.resample('M')['col_1'].sum().to_frame()

Unnamed: 0,col_1
2000-01-31,7559
2000-02-29,6968
2000-03-31,7538
2000-04-30,7100
2000-05-31,7220
2000-06-30,7351
2000-07-31,7238
2000-08-31,7209
2000-09-30,7279
2000-10-31,7631



## Resampling data dengan interval daily





In [10]:
# resample('D') --> D = resampling interval tiap hari
df.resample('D')['col_1'].sum().to_frame()

Unnamed: 0,col_1
2000-01-01,212
2000-01-02,267
2000-01-03,222
2000-01-04,209
2000-01-05,240
...,...
2000-12-26,217
2000-12-27,209
2000-12-28,203
2000-12-29,206




---

# Pandas 18 | Membentuk dummy Data Frame

---





## Persiapan Data Frame dengan Dictionary

In [12]:
pd.DataFrame({'col_1':[1,2,3,4], 'col_2':[5,6,7,8]})

Unnamed: 0,col_1,col_2
0,1,5
1,2,6
2,3,7
3,4,8


## Membentuk DataFrame dari Numpy Array

In [17]:
n_rows = 5
n_cols = 3

arr = np.random.randint(1, 20, size=(n_rows, n_cols))
arr

array([[ 9, 10, 14],
       [ 3, 14, 15],
       [ 2, 13,  2],
       [15,  1,  9],
       [ 2,  6, 15]])

In [18]:
pd.DataFrame(arr, columns=tuple('ABC'))

Unnamed: 0,A,B,C
0,9,10,14
1,3,14,15
2,2,13,2
3,15,1,9
4,2,6,15



## Membentuk DataFrame dengan memanfaatkan **pandas.util.testing**





In [19]:
pd.util.testing.makeDataFrame().head()

Unnamed: 0,A,B,C,D
UZgA9rtKNj,1.972787,0.063097,-1.043876,-0.45722
0NzSXh4ZAW,-0.783605,2.57067,2.650134,0.072338
SxLMhWEKuL,2.846003,0.648947,-0.061246,-0.643084
91lavBRRhy,-0.728824,0.549444,0.292935,0.452225
bkyAc0XKli,0.537536,0.248167,0.448837,0.005534


In [20]:
# DataFrame dengan campuran tipe data
pd.util.testing.makeMixedDataFrame().head()

Unnamed: 0,A,B,C,D
0,0.0,0.0,foo1,2009-01-01
1,1.0,1.0,foo2,2009-01-02
2,2.0,0.0,foo3,2009-01-05
3,3.0,1.0,foo4,2009-01-06
4,4.0,0.0,foo5,2009-01-07


In [21]:
# DataFrame dengan index berupa waktu
pd.util.testing.makeTimeDataFrame().head()

Unnamed: 0,A,B,C,D
2000-01-03,-0.093101,1.519867,0.758626,0.102482
2000-01-04,-1.269177,-0.181641,1.931644,1.354621
2000-01-05,-1.077382,-2.169222,-1.383651,0.761256
2000-01-06,0.146138,-0.74116,-1.821126,-1.128813
2000-01-07,-0.206675,0.19183,1.200575,1.231284


In [23]:
# DataFrame dengan ada beberapa nilai datanya yang NaN atau null
pd.util.testing.makeMissingDataframe().head()

Unnamed: 0,A,B,C,D
FNTh8mknFT,,0.207142,0.473827,-1.013271
V0TpHnrd24,-1.082026,-0.27512,0.089085,0.422982
GYP4m7OsEt,-0.251424,2.363055,,0.337756
HP6upygBdZ,-0.544216,0.249923,0.223397,-0.51707
RjGakKGlWh,0.260928,0.558773,1.244823,0.907824




---

# Pandas 19 | Formatting tampilan Data Frame
---





## Persiapan Data Frame

In [34]:
n_rows = 4
n_cols = 2
cols = ['omset', 'operasional']

df = pd.DataFrame(np.random.randint(1, 20, size=(n_rows, n_cols)), columns=cols)

df

Unnamed: 0,omset,operasional
0,6,5
1,13,19
2,5,11
3,3,4


In [35]:
df['omset'] = df['omset'] * 100_000
df['operasional'] = df['operasional'] * 10_000
df

Unnamed: 0,omset,operasional
0,600000,50000
1,1300000,190000
2,500000,110000
3,300000,40000


In [36]:
df.index = pd.util.testing.makeDateIndex(n_rows, freq='D')
df = df.reset_index()
df = df.rename(columns={'index' : 'tanggal'})
df

Unnamed: 0,tanggal,omset,operasional
0,2000-01-01,600000,50000
1,2000-01-02,1300000,190000
2,2000-01-03,500000,110000
3,2000-01-04,300000,40000



## Melakukan formatting tampilan DataFrame





In [37]:
format_dict = {
    'tanggal' : '{:%d/%m/%y}',
    'operasional' : 'Rp {:.2f}',
    'omset' : 'Rp {:.2f}'
}

laporan = df.style.format(format_dict)
laporan

Unnamed: 0,tanggal,omset,operasional
0,01/01/00,Rp 600000.00,Rp 50000.00
1,02/01/00,Rp 1300000.00,Rp 190000.00
2,03/01/00,Rp 500000.00,Rp 110000.00
3,04/01/00,Rp 300000.00,Rp 40000.00


In [39]:
# Ketika menggunakan method style.format, maka typenya menjadi Styler
type(laporan)

pandas.io.formats.style.Styler

In [41]:
# hide_index() --> Untuk menghilangkan index yg ada pada output line ke-37 diatas
laporan.hide_index()

tanggal,omset,operasional
01/01/00,Rp 600000.00,Rp 50000.00
02/01/00,Rp 1300000.00,Rp 190000.00
03/01/00,Rp 500000.00,Rp 110000.00
04/01/00,Rp 300000.00,Rp 40000.00


In [42]:
laporan.set_caption('Data Omset dan Operasional')

tanggal,omset,operasional
01/01/00,Rp 600000.00,Rp 50000.00
02/01/00,Rp 1300000.00,Rp 190000.00
03/01/00,Rp 500000.00,Rp 110000.00
04/01/00,Rp 300000.00,Rp 40000.00


In [44]:
laporan.highlight_min('omset', color='pink')
laporan.highlight_max('omset', color='lightgreen')

laporan.highlight_min('operasional', color='blue')
laporan.highlight_max('operasional', color='grey')

tanggal,omset,operasional
01/01/00,Rp 600000.00,Rp 50000.00
02/01/00,Rp 1300000.00,Rp 190000.00
03/01/00,Rp 500000.00,Rp 110000.00
04/01/00,Rp 300000.00,Rp 40000.00


In [None]:
# resample('D') --> D = resampling interval tiap hari
df.resample('D')['col_1'].sum().to_frame()

Unnamed: 0,col_1
2000-01-01,212
2000-01-02,267
2000-01-03,222
2000-01-04,209
2000-01-05,240
...,...
2000-12-26,217
2000-12-27,209
2000-12-28,203
2000-12-29,206




---

# Pandas 20 | Menggabungkan dua Data Frame secara berdampingan

---





## Persiapan Data Frame

In [45]:
d1 = {
    'col_1' : [1,2,3],
    'col_2' : [10,20,30]
}

df1 = pd.DataFrame(d1)
df1

Unnamed: 0,col_1,col_2
0,1,10
1,2,20
2,3,30


In [47]:
d2 = {
    'col_3' : [4,5,6],
    'col_4' : [40,50,60]
}

df2 = pd.DataFrame(d2)
df2

Unnamed: 0,col_3,col_4
0,4,40
1,5,50
2,6,60


## Merging dua DataFrame secara berdampingan



In [52]:
df = pd.merge(df1, df2, left_index = True, right_index = True)
df

Unnamed: 0,col_1,col_2,col_3,col_4
0,1,10,4,40
1,2,20,5,50
2,3,30,6,60
