# Resampling pada data deret waktu (Time Series Data)

# _Import modules_

In [16]:
import pandas as pd
import numpy as np

# _Membentuk data frame yang akan digunakan_
- 365 * 24
    - 365 = jumlah hari / tahun
    - 24 = jumlah jam / hari
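- index dari data frame diisi dengan deret waktu per jam (`DatetimeIndex`) yang dimulai dari 2000-01-01; helper `pd.util.testing.makeDateIndex` sudah deprecated sejak pandas 1.0 dan tidak tersedia lagi di pandas 2.x, penggantinya adalah `pd.date_range`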

In [18]:
# Hourly demo frame: 365 days * 24 hours = 8760 rows of random integers in [1, 20).
n_rows = 365 * 24
n_cols = 2
cols = ['col1', 'col2']

# pd.util.testing.makeDateIndex was deprecated in pandas 1.0 and is no longer
# available in pandas 2.x; pd.date_range builds the same hourly DatetimeIndex
# starting at 2000-01-01. The lowercase 'h' alias is used because the uppercase
# 'H' spelling is deprecated in recent pandas releases.
hourly_index = pd.date_range('2000-01-01', periods=n_rows, freq='h')

df = pd.DataFrame(np.random.randint(1, 20, size=(n_rows, n_cols)),
                  columns=cols,
                  index=hourly_index)
df

Unnamed: 0,col1,col2
2000-01-01 00:00:00,2,1
2000-01-01 01:00:00,4,16
2000-01-01 02:00:00,6,13
2000-01-01 03:00:00,8,6
2000-01-01 04:00:00,6,2
...,...,...
2000-12-30 19:00:00,7,8
2000-12-30 20:00:00,2,3
2000-12-30 21:00:00,7,15
2000-12-30 22:00:00,12,1


# _Resampling data dengan interval monthly_
- Proses pengelompokan data per bulan

In [19]:
# Monthly totals of col1: keep only that column (as a one-column frame), bucket
# the hourly rows by month-end ('M'), then sum each bucket.
# NOTE(review): recent pandas releases prefer the 'ME' alias for month-end —
# confirm against the installed version before switching.
df[['col1']].resample('M').sum()

Unnamed: 0,col1
2000-01-31,7373
2000-02-29,6872
2000-03-31,7134
2000-04-30,7271
2000-05-31,7495
2000-06-30,7435
2000-07-31,7464
2000-08-31,7393
2000-09-30,7274
2000-10-31,7520


# _Resampling data dengan interval daily_
- proses pengelompokan data per hari

In [21]:
# Daily totals of col1: keep only that column (as a one-column frame), bucket
# the hourly rows per calendar day ('D'), then sum each bucket.
df[['col1']].resample('D').sum()

Unnamed: 0,col1
2000-01-01,202
2000-01-02,251
2000-01-03,222
2000-01-04,238
2000-01-05,219
...,...
2000-12-26,268
2000-12-27,250
2000-12-28,249
2000-12-29,250


# Membentuk Dummy Data Frame
- Membentuk Dummy data frame dengan 3 cara :
    - Menggunakan struktur data Dictionary
    - Menggunakan Numpy Array
    - Menggunakan pandas.util.testing

# _Dictionary_

In [23]:
# Each dictionary key becomes a column name; each list holds that column's values.
data = {
    'col1': [1, 2, 3, 4],
    'col2': [5, 6, 7, 8],
}
pd.DataFrame(data)

Unnamed: 0,col1,col2
0,1,5
1,2,6
2,3,7
3,4,8


# _Numpy Array_

In [24]:
# 5 x 3 array of random integers drawn from [1, 20) (the upper bound is exclusive).
n_rows, n_cols = 5, 3

arr = np.random.randint(low=1, high=20, size=(n_rows, n_cols))
arr

array([[16,  6, 14],
       [ 4, 10,  8],
       [15,  6, 11],
       [11, 10, 18],
       [ 4, 16, 18]])

In [25]:
# Wrap the NumPy array in a frame and label its three columns A, B and C.
pd.DataFrame(data=arr, columns=['A', 'B', 'C'])

Unnamed: 0,A,B,C
0,16,6,14
1,4,10,8
2,15,6,11
3,11,10,18
4,4,16,18


# _pandas.util.testing_
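- .makeDataFrame() = Data Frame sederhana
- .makeMixedDataFrame() = Data Frame yang bercampur
- .makeTimeDataFrame() = Time series Data Frame
- .makeMissingDataframe() = Data Frame dengan beberapa Missing value
- Catatan: `pandas.util.testing` sudah deprecated sejak pandas 1.0 dan dihapus di pandas 2.0; pada versi baru, data frame serupa perlu dibentuk sendiri dengan `pd.DataFrame`, `numpy`, dan `pd.date_range` / `pd.bdate_range`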

In [46]:
# pd.util.testing.makeDataFrame was deprecated in pandas 1.0 and removed in 2.0,
# so a frame of the same kind is built by hand: standard-normal floats in
# columns A-D, indexed by random 10-character alphanumeric labels.
# The 30-row size is an arbitrary demo size.
alphabet = list('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789')
labels = [''.join(np.random.choice(alphabet, size=10)) for _ in range(30)]

df_simple = pd.DataFrame(np.random.randn(30, 4),
                         columns=list('ABCD'),
                         index=labels)
df_simple.head()

Unnamed: 0,A,B,C,D
p0BBQmJCgU,-0.866623,0.789124,-0.302545,1.300374
K8fFIqNMah,2.459401,1.303734,-0.669603,-0.853529
202zq6ohYd,0.231455,-0.57626,0.564641,-0.541374
SBHYO4xS2U,2.479311,0.287504,0.257094,-0.831748
izyzAu4nYY,0.803697,-1.236468,-0.45026,-1.061903


In [29]:
# pd.util.testing.makeMixedDataFrame was deprecated in pandas 1.0 and removed
# in 2.0, so the mixed-dtype frame is spelled out explicitly: two float
# columns, one string column and one column of business days from 2009-01-01.
df_mixed = pd.DataFrame({
    'A': [0.0, 1.0, 2.0, 3.0, 4.0],
    'B': [0.0, 1.0, 0.0, 1.0, 0.0],
    'C': ['foo1', 'foo2', 'foo3', 'foo4', 'foo5'],
    # bdate_range skips the weekend, hence 2009-01-02 is followed by 2009-01-05.
    'D': pd.bdate_range('2009-01-01', periods=5),
})
df_mixed.head()

Unnamed: 0,A,B,C,D
0,0.0,0.0,foo1,2009-01-01
1,1.0,1.0,foo2,2009-01-02
2,2.0,0.0,foo3,2009-01-05
3,3.0,1.0,foo4,2009-01-06
4,4.0,0.0,foo5,2009-01-07


In [30]:
# pd.util.testing.makeTimeDataFrame was deprecated in pandas 1.0 and removed in
# 2.0, so the time-series frame is built by hand: standard-normal floats in
# columns A-D on a business-day index. 2000-01-01 falls on a weekend, so the
# first index entry is 2000-01-03. The 30-row size is an arbitrary demo size.
df_time = pd.DataFrame(np.random.randn(30, 4),
                       columns=list('ABCD'),
                       index=pd.bdate_range('2000-01-01', periods=30))
df_time.head()

Unnamed: 0,A,B,C,D
2000-01-03,0.14958,0.759616,0.853075,1.153849
2000-01-04,0.212539,0.254542,0.476114,-0.344399
2000-01-05,-1.007076,0.414017,1.336297,0.589773
2000-01-06,-0.186111,0.623775,-1.094176,-0.365051
2000-01-07,0.317765,-1.122836,-0.773397,-0.085617


In [32]:
# pd.util.testing.makeMissingDataframe was deprecated in pandas 1.0 and removed
# in 2.0, so the frame with missing values is built by hand: standard-normal
# floats in columns A-D, random 10-character labels, and 10% of the cells
# (chosen at random, without repetition) overwritten with NaN.
# The 30-row size is an arbitrary demo size.
alphabet = list('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789')
labels = [''.join(np.random.choice(alphabet, size=10)) for _ in range(30)]

values = np.random.randn(30, 4)
missing_cells = np.random.choice(values.size, size=values.size // 10, replace=False)
values.flat[missing_cells] = np.nan

df_missing = pd.DataFrame(values, columns=list('ABCD'), index=labels)
df_missing.head()

Unnamed: 0,A,B,C,D
1cKEGaUbnb,-1.426822,-1.171317,1.08703,-0.356772
1PsYtFxgJC,0.798611,0.607176,0.136021,-0.548551
xdsJdvqKD8,-0.726181,0.593418,,1.948427
hBtI7BwE2S,-0.162326,1.140414,-0.718237,0.231725
a8wb1GtFaR,0.472303,0.598209,-0.931102,-0.8881


# Formatting tampilan Data Frame

# _Persiapan Data Frame_
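- Membentuk data frame berisi 5 baris dengan kolom `omset` dan `operasional` yang diisi bilangan bulat acak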

In [59]:
# Small demo frame: 5 rows, two columns of random integers drawn from [1, 20).
n_rows, n_cols = 5, 2
cols = ['omset', 'operasional']

values = np.random.randint(low=1, high=20, size=(n_rows, n_cols))
df = pd.DataFrame(data=values, columns=cols)
df

Unnamed: 0,omset,operasional
0,8,6
1,9,13
2,17,19
3,16,13
4,17,7


- Mengkonversikan omset & operasional
    - omset * 100000
    - operasional * 10000
- `100_000` = underscore "_" di dalam literal angka Python berfungsi sebagai pemisah digit (PEP 515) agar angka lebih mudah dibaca; nilainya tetap 100000
 

In [60]:
# Multiply each column by its own factor. The columns are overwritten in place,
# so re-running this cell multiplies the already-scaled values again.
scale_by = {'omset': 100_000, 'operasional': 10_000}
for column, factor in scale_by.items():
    df[column] = df[column] * factor
df

Unnamed: 0,omset,operasional
0,800000,60000
1,900000,130000
2,1700000,190000
3,1600000,130000
4,1700000,70000


- Membentuk kolom tanggal & menamai dengan 'tanggal'

In [61]:
# Add a daily 'tanggal' column (starting 2000-01-01) as the first column.
# pd.util.testing.makeDateIndex was deprecated in pandas 1.0 and is no longer
# available in pandas 2.x, so the dates come from pd.date_range. Using len(df)
# keeps the number of dates in step with the frame itself.
dates = pd.date_range('2000-01-01', periods=len(df), freq='D')

# assign + column reordering replaces the index/reset_index/rename sequence so
# that re-running the cell overwrites 'tanggal' instead of inserting a second
# date column.
df = df.assign(tanggal=dates)
df = df[['tanggal'] + [col for col in df.columns if col != 'tanggal']]
df

Unnamed: 0,tanggal,omset,operasional
0,2000-01-01,800000,60000
1,2000-01-02,900000,130000
2,2000-01-03,1700000,190000
3,2000-01-04,1600000,130000
4,2000-01-05,1700000,70000


# _Formatting tampilan Data Frame_
- Memformat omset & Operasional dengan penanda Rp
- Memformat waktu dari Tahun-Bulan-Tanggal menjadi Tanggal-Bulan-Tahun


In [62]:
# Display formats per column: dates as day/month/two-digit-year, money columns
# as 'Rp' followed by the value with two decimals.
rupiah = 'Rp {:.2f}'
formatku = dict(tanggal='{:%d/%m/%y}', operasional=rupiah, omset=rupiah)

# df.style.format returns a Styler; df itself is not modified by this call.
laporan = df.style.format(formatku)
laporan

Unnamed: 0,tanggal,omset,operasional
0,01/01/00,Rp 800000.00,Rp 60000.00
1,02/01/00,Rp 900000.00,Rp 130000.00
2,03/01/00,Rp 1700000.00,Rp 190000.00
3,04/01/00,Rp 1600000.00,Rp 130000.00
4,05/01/00,Rp 1700000.00,Rp 70000.00


- Object yang disimpan di variable `laporan` bukan lagi bertipe Data Frame, melainkan object `Styler`
- Cell di bawah ini memeriksa tipe object yang ada di `laporan`

In [54]:
# Show the class of the object bound to `laporan`.
laporan.__class__

pandas.io.formats.style.Styler

- `Styler` tidak memiliki method `head()` seperti Data Frame, oleh karena itu pemanggilan `laporan.head()` menghasilkan `AttributeError`

In [55]:
# Calling head() on `laporan` raises AttributeError. The error is the point of
# this cell, so it is caught and printed rather than left to abort a
# "Restart & Run All".
try:
    laporan.head()
except AttributeError as err:
    print(f'AttributeError: {err}')

AttributeError: 'Styler' object has no attribute 'head'

- Data Frame ada di df.

In [63]:
# The underlying frame is still available as `df`; show its first five rows.
df.head(n=5)

Unnamed: 0,tanggal,omset,operasional
0,2000-01-01,800000,60000
1,2000-01-02,900000,130000
2,2000-01-03,1700000,190000
3,2000-01-04,1600000,130000
4,2000-01-05,1700000,70000


- Menyembunyikan index

In [64]:
# Styler.hide_index() was deprecated in pandas 1.4 and removed in 2.0;
# Styler.hide(axis='index') is its replacement. The old method is only used as
# a fallback on pandas releases that do not have Styler.hide yet.
laporan.hide(axis='index') if hasattr(laporan, 'hide') else laporan.hide_index()

tanggal,omset,operasional
01/01/00,Rp 800000.00,Rp 60000.00
02/01/00,Rp 900000.00,Rp 130000.00
03/01/00,Rp 1700000.00,Rp 190000.00
04/01/00,Rp 1600000.00,Rp 130000.00
05/01/00,Rp 1700000.00,Rp 70000.00


- Menambahkan judul 

In [65]:
# Attach a caption (table title) to the styled report.
laporan.set_caption(caption='Data Omset dan Operasional')

tanggal,omset,operasional
01/01/00,Rp 800000.00,Rp 60000.00
02/01/00,Rp 900000.00,Rp 130000.00
03/01/00,Rp 1700000.00,Rp 190000.00
04/01/00,Rp 1600000.00,Rp 130000.00
05/01/00,Rp 1700000.00,Rp 70000.00


- Memberikan highlight Max dan Min

In [71]:
# Colour the smallest and largest value of each money column. The calls are
# chained in the same order as before; the last one yields the displayed result.
(
    laporan
    .highlight_min(subset='omset', color='red')
    .highlight_max(subset='omset', color='yellow')
    .highlight_min(subset='operasional', color='purple')
    .highlight_max(subset='operasional', color='lightgreen')
)

tanggal,omset,operasional
01/01/00,Rp 800000.00,Rp 60000.00
02/01/00,Rp 900000.00,Rp 130000.00
03/01/00,Rp 1700000.00,Rp 190000.00
04/01/00,Rp 1600000.00,Rp 130000.00
05/01/00,Rp 1700000.00,Rp 70000.00
