# 17 Resampling pada Data Deret Waktu (time series data)

Import Modules

In [1]:
import pandas as pd
import numpy as np

# Print the pandas and NumPy versions, one per line, so the outputs below
# can be tied to a known environment.
print(pd.__version__, np.__version__, sep='\n')

2.0.3
1.24.3


Persiapan Data Frame

In [2]:
# One row per hour for 365 days, two columns of random integers in [1, 20).
cols = ['col1', 'col2']
n_rows, n_cols = 365 * 24, 2

# Hourly timestamps starting at midnight on 1 Jan 2023.
hourly_index = pd.date_range(start='2023-01-01', periods=n_rows, freq='H')
random_values = np.random.randint(1, 20, size=(n_rows, n_cols))

df = pd.DataFrame(random_values, index=hourly_index, columns=cols)
df

Unnamed: 0,col1,col2
2023-01-01 00:00:00,2,16
2023-01-01 01:00:00,14,3
2023-01-01 02:00:00,6,19
2023-01-01 03:00:00,3,18
2023-01-01 04:00:00,10,16
...,...,...
2023-12-31 19:00:00,17,19
2023-12-31 20:00:00,12,14
2023-12-31 21:00:00,4,8
2023-12-31 22:00:00,2,5


Resampling Data dengan Interval Monthly

In [5]:
# Monthly totals of col1: select the column as a one-column frame first,
# then sum the hourly rows that fall into each 'M' (month) bin.
df[['col1']].resample('M').sum()

Unnamed: 0,col1
2023-01-31,7582
2023-02-28,6634
2023-03-31,7449
2023-04-30,7187
2023-05-31,7520
2023-06-30,7278
2023-07-31,7477
2023-08-31,7500
2023-09-30,7425
2023-10-31,7798


Resampling Data dengan Interval Daily

In [6]:
# Daily totals of col1: select the column as a one-column frame first,
# then sum the hourly rows that fall into each 'D' (day) bin.
df[['col1']].resample('D').sum()

Unnamed: 0,col1
2023-01-01,217
2023-01-02,244
2023-01-03,220
2023-01-04,247
2023-01-05,219
...,...
2023-12-27,215
2023-12-28,249
2023-12-29,182
2023-12-30,240


# 18 Membentuk dummy Data Frame

Import Modules

In [7]:
import pandas as pd
import numpy as np

# Print the pandas and NumPy versions, one per line, so the outputs below
# can be tied to a known environment.
print(pd.__version__, np.__version__, sep='\n')

2.0.3
1.24.3


Membentuk Data Frame dari Dictionary

In [8]:
# Each dictionary key becomes a column label; each list supplies that
# column's values, top to bottom.
columns_by_name = dict(col1=[1, 2, 3, 4], col2=[5, 6, 7, 8])
pd.DataFrame(columns_by_name)

Unnamed: 0,col1,col2
0,1,5
1,2,6
2,3,7
3,4,8


Membentuk Data Frame dari Numpy Array

In [9]:
# 5 x 3 array of random integers drawn from [1, 20).
n_rows, n_cols = 5, 3
arr = np.random.randint(1, 20, size=(n_rows, n_cols))
arr

array([[18,  9,  4],
       [ 4,  5, 19],
       [ 6,  8, 16],
       [ 6,  1, 13],
       [ 9, 11,  3]])

In [10]:
# Wrap the array in a DataFrame, labelling its three columns 'A', 'B', 'C'.
df = pd.DataFrame(data=arr, columns=list('ABC'))

Membentuk Data Frame Tiruan secara Manual (pengganti pandas.util.testing, yang tidak lagi tersedia di pandas 2.x)

In [12]:
# Five consecutive calendar days, each paired with a random integer in [1, 100).
date_rng = pd.date_range(start='2023-01-01', end='2023-01-05', freq='D')
df = pd.DataFrame({
    'Date': date_rng,
    'value': np.random.randint(1, 100, size=len(date_rng)),
})

# reset_index(drop=True) renumbers the rows from 0 and discards the old index.
result = df.reset_index(drop=True)
print(result)

        Date  value
0 2023-01-01     57
1 2023-01-02     85
2 2023-01-03     80
3 2023-01-04     18
4 2023-01-05     65


In [13]:
# Five-row frame with one column per dtype: random integers in [1, 100),
# random floats in [0, 1), single-letter strings, daily dates and booleans.
data = dict(
    col1=np.random.randint(1, 100, size=5),
    col2=np.random.rand(5),
    col3=list('ABCDE'),
    col4=pd.date_range(start='2023-01-01', periods=5, freq='D'),
    col5=[True, False, True, False, True],
)
df = pd.DataFrame(data)
df.head()

Unnamed: 0,col1,col2,col3,col4,col5
0,44,0.852555,A,2023-01-01,True
1,11,0.421981,B,2023-01-02,False
2,19,0.117166,C,2023-01-03,True
3,9,0.911778,D,2023-01-04,False
4,18,0.613774,E,2023-01-05,True


In [14]:
# Five daily timestamps starting 3 Jan 2000 serve as the row index of a
# 5 x 4 frame of standard-normal random floats.
date_index = pd.date_range(start='2000-01-03', periods=5, freq='D')
df = pd.DataFrame(
    data=np.random.randn(5, 4),
    index=date_index,
    columns=list('ABCD'),
)
df.head()

Unnamed: 0,A,B,C,D
2000-01-03,1.406569,0.203469,-0.087943,-0.356886
2000-01-04,-0.07597,1.135609,-1.134648,-1.019116
2000-01-05,0.723151,0.403678,0.052033,1.915797
2000-01-06,1.283097,0.179348,0.552485,-0.765167
2000-01-07,1.280826,0.495392,-0.717062,-2.680614


In [15]:
# Dummy frame with string row labels and 20% of its cells set to NaN.
index = ['WmDnD0VgHq', 'oK5aIeQDwO', 'kRIH8XgQXB', 'xOQltmT4oD', '6QBa6ZnCaP']
columns = ['A', 'B', 'C', 'D']

# Inject the NaNs into the raw array *before* wrapping it in a DataFrame.
# Writing through `df.values.flat[...]` only works while `.values` happens to
# be a writable view of the frame's data; with pandas Copy-on-Write enabled
# that array is read-only and the assignment fails.
values = np.random.randn(len(index), len(columns))

# Pick distinct flat positions (replace=False) covering 20% of all cells.
random_indices = np.random.choice(values.size, replace=False, size=int(values.size * 0.2))
values.flat[random_indices] = np.nan

df = pd.DataFrame(values, index=index, columns=columns)

df.head()

Unnamed: 0,A,B,C,D
WmDnD0VgHq,-0.272316,,0.632297,
oK5aIeQDwO,0.923968,-1.583985,1.036779,0.612198
kRIH8XgQXB,,0.532682,0.10596,0.914729
xOQltmT4oD,-0.988282,1.086572,0.698742,0.602488
6QBa6ZnCaP,-1.257943,,-0.313732,-0.191107


# 19 Formatting tampilan Data Frame

Import Modules

In [16]:
import pandas as pd
import numpy as np

# Print the pandas and NumPy versions, one per line, so the outputs below
# can be tied to a known environment.
print(pd.__version__, np.__version__, sep='\n')

2.0.3
1.24.3


Persiapan Data Frame

In [17]:
# Five rows of random integers in [1, 20) for the two report columns.
cols = ['omset', 'operasional']
n_rows, n_cols = 5, 2

random_values = np.random.randint(1, 20, size=(n_rows, n_cols))
df = pd.DataFrame(data=random_values, columns=cols)
df

Unnamed: 0,omset,operasional
0,8,7
1,15,4
2,8,15
3,18,4
4,4,9


In [18]:
# Scale the raw integers: omset by 100,000 and operasional by 10,000.
# NOTE: this overwrites the columns in place, so re-running the cell
# multiplies them again.
scale_factors = {'omset': 100_000, 'operasional': 10_000}
for column_name, factor in scale_factors.items():
    df[column_name] = df[column_name] * factor
df

Unnamed: 0,omset,operasional
0,800000,70000
1,1500000,40000
2,800000,150000
3,1800000,40000
4,400000,90000


In [20]:
# Stamp each row with a consecutive calendar day, starting on 1 Jan 2000.
start_date = pd.Timestamp('2000-01-01')
df['tanggal'] = pd.date_range(start_date, periods=n_rows)

# Move the date to the front so the report reads date -> omset -> operasional.
column_order = ['tanggal', 'omset', 'operasional']
df = df[column_order]
df

Unnamed: 0,tanggal,omset,operasional
0,2000-01-01,800000,70000
1,2000-01-02,1500000,40000
2,2000-01-03,800000,150000
3,2000-01-04,1800000,40000
4,2000-01-05,400000,90000


Melakukan formatting tampilan Data Frame

In [21]:
# Per-column display formats: dates as dd/mm/yy, money with an 'Rp ' prefix
# and two decimals. Both money columns share the same format string.
rupiah_format = 'Rp {:.2f}'
formatku = {
    'tanggal': '{:%d/%m/%y}',
    'operasional': rupiah_format,
    'omset': rupiah_format,
}

# .style.format changes only how values are rendered; the result is a Styler.
laporan = df.style.format(formatku)
laporan

Unnamed: 0,tanggal,omset,operasional
0,01/01/00,Rp 800000.00,Rp 70000.00
1,02/01/00,Rp 1500000.00,Rp 40000.00
2,03/01/00,Rp 800000.00,Rp 150000.00
3,04/01/00,Rp 1800000.00,Rp 40000.00
4,05/01/00,Rp 400000.00,Rp 90000.00


In [22]:
# Show which class `laporan` is an instance of.
laporan_type = type(laporan)
laporan_type

pandas.io.formats.style.Styler

In [23]:
# Render the formatted report without the row-index column.
# Styler.hide(axis='index') removes the index from the rendered table, so the
# frame keeps its real, unique index instead of being relabelled with blank
# strings. Duplicate labels are rejected by some Styler methods (e.g.
# Styler.apply), and the blank-label trick still leaves an empty index column.
df_temp = df.copy()

df_temp_styled = df_temp.style.format(formatku).hide(axis='index')
df_temp_styled

Unnamed: 0,tanggal,omset,operasional
,01/01/00,Rp 800000.00,Rp 70000.00
,02/01/00,Rp 1500000.00,Rp 40000.00
,03/01/00,Rp 800000.00,Rp 150000.00
,04/01/00,Rp 1800000.00,Rp 40000.00
,05/01/00,Rp 400000.00,Rp 90000.00


In [24]:
# Attach a caption to the styled report; the returned Styler is displayed.
caption = 'Data Omset dan Operasional'
laporan.set_caption(caption)

Unnamed: 0,tanggal,omset,operasional
0,01/01/00,Rp 800000.00,Rp 70000.00
1,02/01/00,Rp 1500000.00,Rp 40000.00
2,03/01/00,Rp 800000.00,Rp 150000.00
3,04/01/00,Rp 1800000.00,Rp 40000.00
4,05/01/00,Rp 400000.00,Rp 90000.00


In [25]:
# Colour the smallest and largest value of each money column.
# Each highlight_* call returns the Styler, so the four calls are chained and
# the final result is what the cell displays.
(
    laporan
    .highlight_min(subset='omset', color='pink')
    .highlight_max(subset='omset', color='lightgreen')
    .highlight_min(subset='operasional', color='lightblue')
    .highlight_max(subset='operasional', color='grey')
)

Unnamed: 0,tanggal,omset,operasional
0,01/01/00,Rp 800000.00,Rp 70000.00
1,02/01/00,Rp 1500000.00,Rp 40000.00
2,03/01/00,Rp 800000.00,Rp 150000.00
3,04/01/00,Rp 1800000.00,Rp 40000.00
4,05/01/00,Rp 400000.00,Rp 90000.00


# 20 Menggabungkan (merge) Dua Data Frame Secara Berdampingan

Import Modules

In [26]:
import pandas as pd

# Print the pandas version so the outputs below can be tied to a known environment.
pandas_version = pd.__version__
print(pandas_version)

2.0.3


In [27]:
# Left-hand frame: two integer columns, three rows, default 0..2 index.
d1 = dict(col1=[1, 2, 3], col2=[10, 20, 30])
df1 = pd.DataFrame(data=d1)
df1

Unnamed: 0,col1,col2
0,1,10
1,2,20
2,3,30


In [28]:
# Right-hand frame: two integer columns, three rows, default 0..2 index.
d2 = dict(col3=[4, 5, 6], col4=[40, 50, 60])
df2 = pd.DataFrame(data=d2)
df2

Unnamed: 0,col3,col4
0,4,40
1,5,50
2,6,60


In [29]:
# Index-on-index merge: rows sharing the same index label are placed side by
# side, giving col1..col4 in one frame.
df = df1.merge(df2, left_index=True, right_index=True)
df

Unnamed: 0,col1,col2,col3,col4
0,1,10,4,40
1,2,20,5,50
2,3,30,6,60
