# **Week 05**

# 17 : Resampling pada Data Deret Waktu (time series data)

## Import Modules

In [1]:
import pandas as pd
import numpy as np

print(pd.__version__)
print(np.__version__)

2.1.3
1.26.2


## Persiapan Data Frame

In [2]:
n_rows = 365 * 24
n_cols = 2
cols = ['col1', 'col2']

# Create a DataFrame with a datetime index
date_index = pd.date_range(start='2023-01-01', periods=n_rows, freq='H')
df = pd.DataFrame(np.random.randint(1, 20, size=(n_rows, n_cols)), columns=cols, index=date_index)

df.head()

Unnamed: 0,col1,col2
2023-01-01 00:00:00,18,9
2023-01-01 01:00:00,3,2
2023-01-01 02:00:00,10,1
2023-01-01 03:00:00,8,14
2023-01-01 04:00:00,18,17


## Resampling Data dengan Interval Monthly

In [3]:
df.resample('M')['col1'].sum().to_frame()

Unnamed: 0,col1
2023-01-31,7406
2023-02-28,6656
2023-03-31,7306
2023-04-30,7221
2023-05-31,7087
2023-06-30,7242
2023-07-31,7191
2023-08-31,7219
2023-09-30,7368
2023-10-31,7310


## Resampling Data dengan Interval Daily

In [4]:
df.resample('D')['col1'].sum().to_frame()

Unnamed: 0,col1
2023-01-01,229
2023-01-02,232
2023-01-03,201
2023-01-04,193
2023-01-05,229
...,...
2023-12-27,227
2023-12-28,255
2023-12-29,272
2023-12-30,258


# 18 : Membentuk dummy Data Frame

## Import Modules

In [5]:
import pandas as pd
import numpy as np

#Cek versi
print(pd.__version__)
print(np.__version__)

2.1.3
1.26.2


## Membentuk Data Frame dari Dictionary

In [6]:
pd.DataFrame({'col1':[1, 2, 3, 4], 
              'col2':[5, 6, 7, 8]})

Unnamed: 0,col1,col2
0,1,5
1,2,6
2,3,7
3,4,8


## Membentuk Data Frame dari Numpy Array

In [7]:
n_rows = 5
n_cols = 3

arr = np.random.randint(1, 20, size=(n_rows, n_cols))
arr

array([[ 3,  8,  5],
       [18, 11, 17],
       [10, 16, 14],
       [ 5,  4, 15],
       [ 1,  1, 18]])

In [8]:
pd.DataFrame(arr, columns=tuple('ABC'))

Unnamed: 0,A,B,C
0,3,8,5
1,18,11,17
2,10,16,14
3,5,4,15
4,1,1,18


## Membentuk Data Frame dengan Memanfaatkan pandas.util.testing

In [10]:
# Create a DataFrame with sample data
data = {
    'A': np.random.rand(5),
    'B': np.random.randint(1, 10, size=(5)),
    'C': np.random.choice(['X', 'Y', 'Z'], size=(5))
}

df = pd.DataFrame(data)

# Display the first few rows of the DataFrame
df.head()

Unnamed: 0,A,B,C
0,0.175989,8,X
1,0.144405,2,X
2,0.352845,2,Y
3,0.647855,1,X
4,0.830177,6,Y


In [12]:
# Create a mixed data type DataFrame with sample data
data = {
    'A': [1, 2.5, 'foo', True, pd.to_datetime('2023-01-01')],
    'B': [1.1, 'bar', False, 3, np.nan],
    'C': [pd.Timedelta(days=1), 42, 'baz', 7.7, 'qux']
}

df = pd.DataFrame(data)

# Display the first few rows of the DataFrame
df.head()

Unnamed: 0,A,B,C
0,1,1.1,1 days 00:00:00
1,2.5,bar,42
2,foo,False,baz
3,True,3,7.7
4,2023-01-01 00:00:00,,qux


In [13]:
# Create a time-based DataFrame with sample data
n_rows = 5
date_index = pd.date_range(start='2023-01-01', periods=n_rows, freq='D')

data = {
    'A': np.random.rand(n_rows),
    'B': np.random.randn(n_rows),
    'C': np.random.randint(1, 10, size=n_rows)
}

df = pd.DataFrame(data, index=date_index)

# Display the first few rows of the DataFrame
df.head()

Unnamed: 0,A,B,C
2023-01-01,0.72496,-1.387137,2
2023-01-02,0.522664,-0.459761,5
2023-01-03,0.193145,-1.02274,8
2023-01-04,0.64137,-0.794624,3
2023-01-05,0.867376,0.551352,8


In [15]:
# Create a DataFrame with random data
data = {'A': np.random.rand(5), 'B': np.random.rand(5), 'C': np.random.rand(5)}
df = pd.DataFrame(data)

# Introduce some missing values
df.iloc[2:4, 0] = np.nan
df.iloc[1, 1] = np.nan
df.iloc[3, 2] = np.nan

# Display the DataFrame with missing values
df.head()

Unnamed: 0,A,B,C
0,0.982235,0.688978,0.451702
1,0.885425,,0.553374
2,,0.152755,0.017224
3,,0.80285,
4,0.888001,0.38104,0.452171


# 19 : Formatting tampilan Data Frame

## Import Modules

In [16]:
import pandas as pd
import numpy as np

print(pd.__version__)
print(np.__version__)

2.1.3
1.26.2


## Persiapan Data Frame

In [17]:
n_rows = 5
n_cols = 2
cols = ['omset', 'operasional']

df = pd.DataFrame(np.random.randint(1, 20, size=(n_rows, n_cols)), 
                  columns=cols)
df

Unnamed: 0,omset,operasional
0,13,8
1,2,15
2,5,9
3,1,2
4,10,2


In [18]:
df['omset'] = df['omset'] * 100_000
df['operasional'] = df['operasional'] * 10_000
df

Unnamed: 0,omset,operasional
0,1300000,80000
1,200000,150000
2,500000,90000
3,100000,20000
4,1000000,20000


In [27]:
n_rows = 365  # or any other value

# Create a DataFrame with random data
data = {'A': np.random.rand(n_rows), 'B': np.random.rand(n_rows), 'C': np.random.rand(n_rows)}
df = pd.DataFrame(data)

# Create a date index using pd.date_range
date_index = pd.date_range(start='2023-01-01', periods=n_rows, freq='D')

# Set the date index to the DataFrame
df.index = date_index

# Reset the index and rename the original index column
df = df.reset_index().rename(columns={'index': 'tanggal'})

df


Unnamed: 0,tanggal,A,B,C
0,2023-01-01,0.541687,0.013667,0.971747
1,2023-01-02,0.737038,0.948785,0.985587
2,2023-01-03,0.683244,0.661411,0.551344
3,2023-01-04,0.663888,0.137486,0.144069
4,2023-01-05,0.767440,0.256624,0.098168
...,...,...,...,...
360,2023-12-27,0.998419,0.154749,0.869153
361,2023-12-28,0.681456,0.751978,0.782006
362,2023-12-29,0.898697,0.511536,0.273186
363,2023-12-30,0.130515,0.953377,0.295301


## Melakukan formatting tampilan Data Frame

In [22]:
formatku = {'tanggal':'{:%d/%m/%y}', 
            'operasional':'Rp {:.2f}',
            'omset':'Rp {:.2f}'}

laporan = df.style.format(formatku)
laporan # laporan bukan objek dataframe

Unnamed: 0,tanggal,A,B,C
0,01/01/23,0.159663,0.309672,0.959025
1,02/01/23,0.459512,0.577328,0.696607
2,03/01/23,0.257346,0.196452,0.23944
3,04/01/23,0.232222,0.725071,0.829243
4,05/01/23,0.067874,0.730593,0.604639
5,06/01/23,0.099758,0.523302,0.304965
6,07/01/23,0.950697,0.790473,0.999866
7,08/01/23,0.624114,0.274115,0.7814
8,09/01/23,0.610404,0.588373,0.899555
9,10/01/23,0.048132,0.271666,0.611627


In [23]:
type(laporan)

pandas.io.formats.style.Styler

In [24]:
laporan.hide_index()

AttributeError: 'Styler' object has no attribute 'hide_index'

- Memberi keterangan

In [21]:
laporan.set_caption('Data Omset dan Operasional')

tanggal,omset,operasional
01/01/00,Rp 1200000.00,Rp 130000.00
02/01/00,Rp 500000.00,Rp 80000.00
03/01/00,Rp 1100000.00,Rp 50000.00
04/01/00,Rp 1200000.00,Rp 150000.00
05/01/00,Rp 1500000.00,Rp 80000.00


- Memberi penanda warna pada dataframe

In [22]:
laporan.highlight_min('omset', color='pink')
laporan.highlight_max('omset', color='lightgreen')

laporan.highlight_min('operasional', color='lightblue')
laporan.highlight_max('operasional', color='grey')

tanggal,omset,operasional
01/01/00,Rp 1200000.00,Rp 130000.00
02/01/00,Rp 500000.00,Rp 80000.00
03/01/00,Rp 1100000.00,Rp 50000.00
04/01/00,Rp 1200000.00,Rp 150000.00
05/01/00,Rp 1500000.00,Rp 80000.00


# **#20 Menggabungkan (merge) Dua Data Frame Secara Berdampingan**
Source : [Indonesia Belajar](https://youtu.be/AldqObRhkDU)

# Import Modules

In [23]:
import pandas as pd

print(pd.__version__)

1.3.4


- Membuat 2 buah dataframe
- Masing-masing dataframe akan memiliki dua buah dictionary

In [24]:
d1 = {'col1':[1, 2, 3], 
      'col2':[10, 20, 30]}
df1 = pd.DataFrame(d1)
df1

Unnamed: 0,col1,col2
0,1,10
1,2,20
2,3,30


In [25]:
d2 = {'col3':[4, 5, 6], 
      'col4':[40, 50, 60]}
df2 = pd.DataFrame(d2)
df2

Unnamed: 0,col3,col4
0,4,40
1,5,50
2,6,60


- Berbeda dengan trick sebelumnya
- Kali ini, kita hanya perlu menambahkan arguments True pada parameter left_index dan right_index.

In [26]:
df = pd.merge(df1, df2, left_index=True, right_index=True)
df

Unnamed: 0,col1,col2,col3,col4
0,1,10,4,40
1,2,20,5,50
2,3,30,6,60
