# Import Modules

---



In [1]:
import pandas as pd
import numpy as np

# Show the pandas and numpy versions in use, one per line.
print(pd.__version__, np.__version__, sep='\n')

1.3.5
1.21.6




---

# Pandas 45 | Memadukan loc dan iloc untuk seleksi data pada Data Frame

---





## Persiapan Data Frame

In [3]:
# Load the Titanic passenger table and preview its first five rows.
# NOTE(review): absolute path, looks like a Colab location - adjust when running elsewhere.
titanic_csv = '/content/titanic_full.csv'
df = pd.read_csv(titanic_csv)
df.head()

Unnamed: 0,survived,name,sex,age,sibsp,parch,pclass,ticket,fare,cabin,embarked,boat,body,home.dest
0,1,"Allen, Miss. Elisabeth Walton",female,29.0,0,0,1,24160,211.3375,B5,S,2.0,,"St Louis, MO"
1,1,"Allison, Master. Hudson Trevor",male,0.9167,1,2,1,113781,151.55,C22 C26,S,11.0,,"Montreal, PQ / Chesterville, ON"
2,0,"Allison, Miss. Helen Loraine",female,2.0,1,2,1,113781,151.55,C22 C26,S,,,"Montreal, PQ / Chesterville, ON"
3,0,"Allison, Mr. Hudson Joshua Creighton",male,30.0,1,2,1,113781,151.55,C22 C26,S,,135.0,"Montreal, PQ / Chesterville, ON"
4,0,"Allison, Mrs. Hudson J C (Bessie Waldo Daniels)",female,25.0,1,2,1,113781,151.55,C22 C26,S,,,"Montreal, PQ / Chesterville, ON"



## Memadukan **loc** dan **iloc** untuk melakukan seleksi data

`iloc[x, y]` dan `loc[x, y]` --> x = baris | y = kolom
(`iloc` memilih berdasarkan posisi integer, `loc` memilih berdasarkan label)

In [4]:
# First take rows at positions 15..19 with iloc, then the label-based
# column range 'name' through 'age' (inclusive) with loc.
rows_15_to_19 = df.iloc[15:20, :]
rows_15_to_19.loc[:, 'name':'age']

Unnamed: 0,name,sex,age
15,"Baumann, Mr. John D",male,
16,"Baxter, Mr. Quigg Edmond",male,24.0
17,"Baxter, Mrs. James (Helene DeLaudeniere Chaput)",female,50.0
18,"Bazzani, Miss. Albina",female,32.0
19,"Beattie, Mr. Thomson",male,36.0


In [5]:
# Same selection in the opposite order: label-based column range first,
# then rows at positions 15..19.
name_to_age = df.loc[:, 'name':'age']
name_to_age.iloc[15:20, :]

Unnamed: 0,name,sex,age
15,"Baumann, Mr. John D",male,
16,"Baxter, Mr. Quigg Edmond",male,24.0
17,"Baxter, Mrs. James (Helene DeLaudeniere Chaput)",female,50.0
18,"Bazzani, Miss. Albina",female,32.0
19,"Beattie, Mr. Thomson",male,36.0




---

# Pandas 46 | Seleksi weekdays dan weekends pada data deret waktu

---





## Persiapan Data Frame

In [6]:
# Build a 365-row demo frame of random integers, one row per calendar day.
n_rows = 365
n_cols = 2
cols = ['col_1', 'col_2']

# randint's upper bound is exclusive, so values fall in 1..19. No seed is set
# in this cell, so the numbers differ on every run.
df = pd.DataFrame(np.random.randint(1, 20, size=(n_rows, n_cols)), columns=cols)
# pd.util.testing.makeDateIndex is deprecated (and removed in pandas 2.x);
# pd.date_range produces the same daily index starting at 2000-01-01.
df.index = pd.date_range('2000-01-01', periods=n_rows, freq='D')
df

Unnamed: 0,col_1,col_2
2000-01-01,5,19
2000-01-02,5,16
2000-01-03,3,18
2000-01-04,19,11
2000-01-05,18,2
...,...,...
2000-12-26,18,9
2000-12-27,8,5
2000-12-28,3,14
2000-12-29,17,18



## Seleksi weekdays dan weekends

In [9]:
# DatetimeIndex.dayofweek numbers Monday as 0 through Sunday as 6,
# so values below 5 are the Monday-Friday rows.
is_weekday = df.index.dayofweek < 5
weekdays_df = df[is_weekday]
weekdays_df.head(10)

Unnamed: 0,col_1,col_2
2000-01-03,3,18
2000-01-04,19,11
2000-01-05,18,2
2000-01-06,5,16
2000-01-07,18,12
2000-01-10,5,4
2000-01-11,16,6
2000-01-12,13,5
2000-01-13,18,18
2000-01-14,9,16


In [8]:
# Weekend rows: DatetimeIndex.dayofweek is 5 for Saturday and 6 for Sunday.
# Kept under its own name so the weekday selection in weekdays_df is not
# overwritten with weekend data.
weekends_df = df[df.index.dayofweek.isin([5, 6])]
weekends_df.head(10)

Unnamed: 0,col_1,col_2
2000-01-01,5,19
2000-01-02,5,16
2000-01-08,7,14
2000-01-09,3,13
2000-01-15,13,17
2000-01-16,11,8
2000-01-22,9,11
2000-01-23,3,4
2000-01-29,15,15
2000-01-30,9,18




---

# Pandas 47 | Penanganan kolom dengan tipe data beragam
---





## Persiapan Data Frame

In [10]:
# Small demo frame whose 'ipk' column mixes int, float and str values.
d = dict(
    nama=['bejo', 'tejo', 'wati', 'tiwi', 'cecep'],
    ipk=[2, '3', 3, 2.75, '3.25'],
)
df = pd.DataFrame(data=d)
df

Unnamed: 0,nama,ipk
0,bejo,2.0
1,tejo,3.0
2,wati,3.0
3,tiwi,2.75
4,cecep,3.25



## Deteksi dan penanganan kolom dengan tipe data beragam

In [11]:
# Inspect the dtype pandas assigned to each column; the recorded output
# reports object for both 'nama' and 'ipk'.
df.dtypes

nama    object
ipk     object
dtype: object

In [12]:
# Map every 'ipk' entry to its Python type to reveal the mixed contents.
ipk_types = df['ipk'].apply(type)
ipk_types

0      <class 'int'>
1      <class 'str'>
2      <class 'int'>
3    <class 'float'>
4      <class 'str'>
Name: ipk, dtype: object

In [13]:
# Tally how many 'ipk' entries there are of each Python type.
ipk_types = df['ipk'].apply(type)
ipk_types.value_counts()

<class 'int'>      2
<class 'str'>      2
<class 'float'>    1
Name: ipk, dtype: int64

In [15]:
# Convert every 'ipk' entry (int, float or numeric string) to float and
# store the result back in the same column.
ipk_as_float = df['ipk'].astype(float)
df['ipk'] = ipk_as_float

In [16]:
# Re-count element types after the cast to confirm the column is uniform.
ipk_types_after_cast = df['ipk'].apply(type)
ipk_types_after_cast.value_counts()

<class 'float'>    5
Name: ipk, dtype: int64



---

# Pandas 48 | Cumulative Count pada Pandas Data Frame

---





## Persiapan Data Frame

In [17]:
# Demo frame with one row per (penjual, barang) pair; names repeat across rows.
d = dict(
    penjual=['bejo', 'tejo', 'wati', 'bejo', 'cecep', 'tejo', 'wati', 'bejo'],
    barang=['monitor', 'monitor', 'keyboard', 'mouse', 'keyboard', 'monitor', 'laptop', 'monitor'],
)
df = pd.DataFrame(data=d)
df

Unnamed: 0,penjual,barang
0,bejo,monitor
1,tejo,monitor
2,wati,keyboard
3,bejo,mouse
4,cecep,keyboard
5,tejo,monitor
6,wati,laptop
7,bejo,monitor



## Mengenal Cumulative Count dengan **cumcount()**

*   cumcount() --> Memberi nomor urut (mulai dari 0) pada setiap kemunculan nilai yang sama dalam suatu kelompok

*   cumcount()+1 --> Supaya penomoran dimulai dari 1, bukan 0

   


In [19]:
# Number each row within its 'penjual' group; cumcount starts at 0, so
# shift by one to get a 1-based running count.
seller_seq = df.groupby('penjual').cumcount()
df['count_per_penjual'] = seller_seq + 1
df

Unnamed: 0,penjual,barang,count_per_penjual
0,bejo,monitor,1
1,tejo,monitor,1
2,wati,keyboard,1
3,bejo,mouse,2
4,cecep,keyboard,1
5,tejo,monitor,2
6,wati,laptop,2
7,bejo,monitor,3


In [20]:
# 1-based running count of rows within each 'barang' group.
item_seq = df.groupby('barang').cumcount()
df['count_per_barang'] = item_seq + 1
df

Unnamed: 0,penjual,barang,count_per_penjual,count_per_barang
0,bejo,monitor,1,1
1,tejo,monitor,1,2
2,wati,keyboard,1,1
3,bejo,mouse,2,1
4,cecep,keyboard,1,2
5,tejo,monitor,2,3
6,wati,laptop,2,1
7,bejo,monitor,3,4


In [21]:
# 1-based running count within each ('barang', 'penjual') combination.
group_keys = ['barang', 'penjual']
pair_seq = df.groupby(group_keys).cumcount()
df['count_barang_penjual'] = pair_seq + 1
df

Unnamed: 0,penjual,barang,count_per_penjual,count_per_barang,count_barang_penjual
0,bejo,monitor,1,1,1
1,tejo,monitor,1,2,1
2,wati,keyboard,1,1,1
3,bejo,mouse,2,1,1
4,cecep,keyboard,1,2,1
5,tejo,monitor,2,3,2
6,wati,laptop,2,1,1
7,bejo,monitor,3,4,2
