## Membuat DataFrame

In [1]:
import pandas as pd

In [2]:
# Build a small example frame: one dict key per column, one list entry per row.
df = pd.DataFrame(
    {
        "Nama": ["Ahmad", "Joko", "Adi"],
        "Umur": [12, 13, 15],
        "Kelas": [6, 7, 8],
    }
)

In [3]:
# Display the DataFrame (a bare last expression renders the rich table)
df

Unnamed: 0,Nama,Umur,Kelas
0,Ahmad,12,6
1,Joko,13,7
2,Adi,15,8


In [4]:
# Column labels of the DataFrame, returned as a pandas Index
df.columns

Index(['Nama', 'Umur', 'Kelas'], dtype='object')

In [5]:
# Rename the first column ("Nama") to "Nama Singkat".
# DataFrame.rename goes through the public API and returns a new frame.
# Assigning into `df.columns.values[...]` instead mutates the Index's backing
# array behind pandas' back, which can leave label-lookup caches stale
# (e.g. df["Nama Singkat"] failing even though the header looks renamed).
# Labels absent from the mapping are ignored, so this cell is safe to re-run.
df = df.rename(columns={"Nama": "Nama Singkat"})

In [6]:
# Display the DataFrame again to check the column header after the rename
df

Unnamed: 0,Nama Singkat,Umur,Kelas
0,Ahmad,12,6
1,Joko,13,7
2,Adi,15,8


In [11]:
# Column-oriented records: each keyword becomes a column, each list its values.
data = dict(
    calories=[12, 32, 43],
    duration=[19, 24, 64],
    pulse=[120, 173, 221],
)

In [12]:
# Turn the column dict into a DataFrame (default integer index)
df = pd.DataFrame(data=data)

In [13]:
# Display the newly built DataFrame
df

Unnamed: 0,calories,duration,pulse
0,12,19,120
1,32,24,173
2,43,64,221


## Akses Elemen pada DataFrame

In [23]:
# iloc: position-based indexing.
# The row selector comes first, then the column selector.
# Here: the first three rows, columns at positions 0 and 2.
df.iloc[:3, [0, 2]]

Unnamed: 0,calories,pulse
0,12,120
1,32,173
2,43,221


In [32]:
# Positional slices on both axes: first three rows, first three columns
# (the end position is exclusive, as with ordinary Python slices).
df.iloc[:3, :3]

Unnamed: 0,calories,duration,pulse
0,12,19,120
1,32,24,173
2,43,64,221


In [28]:
# loc: label-based indexing

In [30]:
# Label slices include BOTH endpoints: rows labelled 0 through 2,
# and every column up to and including "calories".
df.loc[0:2, :"calories"]

Unnamed: 0,calories
0,12
1,32
2,43


In [31]:
# All rows (":"), restricted to the listed column labels
df.loc[:, ["calories", "pulse"]]

Unnamed: 0,calories,pulse
0,12,120
1,32,173
2,43,221


In [34]:
# Selecting a single column by name returns a Series, not a DataFrame
df["calories"]

0    12
1    32
2    43
Name: calories, dtype: int64

In [35]:
# Selecting with a list of column names keeps the result a DataFrame
df[["calories"]]

Unnamed: 0,calories
0,12
1,32
2,43


In [37]:
# Confirm that list-based column selection yields a DataFrame
type(df[["calories"]])

pandas.core.frame.DataFrame

In [39]:
# Read a single cell with one explicit label-based lookup (row label 0,
# column "calories"). This replaces the chained form df["calories"][0],
# which indexes twice and leaves it ambiguous whether 0 is a label or a position.
df.at[0, "calories"]

12

In [53]:
# Rows labelled 1 and 2 of the "calories" column, selected in a single .loc
# call (rows first, then columns). Passing the column as a list keeps the
# result a DataFrame, and avoids building an intermediate frame via chaining.
df.loc[[1, 2], ["calories"]]

Unnamed: 0,calories
1,32
2,43


## Memberi Nama Pada Index

In [55]:
# Column-oriented records for the labelled-index example.
data = dict(
    calories=[420, 380, 390],
    duration=[50, 40, 45],
)

In [57]:
# Supply explicit row labels instead of the default 0..n-1 integer index
df = pd.DataFrame(
    data=data,
    index=["data1", "data2", "data3"],
)

In [58]:
# Display the DataFrame; the row labels are now data1..data3
df

Unnamed: 0,calories,duration
data1,420,50
data2,380,40
data3,390,45


In [60]:
# iloc stays position-based even when the index holds string labels:
# first three rows, first column only (a slice keeps the result a DataFrame).
df.iloc[:3, :1]

Unnamed: 0,calories
data1,420
data2,380
data3,390


## Membaca File CSV dengan DataFrame

In [62]:
# Load the CSV file into a DataFrame; the path is relative to the working directory
df = pd.read_csv("./data/data.csv")

In [64]:
# Confirm that read_csv returned a DataFrame
type(df)

pandas.core.frame.DataFrame

In [65]:
# Preview the first five rows (five is head()'s default)
df.head(n=5)

Unnamed: 0,Duration,Pulse,Maxpulse,Calories
0,60,110,130,409.1
1,60,117,145,479.0
2,60,103,135,340.0
3,45,109,175,282.4
4,45,117,148,406.0


In [66]:
# Preview the last five rows (five is tail()'s default)
df.tail(n=5)

Unnamed: 0,Duration,Pulse,Maxpulse,Calories
164,60,105,140,290.8
165,60,110,145,300.0
166,60,115,145,310.2
167,75,120,150,320.4
168,75,125,150,330.4


In [67]:
# Preview the first ten rows
df.head(n=10)

Unnamed: 0,Duration,Pulse,Maxpulse,Calories
0,60,110,130,409.1
1,60,117,145,479.0
2,60,103,135,340.0
3,45,109,175,282.4
4,45,117,148,406.0
5,60,102,127,300.0
6,60,110,136,374.0
7,45,104,134,253.3
8,30,109,133,195.1
9,60,98,124,269.0


In [68]:
# Preview the last ten rows
df.tail(n=10)

Unnamed: 0,Duration,Pulse,Maxpulse,Calories
159,30,80,120,240.9
160,30,85,120,250.4
161,45,90,130,260.4
162,45,95,130,270.0
163,45,100,140,280.9
164,60,105,140,290.8
165,60,110,145,300.0
166,60,115,145,310.2
167,75,120,150,320.4
168,75,125,150,330.4


In [70]:
# Dimensions of the frame as (number of rows, number of columns)
df.shape

(169, 4)

In [71]:
# Data type of each column
df.dtypes

Duration      int64
Pulse         int64
Maxpulse      int64
Calories    float64
dtype: object

## Membaca Informasi Pada DataFrame

In [72]:
# Summary of the frame: index range, per-column non-null counts and dtypes,
# and memory usage. A non-null count below the row count signals missing values.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 169 entries, 0 to 168
Data columns (total 4 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   Duration  169 non-null    int64  
 1   Pulse     169 non-null    int64  
 2   Maxpulse  169 non-null    int64  
 3   Calories  164 non-null    float64
dtypes: float64(1), int64(3)
memory usage: 5.4 KB


In [73]:
# Descriptive statistics per column: count, mean, std, min, quartiles, max
df.describe()

Unnamed: 0,Duration,Pulse,Maxpulse,Calories
count,169.0,169.0,169.0,164.0
mean,63.846154,107.461538,134.047337,375.790244
std,42.299949,14.510259,16.450434,266.379919
min,15.0,80.0,100.0,50.3
25%,45.0,100.0,124.0,250.925
50%,60.0,105.0,131.0,318.6
75%,60.0,111.0,141.0,387.6
max,300.0,159.0,184.0,1860.4


In [74]:
# Mean of the Calories column; missing values are skipped by default
df["Calories"].mean()

375.79024390243904

In [75]:
# mode() returns a Series because several values can tie for most frequent;
# take the first entry by position.
df["Calories"].mode().iloc[0]

300.0

In [77]:
# Median of the Calories column; missing values are skipped by default
df["Calories"].median()

318.6

In [78]:
# Pairwise correlation between every pair of columns
# (Pearson is the default method; spelled out here for clarity).
df.corr(method="pearson")

Unnamed: 0,Duration,Pulse,Maxpulse,Calories
Duration,1.0,-0.155408,0.009403,0.922717
Pulse,-0.155408,1.0,0.786535,0.025121
Maxpulse,0.009403,0.786535,1.0,0.203813
Calories,0.922717,0.025121,0.203813,1.0
