# Pandas

Pandas ini adalah tools utama untuk manipulasi data tabular di python

## 1. Import Pandas

In [17]:
import pandas as pd

## 2. Stuktur Data: Series dan DataFrame

In [18]:
# Series
s = pd.Series([1, 2, 3, 4, 5])
print(s)

0    1
1    2
2    3
3    4
4    5
dtype: int64


In [19]:
# DataFrame
data = {
    "Nama" : ["upin", "ipin", "citra"],
    "Nilai" : [100, 90, 80],
    "Kehadiran" : [True, True, False]   
}

df = pd.DataFrame(data)
print(df)

    Nama  Nilai  Kehadiran
0   upin    100       True
1   ipin     90       True
2  citra     80      False


## 3. Baca dan Tuilis File CSV

In [20]:
df.to_csv("output.csv", index=False)
df = pd.read_csv("output.csv")
print(df)

    Nama  Nilai  Kehadiran
0   upin    100       True
1   ipin     90       True
2  citra     80      False


## 4. Melihat Data

In [21]:
df.head()         # 5 baris pertama
df.tail()         # 5 baris terakhir
df.shape          # (baris, kolom)
df.columns        # daftar nama kolom
df.info()         # ringkasan struktur data
df.describe()     # statistik deskriptif


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 3 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   Nama       3 non-null      object
 1   Nilai      3 non-null      int64 
 2   Kehadiran  3 non-null      bool  
dtypes: bool(1), int64(1), object(1)
memory usage: 183.0+ bytes


Unnamed: 0,Nilai
count,3.0
mean,90.0
std,10.0
min,80.0
25%,85.0
50%,90.0
75%,95.0
max,100.0


## 5. Akses Data

In [22]:
print(df["Nama"])             # akses kolom
print(df[["Nama", "Nilai"]])  # beberapa kolom

print(df.iloc[0])# baris pertama
print(df.iloc[0:2])# baris 0 sampai 1
print(df.loc[0, "Nama"])# data spesifik



0     upin
1     ipin
2    citra
Name: Nama, dtype: object
    Nama  Nilai
0   upin    100
1   ipin     90
2  citra     80
Nama         upin
Nilai         100
Kehadiran    True
Name: 0, dtype: object
   Nama  Nilai  Kehadiran
0  upin    100       True
1  ipin     90       True
upin


## 6. Filter / Seleksi Data

In [23]:
df[df["Nilai"] > 80]     # filter siswa dengan nilai > 80

Unnamed: 0,Nama,Nilai,Kehadiran
0,upin,100,True
1,ipin,90,True


## 8. Tambahkan / Hapus Kolom

In [24]:
df["Lulus"] = df["Nilai"] > 75  # kolom baru berdasarkan logika

df.drop("Lulus", axis=1, inplace=True)  # hapus kolom

## 9. Ubah Nilai

In [25]:
df["Nilai"] = df["Nilai"] + 5
df.loc[0, "Nama"] = "Andi"     # ubah data baris pertama kolom Nama

# 10. Deteksi Nilai Hilang

In [26]:
df.isnull().sum()      # cek jumlah null tiap kolom
df.dropna()            # hapus baris yang ada null-nya
df.fillna(0)           # isi null dengan 0

Unnamed: 0,Nama,Nilai,Kehadiran
0,Andi,105,True
1,ipin,95,True
2,citra,85,False


# 11. Encoding Data Kategori

In [27]:
df["Gender"] = ["L", "P", "P"]  # match the number of rows in df
df_encoded = pd.get_dummies(df, columns=["Gender"])

# 12. Normalisasi Data

In [28]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
df[["Nilai"]] = scaler.fit_transform(df[["Nilai"]])

# 13. Gabung (merge/join)

In [40]:
transportation = {
    "Name" : ["boat", "airplane", "bus"],
    "classifications" : ["Water", "Air", "Land"],
}

animal = {
    "Name" : ["fish", "bird", "elephant"],
    "classifications" : ["Water", "Air", "Land"],
}

df_transportation = pd.DataFrame(transportation)
df_animal = pd.DataFrame(animal)

# merge 2 DataFrame
pd.merge(df_transportation, df_animal, on="classifications", how="outer")


Unnamed: 0,Name_x,classifications,Name_y
0,airplane,Air,bird
1,bus,Land,elephant
2,boat,Water,fish
