<a href="https://colab.research.google.com/github/Kemi41/orbit/blob/main/pandas.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Import library

In [1]:
import pandas as pd
import numpy as np

## Manipulating Data with Series

In [2]:
s = pd.Series([3, -5, 7, 4], index=['a', 'b', 'c', 'd']) # membuat 1 kolom dengan berindex a, b, c, d
s

a    3
b   -5
c    7
d    4
dtype: int64

In [3]:
series = pd.Series([1, 0.5, 4, 5.5, 'lima']) # membuat 1 kolom yang berisi beberapa type data
series

0       1
1     0.5
2       4
3     5.5
4    lima
dtype: object

In [4]:
ss = pd.Series([1.4, 5.6, 3, 3, 6], [1, 2, -3, 4, 5])
ss

 1    1.4
 2    5.6
-3    3.0
 4    3.0
 5    6.0
dtype: float64

### Mengambil atau memilih kolom tertentu pada series

In [5]:
s

a    3
b   -5
c    7
d    4
dtype: int64

In [6]:
s[~(s < 1)] # mengambil nilai, dimana negasi dari s > 1 atau s bukan > 1

a    3
c    7
d    4
dtype: int64

In [7]:
s[(s < -1) | (s > 3)] # mengambil nilai, dimana s < -1 atau s > 3

b   -5
c    7
d    4
dtype: int64

In [8]:
s['a'] = 6 # merubah nilai pada label a
s

a    6
b   -5
c    7
d    4
dtype: int64

In [9]:
s.drop(['a', 'c']) # menghapus label a dan c

b   -5
d    4
dtype: int64

In [10]:
s['b'] # memilih var s index b

-5

## Manipulating Data with Data Frame

### Membuat dataframe dari suatu series

In [11]:
df1 = pd.DataFrame(ss)
df1

Unnamed: 0,0
1,1.4
2,5.6
-3,3.0
4,3.0
5,6.0


### Membuat dataframe dengan key dan value

In [12]:
data = {'Country': ['Belgium', 'India', 'Brazil'], 'Capital': ['Brussels', 'New Delhi', 'Brasília'], 'Population': [11190846, 1303171035, 207847528]}
df = pd.DataFrame(data, columns=['Country', 'Capital', 'Population'])
df

Unnamed: 0,Country,Capital,Population
0,Belgium,Brussels,11190846
1,India,New Delhi,1303171035
2,Brazil,Brasília,207847528


### Selection

In [13]:
df[1:] # memilih kolom 1 sampai terakhir

Unnamed: 0,Country,Capital,Population
1,India,New Delhi,1303171035
2,Brazil,Brasília,207847528


In [14]:
df.iloc[[0],[0]] # memilih kolom index 0 dan kolom 0

Unnamed: 0,Country
0,Belgium


In [15]:
df.iloc[1:,[0]] # memilih kolom 1 sampai terakhir, dan baris index ke-0

Unnamed: 0,Country
1,India
2,Brazil


In [16]:
df.iat[0,0] # memilih baris index 0 dan kolom 0

'Belgium'

In [17]:
df.iat[1,0] # memilih baris index 1 dan kolom 0

'India'

In [18]:
df.loc[[0], ['Country']] # memilih baris index ke-0 pada kolom Country

Unnamed: 0,Country
0,Belgium


In [19]:
df.loc[:,['Capital']] # memilih baris semua index pada kolom Capital

Unnamed: 0,Capital
0,Brussels
1,New Delhi
2,Brasília


In [20]:
df.at[0,'Country'] # sama halnya dengan .loc tetapi bedanya 0 merupakan nilai skalar, jadi pengambilan nilai hanya satu

'Belgium'

In [21]:
df.at[1,'Capital']

'New Delhi'

In [22]:
df[df['Population'] > 1200000000] # memilih baris (atau kolom) dengan populasi >  1,2 M

Unnamed: 0,Country,Capital,Population
1,India,New Delhi,1303171035


In [23]:
# df.ix[1,'Capital']
df.loc[1,'Capital']

'New Delhi'

In [24]:
print(pd.__version__)

1.1.5


Atribut dan Method Data Frame lainnya

In [25]:
df

Unnamed: 0,Country,Capital,Population
0,Belgium,Brussels,11190846
1,India,New Delhi,1303171035
2,Brazil,Brasília,207847528


In [26]:
df.drop('Country', axis=1) # menghapus kolom Country

Unnamed: 0,Capital,Population
0,Brussels,11190846
1,New Delhi,1303171035
2,Brasília,207847528


In [27]:
df.sort_index() # mengurutkan berdasarkan index

Unnamed: 0,Country,Capital,Population
0,Belgium,Brussels,11190846
1,India,New Delhi,1303171035
2,Brazil,Brasília,207847528


In [28]:
df.rank() # menentukan peringkat

Unnamed: 0,Country,Capital,Population
0,1.0,2.0,1.0
1,3.0,3.0,3.0
2,2.0,1.0,2.0


In [29]:
df.sort_values(by='Capital') # mengurutkan kolom Country berdasarkan nilai

Unnamed: 0,Country,Capital,Population
2,Brazil,Brasília,207847528
0,Belgium,Brussels,11190846
1,India,New Delhi,1303171035


In [30]:
df.shape # untuk mengetahui jumlah baris dan kolom

(3, 3)

In [31]:
df.index # untuk mengetahui index

RangeIndex(start=0, stop=3, step=1)

In [32]:
df.columns # untuk mengetahui nilai kolom

Index(['Country', 'Capital', 'Population'], dtype='object')

In [33]:
df.info() # untuk mengetahui info 

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 3 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   Country     3 non-null      object
 1   Capital     3 non-null      object
 2   Population  3 non-null      int64 
dtypes: int64(1), object(2)
memory usage: 200.0+ bytes


In [34]:
df.count() # menghitung jumlah elemen bukan NA / None

Country       3
Capital       3
Population    3
dtype: int64

In [35]:
df.sum() # menentukan jumlah

Country              BelgiumIndiaBrazil
Capital       BrusselsNew DelhiBrasília
Population                   1522209409
dtype: object

In [36]:
df.cumsum() # menentukan jumlah komulatif berdasarkan grup

Unnamed: 0,Country,Capital,Population
0,Belgium,Brussels,11190846
1,BelgiumIndia,BrusselsNew Delhi,1314361881
2,BelgiumIndiaBrazil,BrusselsNew DelhiBrasília,1522209409


In [37]:
df.min() # menentukan nilai minimum

Country        Belgium
Capital       Brasília
Population    11190846
dtype: object

In [38]:
df.max() # menentukan nilai maximum

Country            India
Capital        New Delhi
Population    1303171035
dtype: object

In [39]:
df.describe() # menampilkan statistik data

Unnamed: 0,Population
count,3.0
mean,507403100.0
std,696134600.0
min,11190850.0
25%,109519200.0
50%,207847500.0
75%,755509300.0
max,1303171000.0


In [40]:
df.mean() # menentukan rata-rata

Population    5.074031e+08
dtype: float64

In [41]:
df.median() # menentukan nilai tengah

Population    207847528.0
dtype: float64

### Mengimport Data ke Data Frame

In [42]:
#Contoh mengimport dari web
url = 'https://people.sc.fsu.edu/~jburkardt/data/csv/freshman_kgs.csv'
df2 = pd.read_csv(url)

df2

Unnamed: 0,Sex,"""Weight (Sep)""","""Weight (Apr)""","""BMI (Sep)""","""BMI (Apr)"""
0,M,72,59,22.02,18.14
1,M,97,86,19.70,17.44
2,M,74,69,24.09,22.43
3,M,93,88,26.97,25.57
4,F,68,64,21.51,20.10
...,...,...,...,...,...
62,M,65,71,22.51,24.45
63,M,75,82,23.69,25.80
64,F,42,49,15.08,17.74
65,M,74,82,22.64,25.33


In [43]:
disney = '/content/Walt_disney_movie_dataset.csv'
data = pd.read_csv(disney)

data

FileNotFoundError: ignored

In [None]:
data.head()

In [None]:
data.tail()

In [None]:
data.columns

In [None]:
data.describe

In [None]:
data.dtypes

In [None]:
data['Budget (float)'].mean()