# Data manipulation

Data manipulation is the process of changing or altering data in order to make it more readable and organized. For example, you can arrange data alphabetically to expedite the process of finding useful information. Another example of data manipulation is website management.

In [42]:
import pandas as pd
import numpy as np

In [43]:
# Plain Python list of sample values; converted to a pandas Series in a later cell.
data = [
    0.25,
    0.5,
    0.75,
    1,
]

Convert the list to a pandas Series

In [44]:
# Wrap the values in a Series; the name `data` is rebound from the list to the Series.
data = pd.Series(data=data)

In [45]:
# Leave the Series as the cell's last expression so the notebook displays it;
# wrapping it in print() is unnecessary.
data

0    0.25
1    0.50
2    0.75
3    1.00
dtype: float64


Series to NumPy array

In [46]:
# Get the Series values as a NumPy array. to_numpy() is the accessor the
# pandas documentation recommends over the older .values attribute.
data.to_numpy()

array([0.25, 0.5 , 0.75, 1.  ])

# index

The implicit index is the default positional index (0, 1, 2, ...) that pandas assigns automatically; the explicit index is a customized set of labels supplied by the user.

In [47]:
# No index was supplied when the Series was built, so this is the default
# (implicit) RangeIndex.
data.index

RangeIndex(start=0, stop=4, step=1)

In [48]:
# Materialise a range to show its start/stop/step semantics: stop is excluded.
list(range(1, 10, 1))

[1, 2, 3, 4, 5, 6, 7, 8, 9]

In [49]:
# With the default index 0..3, the key 2 selects the third element.
data[2]

0.75

In [50]:
# Rebuild the Series with an explicit string index: one label per value.
data = pd.Series(
    data=[0.25, 0.5, 0.75, 1],
    index=list("abcd"),
)

In [51]:
# Display the Series to confirm the labels a-d are now its index.
data

a    0.25
b    0.50
c    0.75
d    1.00
dtype: float64

In [52]:
# The underlying values are unchanged by the custom index. to_numpy() is the
# accessor the pandas documentation recommends over the older .values attribute.
data.to_numpy()

array([0.25, 0.5 , 0.75, 1.  ])

In [53]:
# The index is now an explicit Index holding the supplied string labels.
data.index

Index(['a', 'b', 'c', 'd'], dtype='object')

In [54]:
# An explicit index may also be made of integers; these are labels, not positions.
data_2 = pd.Series(
    data=[0.25, 0.5, 0.75, 1],
    index=[2, 5, 7, 9],
)

In [55]:
# Display data_2: the left-hand column shows the integer labels 2, 5, 7, 9.
data_2

2    0.25
5    0.50
7    0.75
9    1.00
dtype: float64

In [56]:
# The key 2 is matched against the labels: label 2 belongs to the first
# element (0.25), not to the element at position 2.
data_2[2]

0.25

In [57]:
# The explicit index of data_2 has no label 0, so this label lookup raises
# KeyError. The error is the point of the demonstration, so catch and show it
# instead of letting it stop a Restart & Run All.
try:
    data_2[0]
except KeyError as err:
    print(f"KeyError: {err}")

KeyError: ignored

In [58]:
# Slice by label: the end label "c" is included in the result.
data["b":"c"]

b    0.50
c    0.75
dtype: float64

In [59]:
# Slice by integer position: the end position 2 is excluded, so only the
# element at position 1 (label "b") is returned.
data[1:2]

b    0.5
dtype: float64

In [60]:
# Redefine data_2 with unordered integer labels so labels and positions differ.
data_2 = pd.Series(
    data=[0.25, 0.5, 0.75, 1],
    index=[2, 5, 3, 7],
)

In [61]:
# Even with integer labels, a plain integer slice is positional: it returns
# the element at position 2 (label 3), not the labels 2 through 3.
data_2[2:3]

3    0.75
dtype: float64

Use `.loc` to select by the explicit index (labels) and `.iloc` to select by the implicit index (integer positions).

In [63]:
# Display data_2 as a reference for the .loc lookups that follow.
data_2

2    0.25
5    0.50
3    0.75
7    1.00
dtype: float64

In [64]:
# .loc looks up the label 3, which belongs to the third element (0.75).
data_2.loc[3]

0.75

In [65]:
# Label slice from label 2 through label 3, both included. It follows the
# index order, so label 5 (which sits between them) is returned as well.
data_2.loc[2:3]

2    0.25
5    0.50
3    0.75
dtype: float64

In [66]:
# Display data_2 as a reference for the .iloc lookups that follow.
data_2

2    0.25
5    0.50
3    0.75
7    1.00
dtype: float64

In [68]:
# .iloc selects by position: position 3 is the last element (label 7).
data_2.iloc[3]

1.0

In [69]:
# Positional slice: the end position 3 is excluded, so only position 2
# (label 3) is returned.
data_2.iloc[2:3]

3    0.75
dtype: float64

# data frame

In [70]:
# Population figure per region, keyed by region name.
dict_populasi = dict(
    jakarta=370,
    bogor=490,
    depok=350,
    tanggeringan=270,
    bekasi=670,
)

In [71]:
# Build a Series from the dict: the keys become the index labels.
populasi = pd.Series(data=dict_populasi)

In [72]:
# Display the population Series; the region names are its index.
populasi

jakarta         370
bogor           490
depok           350
tanggeringan    270
bekasi          670
dtype: int64

In [73]:
# Look up a value by its label.
populasi.loc["depok"]

350

In [74]:
# Look up the same value by position: "depok" is the third entry (position 2).
populasi.iloc[2]

350

In [75]:
# Area figure per region, keyed by the same region names as the population data.
dict_luas = dict(
    jakarta=720,
    bogor=230,
    depok=560,
    tanggeringan=420,
    bekasi=290,
)

In [76]:
# Build a Series from the dict: the keys become the index labels.
luas = pd.Series(data=dict_luas)

In [77]:
# Display the area Series; the region names are its index.
luas

jakarta         720
bogor           230
depok           560
tanggeringan    420
bekasi          290
dtype: int64

In [78]:
# Combine the two Series into a DataFrame; each keyword becomes a column name
# and the shared region labels become the row index.
daerah = pd.DataFrame(dict(pop=populasi, luas=luas))

In [79]:
# Display the DataFrame with its "pop" and "luas" columns.
daerah

Unnamed: 0,pop,luas
jakarta,370,720
bogor,490,230
depok,350,560
tanggeringan,270,420
bekasi,670,290


In [80]:
# Read one cell with a single .loc[row, column] lookup instead of chained
# indexing (daerah[col][row]), which pandas discourages.
daerah.loc["jakarta", "luas"]

720

In [81]:
# Pitfall: attribute access resolves to the built-in DataFrame.pop method, not
# to the column named "pop", so this shows a bound method instead of the data.
# Bracket access (daerah["pop"]) is needed to reach the column.
daerah.pop

<bound method DataFrame.pop of               pop  luas
jakarta       370   720
bogor         490   230
depok         350   560
tanggeringan  270   420
bekasi        670   290>

In [82]:
# Rebuild the DataFrame with the column named "populasi" so that the column
# name no longer collides with the DataFrame.pop method.
daerah = pd.DataFrame(dict(populasi=populasi, luas=luas))

In [83]:
# Display the DataFrame with its "populasi" and "luas" columns.
daerah

Unnamed: 0,populasi,luas
jakarta,370,720
bogor,490,230
depok,350,560
tanggeringan,270,420
bekasi,670,290


In [84]:
# Attribute access returns the "populasi" column as a Series.
daerah.populasi

jakarta         370
bogor           490
depok           350
tanggeringan    270
bekasi          670
Name: populasi, dtype: int64

In [85]:
# Label slice on the column: the end label "depok" is included.
daerah.populasi["jakarta":"depok"]

jakarta    370
bogor      490
depok      350
Name: populasi, dtype: int64

In [86]:
# Positional slice on the column: positions 1 and 2 are returned, 3 is excluded.
daerah.populasi.iloc[1:3]

bogor    490
depok    350
Name: populasi, dtype: int64

A new column can be added with the code below.

In [87]:
# Add a derived column: population divided by area, computed row by row.
daerah["populasi per area"] = daerah.populasi / daerah.luas

In [88]:
# Display the DataFrame including the new "populasi per area" column.
daerah

Unnamed: 0,populasi,luas,populasi per area
jakarta,370,720,0.513889
bogor,490,230,2.130435
depok,350,560,0.625
tanggeringan,270,420,0.642857
bekasi,670,290,2.310345


A new row can be added with the steps below.

In [89]:
# Values for the new region as a single column named "bandung":
# population, area, and population per area.
daerah_tambahan = pd.DataFrame(
    data={"bandung": [342, 980, 342 / 980]},
)

In [90]:
# Display the new data: at this point it is one column with three rows.
daerah_tambahan

Unnamed: 0,bandung
0,342.0
1,980.0
2,0.34898


In [91]:
# Transpose so that "bandung" becomes a single row with three columns.
# Note: running this cell a second time transposes it back again.
daerah_tambahan = daerah_tambahan.transpose()

In [92]:
# Give the new row the same column labels as daerah so the two frames line up
# column by column when concatenated.
daerah_tambahan.columns=daerah.columns

In [93]:
# Stack the new row underneath the existing rows. The result is only displayed;
# daerah itself is not modified.
pd.concat(objs=[daerah, daerah_tambahan], axis=0)

Unnamed: 0,populasi,luas,populasi per area
jakarta,370.0,720.0,0.513889
bogor,490.0,230.0,2.130435
depok,350.0,560.0,0.625
tanggeringan,270.0,420.0,0.642857
bekasi,670.0,290.0,2.310345
bandung,342.0,980.0,0.34898
