## Liste İle Pandas Serisi Oluşturma

In [1]:
import pandas as pd

In [2]:
pd.Series([10, 20, 30, 40, 50])

0    10
1    20
2    30
3    40
4    50
dtype: int64

In [3]:
list_data = [2, 4, 6, 8, 10]
list_index = ["a", "b", "c", "d", "e"]

In [4]:
variable = pd.Series(data = list_data, index = list_index, dtype = "float")

In [5]:
variable

a     2.0
b     4.0
c     6.0
d     8.0
e    10.0
dtype: float64

In [6]:
type(variable)

pandas.core.series.Series

In [7]:
list_data = [2, 4, 6, 8, 10, 12]
list_index = ["a", "b", "c", "d", "e"]

In [8]:
variable = pd.Series(data = list_data, index = list_index, dtype = "float")

ValueError: Length of passed values is 6, index implies 5.

## Sözlük İle Pandas Serisi Oluşturma

In [9]:
import pandas as pd 

In [10]:
dictionary = {"ferrari" : 334.7, "porche" : 337.9, "lamborghini" : 349}

In [11]:
pd.Series(data = dictionary)

ferrari        334.7
porche         337.9
lamborghini    349.0
dtype: float64

In [12]:
variable = pd.Series(data = dictionary, index = ["porche", "bugatti", "ferrari"])

In [13]:
variable

porche     337.9
bugatti      NaN
ferrari    334.7
dtype: float64

## Numpy Array’i İle Pandas Serisi Oluşturma

In [14]:
import numpy as np
import pandas as pd

In [15]:
example = np.array([1, 3.3, 5, 7.2, 9])
labels = np.array(["a", "b", "c", "d", "e"])

In [16]:
variable = pd.Series(data = example, index = labels)

In [17]:
variable

a    1.0
b    3.3
c    5.0
d    7.2
e    9.0
dtype: float64

## Seri İçindeki Nesne Türleri

In [18]:
import pandas as pd

In [19]:
pd.Series(["world", 100, 5.8, False])

0    world
1      100
2      5.8
3    False
dtype: object

In [20]:
example = pd.Series(["world", 100, 5.8, False])

In [21]:
example[0]

'world'

In [22]:
print(type(example[0]))

<class 'str'>


In [23]:
print(type(example[1]))
print(type(example[2]))
print(type(example[3]))

<class 'int'>
<class 'float'>
<class 'bool'>


In [24]:
pd.Series([sum, type, max])

0    <built-in function sum>
1             <class 'type'>
2    <built-in function max>
dtype: object

## Pandas Serilerinin Temel Özelliklerini İnceleme  

In [25]:
import pandas as pd

In [26]:
example = pd.Series([10, 20, 30, 40, 50, 60, 70])

In [27]:
example

0    10
1    20
2    30
3    40
4    50
5    60
6    70
dtype: int64

In [28]:
example.axes

[RangeIndex(start=0, stop=7, step=1)]

In [29]:
example.dtype

dtype('int64')

In [30]:
example.size

7

In [31]:
example.ndim

1

In [32]:
example.head()

0    10
1    20
2    30
3    40
4    50
dtype: int64

In [33]:
example.tail()

2    30
3    40
4    50
5    60
6    70
dtype: int64

## Pandas Serileri Üzerine En Sık Uygulananan  Metot’lar

In [34]:
import pandas as pd

In [35]:
variable = pd.Series([8, 3, 6, 5, 1], index = ["b", "d", "c", "a", "e"])

In [36]:
variable

b    8
d    3
c    6
a    5
e    1
dtype: int64

In [37]:
new_variable = variable.sort_index()

In [38]:
new_variable

a    5
b    8
c    6
d    3
e    1
dtype: int64

In [39]:
new_variable2 = variable.sort_values()

In [40]:
new_variable2

e    1
d    3
a    5
c    6
b    8
dtype: int64

In [41]:
variable

b    8
d    3
c    6
a    5
e    1
dtype: int64

In [42]:
variable.isin([6, 1])

b    False
d    False
c     True
a    False
e     True
dtype: bool

In [43]:
variable[variable.isin([6, 1])]

c    6
e    1
dtype: int64

In [44]:
variable.values

array([8, 3, 6, 5, 1], dtype=int64)

In [45]:
[i for i in variable.values]

[8, 3, 6, 5, 1]

In [46]:
variable.index

Index(['b', 'd', 'c', 'a', 'e'], dtype='object')

In [47]:
[i for i in variable.index]

['b', 'd', 'c', 'a', 'e']

In [48]:
variable.items

<bound method Series.items of b    8
d    3
c    6
a    5
e    1
dtype: int64>

In [49]:
variable.items()

<zip at 0x263022c4640>

In [50]:
list(variable.items())

[('b', 8), ('d', 3), ('c', 6), ('a', 5), ('e', 1)]

In [51]:
for index, value in variable.items():
    print(index, "-", value)

b - 8
d - 3
c - 6
a - 5
e - 1


## Pandas Serilerini İndexleme

In [52]:
example = pd.Series(["+90", "+49", "+33", "+39", "+46", "+47"], index = ["Turkey", "Germany", "France", "Italy", "Sweden", "Norway"])

In [53]:
example

Turkey     +90
Germany    +49
France     +33
Italy      +39
Sweden     +46
Norway     +47
dtype: object

In [54]:
example["Germany"]

'+49'

In [55]:
example[1]

'+49'

In [56]:
example["France":"Sweden"]

France    +33
Italy     +39
Sweden    +46
dtype: object

In [57]:
example[2:5]

France    +33
Italy     +39
Sweden    +46
dtype: object

In [58]:
example[2:5:2]

France    +33
Sweden    +46
dtype: object

In [59]:
example[["Norway", "Turkey"]]

Norway    +47
Turkey    +90
dtype: object

## Liste İle Pandas DataFrame’i Oluşturma

In [60]:
import pandas as pd

In [61]:
list_example = [5, 10, 15, 20, 25]

In [62]:
list_example

[5, 10, 15, 20, 25]

In [63]:
pd.DataFrame(data = list_example, columns = ["values"], dtype = "float" )

Unnamed: 0,values
0,5.0
1,10.0
2,15.0
3,20.0
4,25.0


## NumPy Array’i İle Pandas DataFrame’i oluşturma

In [64]:
import numpy as np
import pandas as pd

In [65]:
array_example = np.random.randint(0, 50, (4, 4))

In [66]:
array_example

array([[28, 20, 12,  4],
       [42, 29, 36,  5],
       [ 2, 31, 28, 28],
       [33, 14, 32,  9]])

In [67]:
df = pd.DataFrame(array_example, index = ["a", "b", "c", "d"], columns = ["val1", "val2", "val3", "val4"])

In [68]:
df

Unnamed: 0,val1,val2,val3,val4
a,28,20,12,4
b,42,29,36,5
c,2,31,28,28
d,33,14,32,9


## Sözlük İle Pandas DataFrame’i oluşturma

In [69]:
import numpy as np
import pandas as pd

In [70]:
x = np.random.randint(10, 50, size = 6)

In [71]:
x

array([47, 38, 41, 30, 35, 13])

In [72]:
y = np.random.randint(10, 50, size = 6)
z = np.random.randint(10, 50, size = 6)
t = np.random.randint(10, 50, size = 6)

In [73]:
dict_example = {"val1":x, "val2":y, "val3":z, "val4":t}

In [74]:
dict_example

{'val1': array([47, 38, 41, 30, 35, 13]),
 'val2': array([19, 44, 42, 25, 10, 36]),
 'val3': array([42, 24, 39, 46, 31, 14]),
 'val4': array([16, 10, 22, 18, 46, 44])}

In [75]:
df = pd.DataFrame(dict_example)

In [76]:
df

Unnamed: 0,val1,val2,val3,val4
0,47,19,42,16
1,38,44,24,10
2,41,42,39,22
3,30,25,46,18
4,35,10,31,46
5,13,36,14,44


## Pandas DataFrame'lerinin Özelliklerini İnceleme

In [77]:
import numpy as np
import pandas as pd

In [78]:
x = np.random.randint(10, 50, size = 6)
y = np.random.randint(10, 50, size = 6)
z = np.random.randint(10, 50, size = 6)
t = np.random.randint(10, 50, size = 6)

dict_example = {"val1":x, "val2":y, "val3":z, "val4":t}

df = pd.DataFrame(dict_example)

In [79]:
df

Unnamed: 0,val1,val2,val3,val4
0,47,39,18,25
1,35,44,43,49
2,20,17,39,30
3,40,28,23,24
4,36,18,41,39
5,19,41,21,27


In [80]:
df.head()

Unnamed: 0,val1,val2,val3,val4
0,47,39,18,25
1,35,44,43,49
2,20,17,39,30
3,40,28,23,24
4,36,18,41,39


In [81]:
df.head(3)

Unnamed: 0,val1,val2,val3,val4
0,47,39,18,25
1,35,44,43,49
2,20,17,39,30


In [82]:
df.tail(3)

Unnamed: 0,val1,val2,val3,val4
3,40,28,23,24
4,36,18,41,39
5,19,41,21,27


In [83]:
df.columns

Index(['val1', 'val2', 'val3', 'val4'], dtype='object')

In [84]:
[i for i in df.columns]

['val1', 'val2', 'val3', 'val4']

In [85]:
df

Unnamed: 0,val1,val2,val3,val4
0,47,39,18,25
1,35,44,43,49
2,20,17,39,30
3,40,28,23,24
4,36,18,41,39
5,19,41,21,27


In [86]:
df.columns = ["new1", "new2", "new3", "new4"]

In [87]:
df

Unnamed: 0,new1,new2,new3,new4
0,47,39,18,25
1,35,44,43,49
2,20,17,39,30
3,40,28,23,24
4,36,18,41,39
5,19,41,21,27


In [88]:
df.values

array([[47, 39, 18, 25],
       [35, 44, 43, 49],
       [20, 17, 39, 30],
       [40, 28, 23, 24],
       [36, 18, 41, 39],
       [19, 41, 21, 27]])

In [89]:
type(df.values)

numpy.ndarray

In [90]:
type(df)

pandas.core.frame.DataFrame

In [91]:
df.shape

(6, 4)

In [92]:
df.ndim

2

In [93]:
df.size

24

## Pandas DataFrame’lerinde Eleman Seçim İşlemleri 1.Ders

In [94]:
import numpy as np
import pandas as pd

In [95]:
np.random.seed(101)
df = pd.DataFrame(data = np.random.randn(6,5), index = "A B C D E F".split(), columns = "VAL1 VAL2 VAL3 VAL4 VAL5".split())

In [96]:
df

Unnamed: 0,VAL1,VAL2,VAL3,VAL4,VAL5
A,2.70685,0.628133,0.907969,0.503826,0.651118
B,-0.319318,-0.848077,0.605965,-2.018168,0.740122
C,0.528813,-0.589001,0.188695,-0.758872,-0.933237
D,0.955057,0.190794,1.978757,2.605967,0.683509
E,0.302665,1.693723,-1.706086,-1.159119,-0.134841
F,0.390528,0.166905,0.184502,0.807706,0.07296


In [97]:
df["VAL1"]

A    2.706850
B   -0.319318
C    0.528813
D    0.955057
E    0.302665
F    0.390528
Name: VAL1, dtype: float64

In [98]:
df.VAL1

A    2.706850
B   -0.319318
C    0.528813
D    0.955057
E    0.302665
F    0.390528
Name: VAL1, dtype: float64

In [99]:
df["VAL1"].values

array([ 2.70684984, -0.31931804,  0.52881349,  0.95505651,  0.30266545,
        0.39052784])

In [100]:
df[["VAL1"]]

Unnamed: 0,VAL1
A,2.70685
B,-0.319318
C,0.528813
D,0.955057
E,0.302665
F,0.390528


In [101]:
df[["VAL1"]]["B":"D"]

Unnamed: 0,VAL1
B,-0.319318
C,0.528813
D,0.955057


In [102]:
df[["VAL1"]][1:4]

Unnamed: 0,VAL1
B,-0.319318
C,0.528813
D,0.955057


## Pandas DataFrame’lerinde Eleman Seçim İşlemleri 2.Ders

In [103]:
import numpy as np
import pandas as pd

In [104]:
np.random.seed(101)
df = pd.DataFrame(data = np.random.randn(6,5), index = "A B C D E F".split(), columns = "VAL1 VAL2 VAL3 VAL4 VAL5".split())

In [105]:
df

Unnamed: 0,VAL1,VAL2,VAL3,VAL4,VAL5
A,2.70685,0.628133,0.907969,0.503826,0.651118
B,-0.319318,-0.848077,0.605965,-2.018168,0.740122
C,0.528813,-0.589001,0.188695,-0.758872,-0.933237
D,0.955057,0.190794,1.978757,2.605967,0.683509
E,0.302665,1.693723,-1.706086,-1.159119,-0.134841
F,0.390528,0.166905,0.184502,0.807706,0.07296


In [106]:
df[["VAL2", "VAL5"]]

Unnamed: 0,VAL2,VAL5
A,0.628133,0.651118
B,-0.848077,0.740122
C,-0.589001,-0.933237
D,0.190794,0.683509
E,1.693723,-0.134841
F,0.166905,0.07296


In [107]:
variable = ["VAL3", "VAL4"]

In [108]:
df[variable]

Unnamed: 0,VAL3,VAL4
A,0.907969,0.503826
B,0.605965,-2.018168
C,0.188695,-0.758872
D,1.978757,2.605967
E,-1.706086,-1.159119
F,0.184502,0.807706


In [109]:
df

Unnamed: 0,VAL1,VAL2,VAL3,VAL4,VAL5
A,2.70685,0.628133,0.907969,0.503826,0.651118
B,-0.319318,-0.848077,0.605965,-2.018168,0.740122
C,0.528813,-0.589001,0.188695,-0.758872,-0.933237
D,0.955057,0.190794,1.978757,2.605967,0.683509
E,0.302665,1.693723,-1.706086,-1.159119,-0.134841
F,0.390528,0.166905,0.184502,0.807706,0.07296


In [110]:
df["B":"D"]

Unnamed: 0,VAL1,VAL2,VAL3,VAL4,VAL5
B,-0.319318,-0.848077,0.605965,-2.018168,0.740122
C,0.528813,-0.589001,0.188695,-0.758872,-0.933237
D,0.955057,0.190794,1.978757,2.605967,0.683509


In [111]:
df[1:4]

Unnamed: 0,VAL1,VAL2,VAL3,VAL4,VAL5
B,-0.319318,-0.848077,0.605965,-2.018168,0.740122
C,0.528813,-0.589001,0.188695,-0.758872,-0.933237
D,0.955057,0.190794,1.978757,2.605967,0.683509


In [112]:
df["E"]

KeyError: 'E'

In [113]:
df["E":"E"]

Unnamed: 0,VAL1,VAL2,VAL3,VAL4,VAL5
E,0.302665,1.693723,-1.706086,-1.159119,-0.134841


In [114]:
df["E":"E"][["VAL2","VAL5"]]

Unnamed: 0,VAL2,VAL5
E,1.693723,-0.134841


## Pandas DataFrame’lerinde Üst Düzey Eleman Seçimi: loc ve iloc yapısı 1.Ders

In [115]:
import numpy as np
import pandas as pd

In [116]:
np.random.seed(101)

array = np.random.randint(10, 50, (10, 5))
df = pd.DataFrame(data = array , index = "A B C D E F G H I J".split(), columns = "VAL1 VAL2 VAL3 VAL4 VAL5".split())

In [117]:
df

Unnamed: 0,VAL1,VAL2,VAL3,VAL4,VAL5
A,41,21,27,16,33
B,21,19,23,14,38
C,10,15,22,39,29
D,18,39,44,18,29
E,20,22,41,33,10
F,19,18,46,29,45
G,38,17,20,49,48
H,19,28,17,49,25
I,10,22,27,21,25
J,43,39,34,46,29


In [118]:
df.loc["A":"D"]

Unnamed: 0,VAL1,VAL2,VAL3,VAL4,VAL5
A,41,21,27,16,33
B,21,19,23,14,38
C,10,15,22,39,29
D,18,39,44,18,29


In [119]:
df.iloc[0:4]

Unnamed: 0,VAL1,VAL2,VAL3,VAL4,VAL5
A,41,21,27,16,33
B,21,19,23,14,38
C,10,15,22,39,29
D,18,39,44,18,29


In [120]:
df

Unnamed: 0,VAL1,VAL2,VAL3,VAL4,VAL5
A,41,21,27,16,33
B,21,19,23,14,38
C,10,15,22,39,29
D,18,39,44,18,29
E,20,22,41,33,10
F,19,18,46,29,45
G,38,17,20,49,48
H,19,28,17,49,25
I,10,22,27,21,25
J,43,39,34,46,29


In [121]:
df.loc["C", "VAL4"]

39

In [122]:
df.iloc[2, 3]

39

## Pandas DataFrame’lerinde Üst Düzey Eleman Seçimi: loc ve iloc Yapısı 2.Ders

In [123]:
import numpy as np
import pandas as pd

In [124]:
np.random.seed(101)

array = np.random.randint(10, 50, (10, 5))
df = pd.DataFrame(data = array , index = "A B C D E F G H I J".split(), columns = "VAL1 VAL2 VAL3 VAL4 VAL5".split())

In [125]:
df

Unnamed: 0,VAL1,VAL2,VAL3,VAL4,VAL5
A,41,21,27,16,33
B,21,19,23,14,38
C,10,15,22,39,29
D,18,39,44,18,29
E,20,22,41,33,10
F,19,18,46,29,45
G,38,17,20,49,48
H,19,28,17,49,25
I,10,22,27,21,25
J,43,39,34,46,29


In [126]:
df.loc["C":"G","VAL3"]

C    22
D    44
E    41
F    46
G    20
Name: VAL3, dtype: int32

In [127]:
df.loc["C":"G",["VAL3"]]

Unnamed: 0,VAL3
C,22
D,44
E,41
F,46
G,20


In [128]:
df.loc["C":"G"][["VAL3"]]

Unnamed: 0,VAL3
C,22
D,44
E,41
F,46
G,20


In [129]:
df

Unnamed: 0,VAL1,VAL2,VAL3,VAL4,VAL5
A,41,21,27,16,33
B,21,19,23,14,38
C,10,15,22,39,29
D,18,39,44,18,29
E,20,22,41,33,10
F,19,18,46,29,45
G,38,17,20,49,48
H,19,28,17,49,25
I,10,22,27,21,25
J,43,39,34,46,29


In [130]:
df.iloc[2:7,2]

C    22
D    44
E    41
F    46
G    20
Name: VAL3, dtype: int32

In [131]:
df.iloc[2:7,[2]]

Unnamed: 0,VAL3
C,22
D,44
E,41
F,46
G,20


In [132]:
df.iloc[2:7][["VAL3"]]

Unnamed: 0,VAL3
C,22
D,44
E,41
F,46
G,20


## Pandas DataFrame’lerinde Üst Düzey Eleman Seçimi: loc ve iloc Yapısı 3.Ders

In [133]:
import numpy as np
import pandas as pd

In [134]:
np.random.seed(101)

array = np.random.randint(10, 50, (10, 5))
df = pd.DataFrame(data = array , index = "A B C D E F G H I J".split(), columns = "VAL1 VAL2 VAL3 VAL4 VAL5".split())

In [135]:
df

Unnamed: 0,VAL1,VAL2,VAL3,VAL4,VAL5
A,41,21,27,16,33
B,21,19,23,14,38
C,10,15,22,39,29
D,18,39,44,18,29
E,20,22,41,33,10
F,19,18,46,29,45
G,38,17,20,49,48
H,19,28,17,49,25
I,10,22,27,21,25
J,43,39,34,46,29


In [136]:
df.loc["C":"H","VAL2":"VAL4"]

Unnamed: 0,VAL2,VAL3,VAL4
C,15,22,39
D,39,44,18
E,22,41,33
F,18,46,29
G,17,20,49
H,28,17,49


In [137]:
df.iloc[2:8,1:4]

Unnamed: 0,VAL2,VAL3,VAL4
C,15,22,39
D,39,44,18
E,22,41,33
F,18,46,29
G,17,20,49
H,28,17,49


In [138]:
df.iloc[2:8,1:4].loc["E":"F", ["VAL3"]]

Unnamed: 0,VAL3
E,41
F,46


## Pandas DataFrame’lerinde Koşullu İşlemler İle Elaman Seçimi

In [139]:
import numpy as np
import pandas as pd

In [140]:
np.random.seed(101)

array = np.random.randint(10, 50, (10, 5))
df = pd.DataFrame(data = array , index = "A B C D E F G H I J".split(), columns = "VAL1 VAL2 VAL3 VAL4 VAL5".split())

In [141]:
df

Unnamed: 0,VAL1,VAL2,VAL3,VAL4,VAL5
A,41,21,27,16,33
B,21,19,23,14,38
C,10,15,22,39,29
D,18,39,44,18,29
E,20,22,41,33,10
F,19,18,46,29,45
G,38,17,20,49,48
H,19,28,17,49,25
I,10,22,27,21,25
J,43,39,34,46,29


In [142]:
df > 20

Unnamed: 0,VAL1,VAL2,VAL3,VAL4,VAL5
A,True,True,True,False,True
B,True,False,True,False,True
C,False,False,True,True,True
D,False,True,True,False,True
E,False,True,True,True,False
F,False,False,True,True,True
G,True,False,False,True,True
H,False,True,False,True,True
I,False,True,True,True,True
J,True,True,True,True,True


In [143]:
df[df > 20]

Unnamed: 0,VAL1,VAL2,VAL3,VAL4,VAL5
A,41.0,21.0,27.0,,33.0
B,21.0,,23.0,,38.0
C,,,22.0,39.0,29.0
D,,39.0,44.0,,29.0
E,,22.0,41.0,33.0,
F,,,46.0,29.0,45.0
G,38.0,,,49.0,48.0
H,,28.0,,49.0,25.0
I,,22.0,27.0,21.0,25.0
J,43.0,39.0,34.0,46.0,29.0


In [144]:
df["VAL1"] < 20

A    False
B    False
C     True
D     True
E    False
F     True
G    False
H     True
I     True
J    False
Name: VAL1, dtype: bool

In [145]:
df[df["VAL1"] < 20]

Unnamed: 0,VAL1,VAL2,VAL3,VAL4,VAL5
C,10,15,22,39,29
D,18,39,44,18,29
F,19,18,46,29,45
H,19,28,17,49,25
I,10,22,27,21,25


In [146]:
df[df["VAL1"] < 20]["VAL2"]

C    15
D    39
F    18
H    28
I    22
Name: VAL2, dtype: int32

In [147]:
df[df["VAL1"] < 20][["VAL2"]]

Unnamed: 0,VAL2
C,15
D,39
F,18
H,28
I,22


In [148]:
df[df["VAL1"] > 20][["VAL2", "VAL5"]]

Unnamed: 0,VAL2,VAL5
A,21,33
B,19,38
G,17,48
J,39,29


In [149]:
df 

Unnamed: 0,VAL1,VAL2,VAL3,VAL4,VAL5
A,41,21,27,16,33
B,21,19,23,14,38
C,10,15,22,39,29
D,18,39,44,18,29
E,20,22,41,33,10
F,19,18,46,29,45
G,38,17,20,49,48
H,19,28,17,49,25
I,10,22,27,21,25
J,43,39,34,46,29


In [150]:
df[(df["VAL1"] > 20) & (df["VAL4"] < 18)]

Unnamed: 0,VAL1,VAL2,VAL3,VAL4,VAL5
A,41,21,27,16,33
B,21,19,23,14,38


In [151]:
df[(df < 35) | (df["VAL5"] > 20)]

Unnamed: 0,VAL1,VAL2,VAL3,VAL4,VAL5
A,,21.0,27.0,16.0,33.0
B,21.0,19.0,23.0,14.0,
C,10.0,15.0,22.0,,29.0
D,18.0,,,18.0,29.0
E,20.0,22.0,,33.0,10.0
F,19.0,18.0,,29.0,
G,,17.0,20.0,,
H,19.0,28.0,17.0,,25.0
I,10.0,22.0,27.0,21.0,25.0
J,,,34.0,,29.0


In [152]:
df

Unnamed: 0,VAL1,VAL2,VAL3,VAL4,VAL5
A,41,21,27,16,33
B,21,19,23,14,38
C,10,15,22,39,29
D,18,39,44,18,29
E,20,22,41,33,10
F,19,18,46,29,45
G,38,17,20,49,48
H,19,28,17,49,25
I,10,22,27,21,25
J,43,39,34,46,29


In [153]:
df.loc[df.VAL2 > 25, ["VAL2", "VAL3", "VAL5"]]

Unnamed: 0,VAL2,VAL3,VAL5
D,39,44,29
H,28,17,25
J,39,34,29


## Pandas Data Frame’lerine Sütun Ekleme 

In [154]:
import numpy as np
import pandas as pd

In [155]:
x = np.arange(0, 28).reshape(7, 4)

In [156]:
x

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15],
       [16, 17, 18, 19],
       [20, 21, 22, 23],
       [24, 25, 26, 27]])

In [157]:
df = pd.DataFrame(data = x, columns = ["VAL1", "VAL2", "VAL3", "VAL4"])

In [158]:
df

Unnamed: 0,VAL1,VAL2,VAL3,VAL4
0,0,1,2,3
1,4,5,6,7
2,8,9,10,11
3,12,13,14,15
4,16,17,18,19
5,20,21,22,23
6,24,25,26,27


In [159]:
df["VAL1"] + df["VAL2"]

0     1
1     9
2    17
3    25
4    33
5    41
6    49
dtype: int32

In [160]:
df["VAL5"] = df["VAL1"] + df["VAL2"]

In [161]:
df

Unnamed: 0,VAL1,VAL2,VAL3,VAL4,VAL5
0,0,1,2,3,1
1,4,5,6,7,9
2,8,9,10,11,17
3,12,13,14,15,25
4,16,17,18,19,33
5,20,21,22,23,41
6,24,25,26,27,49


In [162]:
df["VAL6"] = df["VAL3"] * df["VAL4"]

In [163]:
df

Unnamed: 0,VAL1,VAL2,VAL3,VAL4,VAL5,VAL6
0,0,1,2,3,1,6
1,4,5,6,7,9,42
2,8,9,10,11,17,110
3,12,13,14,15,25,210
4,16,17,18,19,33,342
5,20,21,22,23,41,506
6,24,25,26,27,49,702


In [164]:
np.arange(28, 35)

array([28, 29, 30, 31, 32, 33, 34])

In [165]:
df["VAL7"] = np.arange(28, 35)

In [166]:
df

Unnamed: 0,VAL1,VAL2,VAL3,VAL4,VAL5,VAL6,VAL7
0,0,1,2,3,1,6,28
1,4,5,6,7,9,42,29
2,8,9,10,11,17,110,30
3,12,13,14,15,25,210,31
4,16,17,18,19,33,342,32
5,20,21,22,23,41,506,33
6,24,25,26,27,49,702,34


In [167]:
df["VAL8"] = [35, 36, 37, 38, 39, 40, 41]

In [168]:
df

Unnamed: 0,VAL1,VAL2,VAL3,VAL4,VAL5,VAL6,VAL7,VAL8
0,0,1,2,3,1,6,28,35
1,4,5,6,7,9,42,29,36
2,8,9,10,11,17,110,30,37
3,12,13,14,15,25,210,31,38
4,16,17,18,19,33,342,32,39
5,20,21,22,23,41,506,33,40
6,24,25,26,27,49,702,34,41


In [169]:
df["VAL9"] =[35, 36, 37, 38, 39, 40]

ValueError: Length of values (6) does not match length of index (7)

## Pandas Data Frame’lerinden  Satır ve Sütun Çıkarma 

In [170]:
import numpy as np
import pandas as pd

In [171]:
x = np.arange(0, 28).reshape(7, 4)
df = pd.DataFrame(data = x, columns = ["VAL1", "VAL2", "VAL3", "VAL4"])

In [172]:
df

Unnamed: 0,VAL1,VAL2,VAL3,VAL4
0,0,1,2,3
1,4,5,6,7
2,8,9,10,11
3,12,13,14,15
4,16,17,18,19
5,20,21,22,23
6,24,25,26,27


In [173]:
df.drop(["VAL3", "VAL4"], axis = 1)

Unnamed: 0,VAL1,VAL2
0,0,1
1,4,5
2,8,9
3,12,13
4,16,17
5,20,21
6,24,25


In [174]:
df

Unnamed: 0,VAL1,VAL2,VAL3,VAL4
0,0,1,2,3
1,4,5,6,7
2,8,9,10,11
3,12,13,14,15
4,16,17,18,19
5,20,21,22,23
6,24,25,26,27


In [175]:
df.drop(["VAL3", "VAL4"], axis = 1, inplace = True)

In [176]:
df

Unnamed: 0,VAL1,VAL2
0,0,1
1,4,5
2,8,9
3,12,13
4,16,17
5,20,21
6,24,25


In [177]:
df.drop([2, 5], axis = 0, inplace = True)

In [178]:
df

Unnamed: 0,VAL1,VAL2
0,0,1
1,4,5
3,12,13
4,16,17
6,24,25


## Pandas Dataframelerinde Boş(Null) Değerler

In [179]:
import pandas as pd
import numpy as np

In [180]:
df = pd.DataFrame({'VAL1':[2, 4, np.nan, 6, np.nan, 8, 10],
                   'VAL2':[123, np.nan, 456, np.nan, 789, 246, 357],
                   'VAL3':['France', 'Greece', 'USA', 'Japan', "Sweden", "Norway", "Turkey"]})

In [181]:
df

Unnamed: 0,VAL1,VAL2,VAL3
0,2.0,123.0,France
1,4.0,,Greece
2,,456.0,USA
3,6.0,,Japan
4,,789.0,Sweden
5,8.0,246.0,Norway
6,10.0,357.0,Turkey


In [182]:
df.isnull()

Unnamed: 0,VAL1,VAL2,VAL3
0,False,False,False
1,False,True,False
2,True,False,False
3,False,True,False
4,True,False,False
5,False,False,False
6,False,False,False


In [183]:
df.isnull().sum()

VAL1    2
VAL2    2
VAL3    0
dtype: int64

In [184]:
len(df)

7

In [185]:
df.isnull().sum() / len(df) * 100

VAL1    28.571429
VAL2    28.571429
VAL3     0.000000
dtype: float64

In [186]:
df.notnull()

Unnamed: 0,VAL1,VAL2,VAL3
0,True,True,True
1,True,False,True
2,False,True,True
3,True,False,True
4,False,True,True
5,True,True,True
6,True,True,True


In [187]:
df.notnull().sum()

VAL1    5
VAL2    5
VAL3    7
dtype: int64

In [188]:
df.notnull().sum().sum()

17

In [189]:
df["VAL1"].notnull()

0     True
1     True
2    False
3     True
4    False
5     True
6     True
Name: VAL1, dtype: bool

In [190]:
df[df["VAL1"].notnull()]

Unnamed: 0,VAL1,VAL2,VAL3
0,2.0,123.0,France
1,4.0,,Greece
3,6.0,,Japan
5,8.0,246.0,Norway
6,10.0,357.0,Turkey


In [191]:
df

Unnamed: 0,VAL1,VAL2,VAL3
0,2.0,123.0,France
1,4.0,,Greece
2,,456.0,USA
3,6.0,,Japan
4,,789.0,Sweden
5,8.0,246.0,Norway
6,10.0,357.0,Turkey


In [192]:
df.isnull().any()

VAL1     True
VAL2     True
VAL3    False
dtype: bool

In [193]:
df.isnull().any(axis = 1)

0    False
1     True
2     True
3     True
4     True
5    False
6    False
dtype: bool

In [194]:
condition = df.isnull().any(axis = 1)

In [195]:
df[condition]

Unnamed: 0,VAL1,VAL2,VAL3
1,4.0,,Greece
2,,456.0,USA
3,6.0,,Japan
4,,789.0,Sweden


In [196]:
df[~condition]

Unnamed: 0,VAL1,VAL2,VAL3
0,2.0,123.0,France
5,8.0,246.0,Norway
6,10.0,357.0,Turkey


In [197]:
df

Unnamed: 0,VAL1,VAL2,VAL3
0,2.0,123.0,France
1,4.0,,Greece
2,,456.0,USA
3,6.0,,Japan
4,,789.0,Sweden
5,8.0,246.0,Norway
6,10.0,357.0,Turkey


In [198]:
df.notnull().all()

VAL1    False
VAL2    False
VAL3     True
dtype: bool

In [199]:
df.isnull().all(axis = 1)

0    False
1    False
2    False
3    False
4    False
5    False
6    False
dtype: bool

## Boş(Null) Değerleri Düşürme: Dropna() Fonksiyonu

In [200]:
import pandas as pd
import numpy as np

In [201]:
df = pd.DataFrame({'VAL1':[2, 4, np.nan, 6, np.nan, 8, 10],
                   'VAL2':[123, np.nan, 456, np.nan, 789, 246, 357],
                   'VAL3':['France', 'Greece', 'USA', 'Japan', "Sweden", "Norway", "Turkey"]})

In [202]:
df

Unnamed: 0,VAL1,VAL2,VAL3
0,2.0,123.0,France
1,4.0,,Greece
2,,456.0,USA
3,6.0,,Japan
4,,789.0,Sweden
5,8.0,246.0,Norway
6,10.0,357.0,Turkey


In [203]:
df.dropna()

Unnamed: 0,VAL1,VAL2,VAL3
0,2.0,123.0,France
5,8.0,246.0,Norway
6,10.0,357.0,Turkey


In [204]:
df

Unnamed: 0,VAL1,VAL2,VAL3
0,2.0,123.0,France
1,4.0,,Greece
2,,456.0,USA
3,6.0,,Japan
4,,789.0,Sweden
5,8.0,246.0,Norway
6,10.0,357.0,Turkey


In [205]:
df.dropna(axis = 1)

Unnamed: 0,VAL3
0,France
1,Greece
2,USA
3,Japan
4,Sweden
5,Norway
6,Turkey


In [206]:
df["VAL4"] = np.nan

In [207]:
df

Unnamed: 0,VAL1,VAL2,VAL3,VAL4
0,2.0,123.0,France,
1,4.0,,Greece,
2,,456.0,USA,
3,6.0,,Japan,
4,,789.0,Sweden,
5,8.0,246.0,Norway,
6,10.0,357.0,Turkey,


In [208]:
df.dropna(how = "all")

Unnamed: 0,VAL1,VAL2,VAL3,VAL4
0,2.0,123.0,France,
1,4.0,,Greece,
2,,456.0,USA,
3,6.0,,Japan,
4,,789.0,Sweden,
5,8.0,246.0,Norway,
6,10.0,357.0,Turkey,


In [209]:
df.dropna(how = "all", axis = 1)

Unnamed: 0,VAL1,VAL2,VAL3
0,2.0,123.0,France
1,4.0,,Greece
2,,456.0,USA
3,6.0,,Japan
4,,789.0,Sweden
5,8.0,246.0,Norway
6,10.0,357.0,Turkey


In [210]:
df

Unnamed: 0,VAL1,VAL2,VAL3,VAL4
0,2.0,123.0,France,
1,4.0,,Greece,
2,,456.0,USA,
3,6.0,,Japan,
4,,789.0,Sweden,
5,8.0,246.0,Norway,
6,10.0,357.0,Turkey,


In [211]:
df.dropna(how = "all", axis = 1, inplace = True)

In [212]:
df

Unnamed: 0,VAL1,VAL2,VAL3
0,2.0,123.0,France
1,4.0,,Greece
2,,456.0,USA
3,6.0,,Japan
4,,789.0,Sweden
5,8.0,246.0,Norway
6,10.0,357.0,Turkey


## Boş(Null) Değerleri Doldurma: Fillna() Fonksiyonu

In [213]:
import pandas as pd
import numpy as np

In [214]:
df = pd.DataFrame({'VAL1':[2, 4, np.nan, 6, np.nan, 8, 10],
                   'VAL2':[123, np.nan, 456, np.nan, 789, 246, 357],
                   'VAL3':['France', 'Greece', 'USA', 'Japan', "Sweden", "Norway", "Turkey"]})

In [215]:
df

Unnamed: 0,VAL1,VAL2,VAL3
0,2.0,123.0,France
1,4.0,,Greece
2,,456.0,USA
3,6.0,,Japan
4,,789.0,Sweden
5,8.0,246.0,Norway
6,10.0,357.0,Turkey


In [216]:
df.fillna(100000)

Unnamed: 0,VAL1,VAL2,VAL3
0,2.0,123.0,France
1,4.0,100000.0,Greece
2,100000.0,456.0,USA
3,6.0,100000.0,Japan
4,100000.0,789.0,Sweden
5,8.0,246.0,Norway
6,10.0,357.0,Turkey


In [217]:
df

Unnamed: 0,VAL1,VAL2,VAL3
0,2.0,123.0,France
1,4.0,,Greece
2,,456.0,USA
3,6.0,,Japan
4,,789.0,Sweden
5,8.0,246.0,Norway
6,10.0,357.0,Turkey


In [218]:
df["VAL1"].fillna(100000)

0         2.0
1         4.0
2    100000.0
3         6.0
4    100000.0
5         8.0
6        10.0
Name: VAL1, dtype: float64

In [219]:
df

Unnamed: 0,VAL1,VAL2,VAL3
0,2.0,123.0,France
1,4.0,,Greece
2,,456.0,USA
3,6.0,,Japan
4,,789.0,Sweden
5,8.0,246.0,Norway
6,10.0,357.0,Turkey


In [220]:
df.mean()

VAL1      6.0
VAL2    394.2
dtype: float64

In [221]:
df.fillna(df.mean())

Unnamed: 0,VAL1,VAL2,VAL3
0,2.0,123.0,France
1,4.0,394.2,Greece
2,6.0,456.0,USA
3,6.0,394.2,Japan
4,6.0,789.0,Sweden
5,8.0,246.0,Norway
6,10.0,357.0,Turkey


In [222]:
df

Unnamed: 0,VAL1,VAL2,VAL3
0,2.0,123.0,France
1,4.0,,Greece
2,,456.0,USA
3,6.0,,Japan
4,,789.0,Sweden
5,8.0,246.0,Norway
6,10.0,357.0,Turkey


In [223]:
df.fillna({"VAL1":100000, "VAL2":200000})

Unnamed: 0,VAL1,VAL2,VAL3
0,2.0,123.0,France
1,4.0,200000.0,Greece
2,100000.0,456.0,USA
3,6.0,200000.0,Japan
4,100000.0,789.0,Sweden
5,8.0,246.0,Norway
6,10.0,357.0,Turkey


In [224]:
df["VAL1"].fillna(df["VAL2"].mean())

0      2.0
1      4.0
2    394.2
3      6.0
4    394.2
5      8.0
6     10.0
Name: VAL1, dtype: float64

In [225]:
df

Unnamed: 0,VAL1,VAL2,VAL3
0,2.0,123.0,France
1,4.0,,Greece
2,,456.0,USA
3,6.0,,Japan
4,,789.0,Sweden
5,8.0,246.0,Norway
6,10.0,357.0,Turkey


In [226]:
df["VAL3"][0::2]

0    France
2       USA
4    Sweden
6    Turkey
Name: VAL3, dtype: object

In [227]:
df["VAL3"][0::2] = np.nan

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["VAL3"][0::2] = np.nan


In [228]:
df

Unnamed: 0,VAL1,VAL2,VAL3
0,2.0,123.0,
1,4.0,,Greece
2,,456.0,
3,6.0,,Japan
4,,789.0,
5,8.0,246.0,Norway
6,10.0,357.0,


In [229]:
df["VAL3"].fillna("Turkey")

0    Turkey
1    Greece
2    Turkey
3     Japan
4    Turkey
5    Norway
6    Turkey
Name: VAL3, dtype: object

In [230]:
df.fillna(method = "ffill")

Unnamed: 0,VAL1,VAL2,VAL3
0,2.0,123.0,
1,4.0,123.0,Greece
2,4.0,456.0,Greece
3,6.0,456.0,Japan
4,6.0,789.0,Japan
5,8.0,246.0,Norway
6,10.0,357.0,Norway


In [231]:
df.fillna(method = "pad")

Unnamed: 0,VAL1,VAL2,VAL3
0,2.0,123.0,
1,4.0,123.0,Greece
2,4.0,456.0,Greece
3,6.0,456.0,Japan
4,6.0,789.0,Japan
5,8.0,246.0,Norway
6,10.0,357.0,Norway


In [232]:
df.fillna(method = "bfill")

Unnamed: 0,VAL1,VAL2,VAL3
0,2.0,123.0,Greece
1,4.0,456.0,Greece
2,6.0,456.0,Japan
3,6.0,789.0,Japan
4,8.0,789.0,Norway
5,8.0,246.0,Norway
6,10.0,357.0,


In [233]:
df.fillna(method = "backfill")

Unnamed: 0,VAL1,VAL2,VAL3
0,2.0,123.0,Greece
1,4.0,456.0,Greece
2,6.0,456.0,Japan
3,6.0,789.0,Japan
4,8.0,789.0,Norway
5,8.0,246.0,Norway
6,10.0,357.0,
