# Pandas

In [1]:
import numpy as np
import pandas as pd
import math

In [2]:
import warnings
# NOTE(review): this silences every warning category for the rest of the
# session, including any deprecation warnings that later cells may trigger.
warnings.filterwarnings('ignore')

## Seriler

### Array'den Seri olusturma

In [3]:
# Build a Series from a NumPy array holding the integers 1..7.
# No index is passed, so pandas assigns the default labels 0..6.
v = np.arange(1,8)
s1 = pd.Series(v)
s1

0    1
1    2
2    3
3    4
4    5
5    6
6    7
dtype: int64

---

### Temel bilgiler edinme

In [4]:
s1.dtype

dtype('int64')

In [5]:
s1.nbytes

56

In [6]:
s1.values.itemsize

8

In [7]:
s1.shape

(7,)

In [8]:
s1.count()

7

---

### Seri olusturma

In [9]:
s0 = pd.Series([1,2,3], index=['a','b','c'])
s0

a    1
b    2
c    3
dtype: int64

---

### Index degistirme

In [10]:
# Replace the labels of s1 with letters (one label per element, 7 in total).
s1.index = ['a','b','c','d','e','f','g']
s1

a    1
b    2
c    3
d    4
e    5
f    6
g    7
dtype: int64

---

In [11]:
# Series of 10 random floats; the second positional argument of pd.Series
# is the index, here the integers 0..9.
# NOTE(review): no random seed is set in the cells above, so these values
# change on every run.
v2 = np.random.random(10)
ind2 = np.arange(10)
s = pd.Series(v2, ind2)
v2, ind2, s

(array([0.1150598 , 0.4265492 , 0.98173682, 0.08566313, 0.38188479,
        0.16767612, 0.26763496, 0.17231019, 0.45502326, 0.83718543]),
 array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
 0    0.115060
 1    0.426549
 2    0.981737
 3    0.085663
 4    0.381885
 5    0.167676
 6    0.267635
 7    0.172310
 8    0.455023
 9    0.837185
 dtype: float64)

---

### Dictionary'den Series olusturma

In [12]:
dict1 = {'a1': 10, 'a2': 20, 'a3': 30, 'a4': 40}
s3 = pd.Series(dict1)
s3

a1    10
a2    20
a3    30
a4    40
dtype: int64

---

In [13]:
# A scalar value is repeated for every label in the given index.
pd.Series(99, index=[0,1,2,3,4,5])

0    99
1    99
2    99
3    99
4    99
5    99
dtype: int64

---

### Seriler'de secme

In [14]:
s

0    0.115060
1    0.426549
2    0.981737
3    0.085663
4    0.381885
5    0.167676
6    0.267635
7    0.172310
8    0.455023
9    0.837185
dtype: float64

In [15]:
s[0:3]

0    0.115060
1    0.426549
2    0.981737
dtype: float64

In [16]:
s[-1:]

9    0.837185
dtype: float64

---

### Serileri birbirine ekleme

In [17]:
s2 = s1.copy()
s2

a    1
b    2
c    3
d    4
e    5
f    6
g    7
dtype: int64

In [18]:
# Join s2 and s3 end to end; the index labels of both Series are kept as-is.
# pd.concat replaces Series.append, which was deprecated in pandas 1.4 and
# removed in pandas 2.0.
s4 = pd.concat([s2, s3])
s4

a      1
b      2
c      3
d      4
e      5
f      6
g      7
a1    10
a2    20
a3    30
a4    40
dtype: int64

---

### Silme

In [19]:
s4.drop('a4', inplace=False)
# inplace=False, so nothing is removed permanently: a new Series without
# 'a4' is returned and s4 itself is unchanged.

a      1
b      2
c      3
d      4
e      5
f      6
g      7
a1    10
a2    20
a3    30
dtype: int64

In [20]:
# Now remove the label permanently: inplace=True modifies s4 itself.
s4.drop('a4', inplace=True)
s4

a      1
b      2
c      3
d      4
e      5
f      6
g      7
a1    10
a2    20
a3    30
dtype: int64

---

### Satir ekleme

In [21]:
# Add one labelled element ('a4' -> 7) to the end of s4.
# pd.concat is used because Series.append was removed in pandas 2.0.
s4 = pd.concat([s4, pd.Series({'a4': 7})])
s4

a      1
b      2
c      3
d      4
e      5
f      6
g      7
a1    10
a2    20
a3    30
a4     7
dtype: int64

---

### Serilerde Operasyonlar ( +-/* )

In [22]:
# Two small integer Series used by the arithmetic examples that follow.
# Note: this rebinds v2, s1 and s2, replacing the objects created earlier.
v1 = np.array([10,20,30])
v2 = np.array([1,2,3])
s1 = pd.Series(v1)
s2 = pd.Series(v2)
s1, s2

(0    10
 1    20
 2    30
 dtype: int64,
 0    1
 1    2
 2    3
 dtype: int64)

### Toplama

In [23]:
s1.add(s2)

0    11
1    22
2    33
dtype: int64

In [24]:
s1.add(9) # adds 9 to every element

0    19
1    29
2    39
dtype: int64

### Cikartma

In [25]:
# Element-wise subtraction, spelled two ways (sub / subtract).
# Only the last expression of a cell is displayed, so the first result is
# computed and discarded.
#1
s1.sub(s2)
#2
s1.subtract(s2)

0     9
1    18
2    27
dtype: int64

### Carpma

In [26]:
# Element-wise multiplication, spelled two ways (mul / multiply).
# Only the last expression of a cell is displayed.
#1
s1.mul(s2)
#2
s1.multiply(s2)

0    10
1    40
2    90
dtype: int64

### Bolme

In [27]:
# Element-wise division, spelled two ways (div / divide); the recorded
# result is float64 even though both inputs are integers.
# Only the last expression of a cell is displayed.
#1
s1.div(s2)
#2
s1.divide(s2)

0    10.0
1    10.0
2    10.0
dtype: float64

---

### Hangi degerden kac adet var?

In [28]:
s5 = pd.Series([1,1,2,2,3,3], index = [0,1,2,3,4,5])
s5

0    1
1    1
2    2
3    2
4    3
5    3
dtype: int64

In [29]:
s5.value_counts()

1    2
2    2
3    2
dtype: int64

---

# DataFrames

### DataFrame Olusturma Cesitleri

In [30]:
lang = ['Java', 'Python', 'C', 'C++']
df = pd.DataFrame(lang)
df

Unnamed: 0,0
0,Java
1,Python
2,C
3,C++


In [31]:
# Add a new column: assigning to a column key that does not exist yet
# (here the integer label 1) creates it.
rating = [1,2,3,4]
df[1] = rating
df

Unnamed: 0,0,1
0,Java,1
1,Python,2
2,C,3
3,C++,4


In [32]:
# Rename the columns by assigning a full list of new names (one per column).
df.columns=['Language','Rating']
df

Unnamed: 0,Language,Rating
0,Java,1
1,Python,2
2,C,3
3,C++,4


In [33]:
# Two records with different key sets; a key missing from a record shows up
# as NaN in the resulting frame.
data = [
    {'a': 1, 'b': 2},
    {'a': 5, 'b': 10, 'c': 20},
]

# No index given: default integer row labels, columns taken from the keys.
df2 = pd.DataFrame(data)
# Explicit row labels, with progressively wider column selections.
# A requested column that no record provides ('d') is entirely NaN.
df3 = pd.DataFrame(data, index=['row1', 'row2'], columns=list('ab'))
df4 = pd.DataFrame(data, index=['row1', 'row2'], columns=list('abc'))
df5 = pd.DataFrame(data, index=['row1', 'row2'], columns=list('abcd'))

In [34]:
df2

Unnamed: 0,a,b,c
0,1,2,
1,5,10,20.0


In [35]:
df3

Unnamed: 0,a,b
row1,1,2
row2,5,10


In [36]:
df4

Unnamed: 0,a,b,c
row1,1,2,
row2,5,10,20.0


In [37]:
df5

Unnamed: 0,a,b,c,d
row1,1,2,,
row2,5,10,20.0,


In [38]:
# Build a DataFrame from a dictionary of Series. The row index is the union
# of the Series indexes; 'A' has no value for label 'd', so it becomes NaN.
dict1 = {
    'A': pd.Series([1, 2, 3], index=list('abc')),
    'B': pd.Series([1, 2, 3, 4], index=list('abcd')),
}

df1 = pd.DataFrame(dict1)
df1

Unnamed: 0,A,B
a,1.0,1
b,2.0,2
c,3.0,3
d,,4


---

### Datetime index

In [39]:
dates = pd.date_range(start = '2022-11-22', end='2022-11-28')
dates

DatetimeIndex(['2022-11-22', '2022-11-23', '2022-11-24', '2022-11-25',
               '2022-11-26', '2022-11-27', '2022-11-28'],
              dtype='datetime64[ns]', freq='D')

In [40]:
# The recorded output includes the time of day, so it differs on every run;
# dates is reassigned with a fixed start in the next cell.
dates = pd.date_range('today', periods=7) # 7 daily timestamps starting from now
dates

DatetimeIndex(['2023-11-23 01:09:11.005263', '2023-11-24 01:09:11.005263',
               '2023-11-25 01:09:11.005263', '2023-11-26 01:09:11.005263',
               '2023-11-27 01:09:11.005263', '2023-11-28 01:09:11.005263',
               '2023-11-29 01:09:11.005263'],
              dtype='datetime64[ns]', freq='D')

In [41]:
dates = pd.date_range(start='2022-11-22', periods=7)
dates

DatetimeIndex(['2022-11-22', '2022-11-23', '2022-11-24', '2022-11-25',
               '2022-11-26', '2022-11-27', '2022-11-28'],
              dtype='datetime64[ns]', freq='D')

---

### Dates verisini dataframe'in indexine atma

In [42]:
M = np.random.random((7,7))

In [43]:
dframe = pd.DataFrame(M, index= dates)
dframe

Unnamed: 0,0,1,2,3,4,5,6
2022-11-22,0.181624,0.099363,0.184918,0.76357,0.843187,0.624249,0.655874
2022-11-23,0.580381,0.837915,0.293135,0.027591,0.922231,0.761267,0.856487
2022-11-24,0.53313,0.716652,0.900917,0.339055,0.291761,0.823687,0.155479
2022-11-25,0.148933,0.287608,0.438885,0.661513,0.880717,0.246744,0.120721
2022-11-26,0.688791,0.277962,0.63893,0.010426,0.566283,0.257925,0.662456
2022-11-27,0.380874,0.67492,0.04521,0.84506,0.426547,0.235787,0.583502
2022-11-28,0.710874,0.595329,0.382119,0.8958,0.478124,0.996588,0.197125


In [44]:
# Rename the seven columns from 0..6 to C1..C7.
dframe.columns = ['C1','C2','C3','C4','C5','C6','C7']
dframe

Unnamed: 0,C1,C2,C3,C4,C5,C6,C7
2022-11-22,0.181624,0.099363,0.184918,0.76357,0.843187,0.624249,0.655874
2022-11-23,0.580381,0.837915,0.293135,0.027591,0.922231,0.761267,0.856487
2022-11-24,0.53313,0.716652,0.900917,0.339055,0.291761,0.823687,0.155479
2022-11-25,0.148933,0.287608,0.438885,0.661513,0.880717,0.246744,0.120721
2022-11-26,0.688791,0.277962,0.63893,0.010426,0.566283,0.257925,0.662456
2022-11-27,0.380874,0.67492,0.04521,0.84506,0.426547,0.235787,0.583502
2022-11-28,0.710874,0.595329,0.382119,0.8958,0.478124,0.996588,0.197125


In [45]:
dframe.index

DatetimeIndex(['2022-11-22', '2022-11-23', '2022-11-24', '2022-11-25',
               '2022-11-26', '2022-11-27', '2022-11-28'],
              dtype='datetime64[ns]', freq='D')

In [46]:
dframe.columns

Index(['C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'C7'], dtype='object')

In [47]:
dframe.dtypes

C1    float64
C2    float64
C3    float64
C4    float64
C5    float64
C6    float64
C7    float64
dtype: object

---

### Siralama ascending&descending

In [48]:
dframe.sort_values(by='C1') # sort rows by column C1, smallest to largest

Unnamed: 0,C1,C2,C3,C4,C5,C6,C7
2022-11-25,0.148933,0.287608,0.438885,0.661513,0.880717,0.246744,0.120721
2022-11-22,0.181624,0.099363,0.184918,0.76357,0.843187,0.624249,0.655874
2022-11-27,0.380874,0.67492,0.04521,0.84506,0.426547,0.235787,0.583502
2022-11-24,0.53313,0.716652,0.900917,0.339055,0.291761,0.823687,0.155479
2022-11-23,0.580381,0.837915,0.293135,0.027591,0.922231,0.761267,0.856487
2022-11-26,0.688791,0.277962,0.63893,0.010426,0.566283,0.257925,0.662456
2022-11-28,0.710874,0.595329,0.382119,0.8958,0.478124,0.996588,0.197125


In [49]:
dframe.sort_values(by='C1',ascending=False) # sort rows by column C1, largest to smallest

Unnamed: 0,C1,C2,C3,C4,C5,C6,C7
2022-11-28,0.710874,0.595329,0.382119,0.8958,0.478124,0.996588,0.197125
2022-11-26,0.688791,0.277962,0.63893,0.010426,0.566283,0.257925,0.662456
2022-11-23,0.580381,0.837915,0.293135,0.027591,0.922231,0.761267,0.856487
2022-11-24,0.53313,0.716652,0.900917,0.339055,0.291761,0.823687,0.155479
2022-11-27,0.380874,0.67492,0.04521,0.84506,0.426547,0.235787,0.583502
2022-11-22,0.181624,0.099363,0.184918,0.76357,0.843187,0.624249,0.655874
2022-11-25,0.148933,0.287608,0.438885,0.661513,0.880717,0.246744,0.120721


---

### Sutun silme

In [50]:
df1

Unnamed: 0,A,B
a,1.0,1
b,2.0,2
c,3.0,3
d,,4


In [51]:
# Column deletion, method 1: the del statement (modifies df1 in place).
del df1['B'] # removes column B from df1
df1

Unnamed: 0,A
a,1.0
b,2.0
c,3.0
d,


In [52]:
df5

Unnamed: 0,a,b,c,d
row1,1,2,,
row2,5,10,20.0,


In [53]:
# Column deletion, method 2: pop removes the column from df5 in place and
# returns the removed column as a Series (that Series is what is displayed).
df5.pop('c')

row1     NaN
row2    20.0
Name: c, dtype: float64

In [54]:
# Rebuild the same two-column example frame (as df12) for the next deletion
# method; 'A' has no value for label 'd', so that cell is NaN.
dict1 = {'A' : pd.Series([1,2,3], index= ['a','b','c']),
         'B' : pd.Series([1,2,3,4], index= ['a','b','c','d'])}

df12 = pd.DataFrame(dict1)
df12

Unnamed: 0,A,B
a,1.0,1
b,2.0,2
c,3.0,3
d,,4


In [55]:
# Column deletion, method 3: drop with axis=1 (columns); inplace=True
# modifies df12 itself instead of returning a new frame.
df12.drop(['A'], axis=1, inplace=True)
df12

Unnamed: 0,B
a,1
b,2
c,3
d,4


---

### Satir silme

In [56]:
# 30-row frame: C1 holds 30 evenly spaced values from 10 to 100, C2 holds
# 30 random integers drawn from [10, 100).
# NOTE(review): no random seed is set, so C2 - and therefore which rows the
# value-based filters in the following cells match - differs between runs.
col1 = np.linspace(10,100,30)
col2 = np.random.randint(10,100,30)
df10 = pd.DataFrame({'C1': col1,'C2': col2})
df10

Unnamed: 0,C1,C2
0,10.0,80
1,13.103448,40
2,16.206897,94
3,19.310345,50
4,22.413793,49
5,25.517241,25
6,28.62069,63
7,31.724138,92
8,34.827586,77
9,37.931034,30


In [57]:
# Drop rows by index label (axis=0 = rows). drop returns a new frame, which
# is assigned back to df10. Re-running this cell raises KeyError because the
# labels are already gone.
df10 = df10.drop([17,18,19], axis = 0)
df10

Unnamed: 0,C1,C2
0,10.0,80
1,13.103448,40
2,16.206897,94
3,19.310345,50
4,22.413793,49
5,25.517241,25
6,28.62069,63
7,31.724138,92
8,34.827586,77
9,37.931034,30


In [58]:
# Drop the row labelled 15, this time modifying df10 in place.
df10.drop([15], axis=0, inplace=True)
df10

Unnamed: 0,C1,C2
0,10.0,80
1,13.103448,40
2,16.206897,94
3,19.310345,50
4,22.413793,49
5,25.517241,25
6,28.62069,63
7,31.724138,92
8,34.827586,77
9,37.931034,30


In [59]:
# Drop by position: df10.index[5] looks up the label of the 6th row, and
# that label is then dropped in place.
df10.drop(df10.index[5], inplace=True)
df10

Unnamed: 0,C1,C2
0,10.0,80
1,13.103448,40
2,16.206897,94
3,19.310345,50
4,22.413793,49
6,28.62069,63
7,31.724138,92
8,34.827586,77
9,37.931034,30
10,41.034483,98


In [60]:
# Discard the first three rows by position (keep everything from row 3 on).
df10 = df10.iloc[3:,]
df10

Unnamed: 0,C1,C2
3,19.310345,50
4,22.413793,49
6,28.62069,63
7,31.724138,92
8,34.827586,77
9,37.931034,30
10,41.034483,98
11,44.137931,45
12,47.241379,76
13,50.344828,97


In [61]:
# Discard the last four rows by position.
df10 = df10.iloc[:-4,]
df10

Unnamed: 0,C1,C2
3,19.310345,50
4,22.413793,49
6,28.62069,63
7,31.724138,92
8,34.827586,77
9,37.931034,30
10,41.034483,98
11,44.137931,45
12,47.241379,76
13,50.344828,97


In [62]:
# Keep only the first ten rows by position.
df10 = df10.iloc[:10,]
df10

Unnamed: 0,C1,C2
3,19.310345,50
4,22.413793,49
6,28.62069,63
7,31.724138,92
8,34.827586,77
9,37.931034,30
10,41.034483,98
11,44.137931,45
12,47.241379,76
13,50.344828,97


In [63]:
# Index labels of the rows whose C2 equals 54, as a plain list
# (empty in the recorded run: no row matched).
df10.index[df10['C2']==54].tolist()

[]

In [64]:
# Drop, in place, every row whose C2 equals 54. With an empty label list
# (as in the recorded run) the frame is left unchanged.
df10.drop(df10.index[df10['C2']==54].tolist(), axis=0, inplace=True)
df10

Unnamed: 0,C1,C2
3,19.310345,50
4,22.413793,49
6,28.62069,63
7,31.724138,92
8,34.827586,77
9,37.931034,30
10,41.034483,98
11,44.137931,45
12,47.241379,76
13,50.344828,97


In [65]:
# Same idea, different spelling: filter the matching rows first, take their
# index, and drop those labels (returns a new frame).
df10 = df10.drop(df10[df10['C2']==79].index)
df10

Unnamed: 0,C1,C2
3,19.310345,50
4,22.413793,49
6,28.62069,63
7,31.724138,92
8,34.827586,77
9,37.931034,30
10,41.034483,98
11,44.137931,45
12,47.241379,76
13,50.344828,97


In [66]:
# Remove rows by keeping only those whose C2 is not 43 (boolean mask).
df10 = df10[df10.C2 != 43]
df10

Unnamed: 0,C1,C2
3,19.310345,50
4,22.413793,49
6,28.62069,63
7,31.724138,92
8,34.827586,77
9,37.931034,30
10,41.034483,98
11,44.137931,45
12,47.241379,76
13,50.344828,97


In [67]:
# Remove rows whose C2 is one of several values: isin builds the mask and
# ~ inverts it, so matching rows are excluded.
df10 = df10[~(df10.C2.isin([76,91]))]
df10

Unnamed: 0,C1,C2
3,19.310345,50
4,22.413793,49
6,28.62069,63
7,31.724138,92
8,34.827586,77
9,37.931034,30
10,41.034483,98
11,44.137931,45
13,50.344828,97


In [68]:
# NOTE(review): unlike the previous cell there is no ~ here, so this KEEPS
# only the rows whose C2 is 48 or 55. In the recorded run nothing matched,
# leaving df10 empty - confirm this is the intended demonstration.
df10 = df10[(df10.C2.isin([48,55]))]
df10

Unnamed: 0,C1,C2


In [69]:
# Rebuild the same two-column example frame (as df11), this time to
# demonstrate dropping a row by its string label.
dict1 = {'A' : pd.Series([1,2,3], index= ['a','b','c']),
         'B' : pd.Series([1,2,3,4], index= ['a','b','c','d'])}

df11 = pd.DataFrame(dict1)
df11

Unnamed: 0,A,B
a,1.0,1
b,2.0,2
c,3.0,3
d,,4


In [70]:
# Drop the row labelled 'd' in place (axis=0 = rows).
df11.drop('d',axis=0, inplace=True)
df11

Unnamed: 0,A,B
a,1.0,1
b,2.0,2
c,3.0,3


In [71]:
# Small people table built row by row: one (ID, Name, location) tuple per
# record, with the column names supplied separately.
df13 = pd.DataFrame(
    [
        (1, 'Berna', 'UK'),
        (2, 'Efkan', 'US'),
        (3, 'Hilal', 'Austria'),
        (4, 'Dogu', 'Spain'),
    ],
    columns=['ID', 'Name', 'location'],
)
df13

Unnamed: 0,ID,Name,location
0,1,Berna,UK
1,2,Efkan,US
2,3,Hilal,Austria
3,4,Dogu,Spain


In [72]:
# Find the index labels of the rows that satisfy all three conditions at
# once, then remove those rows from df13 in place.
ind = df13.index[
    df13['ID'].eq(4) & df13['Name'].eq('Dogu') & df13['location'].eq('Spain')
]
df13.drop(ind, inplace=True)
df13

Unnamed: 0,ID,Name,location
0,1,Berna,UK
1,2,Efkan,US
2,3,Hilal,Austria


---

### Veri Secme

In [73]:
dframe

Unnamed: 0,C1,C2,C3,C4,C5,C6,C7
2022-11-22,0.181624,0.099363,0.184918,0.76357,0.843187,0.624249,0.655874
2022-11-23,0.580381,0.837915,0.293135,0.027591,0.922231,0.761267,0.856487
2022-11-24,0.53313,0.716652,0.900917,0.339055,0.291761,0.823687,0.155479
2022-11-25,0.148933,0.287608,0.438885,0.661513,0.880717,0.246744,0.120721
2022-11-26,0.688791,0.277962,0.63893,0.010426,0.566283,0.257925,0.662456
2022-11-27,0.380874,0.67492,0.04521,0.84506,0.426547,0.235787,0.583502
2022-11-28,0.710874,0.595329,0.382119,0.8958,0.478124,0.996588,0.197125


In [74]:
# Row selection by date labels; both endpoints are included in the result.
dframe.loc['2022-11-22':'2022-11-24']

Unnamed: 0,C1,C2,C3,C4,C5,C6,C7
2022-11-22,0.181624,0.099363,0.184918,0.76357,0.843187,0.624249,0.655874
2022-11-23,0.580381,0.837915,0.293135,0.027591,0.922231,0.761267,0.856487
2022-11-24,0.53313,0.716652,0.900917,0.339055,0.291761,0.823687,0.155479


In [75]:
dframe.loc[:, ['C1', 'C7']]

Unnamed: 0,C1,C7
2022-11-22,0.181624,0.655874
2022-11-23,0.580381,0.856487
2022-11-24,0.53313,0.155479
2022-11-25,0.148933,0.120721
2022-11-26,0.688791,0.662456
2022-11-27,0.380874,0.583502
2022-11-28,0.710874,0.197125


In [76]:
dframe[dframe['C1']> 0.5]

Unnamed: 0,C1,C2,C3,C4,C5,C6,C7
2022-11-23,0.580381,0.837915,0.293135,0.027591,0.922231,0.761267,0.856487
2022-11-24,0.53313,0.716652,0.900917,0.339055,0.291761,0.823687,0.155479
2022-11-26,0.688791,0.277962,0.63893,0.010426,0.566283,0.257925,0.662456
2022-11-28,0.710874,0.595329,0.382119,0.8958,0.478124,0.996588,0.197125


In [77]:
dframe[(dframe['C1']>0.2)&(dframe['C4']>0.5)]

Unnamed: 0,C1,C2,C3,C4,C5,C6,C7
2022-11-27,0.380874,0.67492,0.04521,0.84506,0.426547,0.235787,0.583502
2022-11-28,0.710874,0.595329,0.382119,0.8958,0.478124,0.996588,0.197125


In [78]:
# Single value at row position 0, column position 0.
# One iloc call with both positions replaces iloc[0][0]: the second [0] was an
# integer key on a Series labelled 'C1'..'C7', and treating such keys as
# positions is deprecated in recent pandas.
dframe.iloc[0, 0]

0.18162352345810706

In [79]:
dframe.iloc[:, 0:3]

Unnamed: 0,C1,C2,C3
2022-11-22,0.181624,0.099363,0.184918
2022-11-23,0.580381,0.837915,0.293135
2022-11-24,0.53313,0.716652,0.900917
2022-11-25,0.148933,0.287608,0.438885
2022-11-26,0.688791,0.277962,0.63893
2022-11-27,0.380874,0.67492,0.04521
2022-11-28,0.710874,0.595329,0.382119


---

### Deger Atama

In [80]:
dframe['C1'] = 888
dframe

Unnamed: 0,C1,C2,C3,C4,C5,C6,C7
2022-11-22,888,0.099363,0.184918,0.76357,0.843187,0.624249,0.655874
2022-11-23,888,0.837915,0.293135,0.027591,0.922231,0.761267,0.856487
2022-11-24,888,0.716652,0.900917,0.339055,0.291761,0.823687,0.155479
2022-11-25,888,0.287608,0.438885,0.661513,0.880717,0.246744,0.120721
2022-11-26,888,0.277962,0.63893,0.010426,0.566283,0.257925,0.662456
2022-11-27,888,0.67492,0.04521,0.84506,0.426547,0.235787,0.583502
2022-11-28,888,0.595329,0.382119,0.8958,0.478124,0.996588,0.197125


In [81]:
# Set C6 to 777 for the first three rows (positions 0, 1, 2).
# dframe has a DatetimeIndex, so the integers are positions, not labels.
# Passing a positional slice straight to .loc only worked with a
# FutureWarning in pandas 1.x and raises TypeError in pandas 2.0+, so the
# positions are first translated to labels via dframe.index.
dframe.loc[dframe.index[0:3], 'C6'] = 777
dframe

Unnamed: 0,C1,C2,C3,C4,C5,C6,C7
2022-11-22,888,0.099363,0.184918,0.76357,0.843187,777.0,0.655874
2022-11-23,888,0.837915,0.293135,0.027591,0.922231,777.0,0.856487
2022-11-24,888,0.716652,0.900917,0.339055,0.291761,777.0,0.155479
2022-11-25,888,0.287608,0.438885,0.661513,0.880717,0.246744,0.120721
2022-11-26,888,0.277962,0.63893,0.010426,0.566283,0.257925,0.662456
2022-11-27,888,0.67492,0.04521,0.84506,0.426547,0.235787,0.583502
2022-11-28,888,0.595329,0.382119,0.8958,0.478124,0.996588,0.197125


In [82]:
dframe.iloc[0,2]=333
dframe

Unnamed: 0,C1,C2,C3,C4,C5,C6,C7
2022-11-22,888,0.099363,333.0,0.76357,0.843187,777.0,0.655874
2022-11-23,888,0.837915,0.293135,0.027591,0.922231,777.0,0.856487
2022-11-24,888,0.716652,0.900917,0.339055,0.291761,777.0,0.155479
2022-11-25,888,0.287608,0.438885,0.661513,0.880717,0.246744,0.120721
2022-11-26,888,0.277962,0.63893,0.010426,0.566283,0.257925,0.662456
2022-11-27,888,0.67492,0.04521,0.84506,0.426547,0.235787,0.583502
2022-11-28,888,0.595329,0.382119,0.8958,0.478124,0.996588,0.197125


In [83]:
dframe.iloc[0, 2] = 555
dframe

Unnamed: 0,C1,C2,C3,C4,C5,C6,C7
2022-11-22,888,0.099363,555.0,0.76357,0.843187,777.0,0.655874
2022-11-23,888,0.837915,0.293135,0.027591,0.922231,777.0,0.856487
2022-11-24,888,0.716652,0.900917,0.339055,0.291761,777.0,0.155479
2022-11-25,888,0.287608,0.438885,0.661513,0.880717,0.246744,0.120721
2022-11-26,888,0.277962,0.63893,0.010426,0.566283,0.257925,0.662456
2022-11-27,888,0.67492,0.04521,0.84506,0.426547,0.235787,0.583502
2022-11-28,888,0.595329,0.382119,0.8958,0.478124,0.996588,0.197125


---

### Null degerler ile ilgilenme

In [84]:
# Inject missing values for the examples that follow:
#   C7 -> every row (the slice end 8 is past the last of the 7 rows),
#   C6 -> rows at positions 0 and 1,
#   C5 -> row at position 5.
# The row positions are translated to labels through dframe.index because a
# positional integer slice passed directly to .loc raises TypeError on a
# DatetimeIndex in pandas 2.0+. np.nan is used because the np.NaN alias was
# removed in NumPy 2.0.
dframe.loc[dframe.index[0:8], 'C7'] = np.nan
dframe.loc[dframe.index[0:2], 'C6'] = np.nan
dframe.loc[dframe.index[5:6], 'C5'] = np.nan
dframe

Unnamed: 0,C1,C2,C3,C4,C5,C6,C7
2022-11-22,888,0.099363,555.0,0.76357,0.843187,,
2022-11-23,888,0.837915,0.293135,0.027591,0.922231,,
2022-11-24,888,0.716652,0.900917,0.339055,0.291761,777.0,
2022-11-25,888,0.287608,0.438885,0.661513,0.880717,0.246744,
2022-11-26,888,0.277962,0.63893,0.010426,0.566283,0.257925,
2022-11-27,888,0.67492,0.04521,0.84506,,0.235787,
2022-11-28,888,0.595329,0.382119,0.8958,0.478124,0.996588,


In [85]:
dframe.notna()# True where the value is not NaN

Unnamed: 0,C1,C2,C3,C4,C5,C6,C7
2022-11-22,True,True,True,True,True,False,False
2022-11-23,True,True,True,True,True,False,False
2022-11-24,True,True,True,True,True,True,False
2022-11-25,True,True,True,True,True,True,False
2022-11-26,True,True,True,True,True,True,False
2022-11-27,True,True,True,True,False,True,False
2022-11-28,True,True,True,True,True,True,False


In [86]:
dframe.isna()# True where the value is NaN

Unnamed: 0,C1,C2,C3,C4,C5,C6,C7
2022-11-22,False,False,False,False,False,True,True
2022-11-23,False,False,False,False,False,True,True
2022-11-24,False,False,False,False,False,False,True
2022-11-25,False,False,False,False,False,False,True
2022-11-26,False,False,False,False,False,False,True
2022-11-27,False,False,False,False,True,False,True
2022-11-28,False,False,False,False,False,False,True


In [87]:
# Fill every NaN with the same value; fillna returns a new frame, which is
# assigned back to dframe.
dframe = dframe.fillna(1020)
dframe

Unnamed: 0,C1,C2,C3,C4,C5,C6,C7
2022-11-22,888,0.099363,555.0,0.76357,0.843187,1020.0,1020.0
2022-11-23,888,0.837915,0.293135,0.027591,0.922231,1020.0,1020.0
2022-11-24,888,0.716652,0.900917,0.339055,0.291761,777.0,1020.0
2022-11-25,888,0.287608,0.438885,0.661513,0.880717,0.246744,1020.0
2022-11-26,888,0.277962,0.63893,0.010426,0.566283,0.257925,1020.0
2022-11-27,888,0.67492,0.04521,0.84506,1020.0,0.235787,1020.0
2022-11-28,888,0.595329,0.382119,0.8958,0.478124,0.996588,1020.0


In [88]:
# Re-introduce missing values after the fill above:
#   C7 -> rows at positions 0..4,
#   C6 -> rows at positions 0 and 1,
#   C5 -> row at position 5.
# The row positions are translated to labels through dframe.index because a
# positional integer slice passed directly to .loc raises TypeError on a
# DatetimeIndex in pandas 2.0+. np.nan is used because the np.NaN alias was
# removed in NumPy 2.0.
dframe.loc[dframe.index[0:5], 'C7'] = np.nan
dframe.loc[dframe.index[0:2], 'C6'] = np.nan
dframe.loc[dframe.index[5:6], 'C5'] = np.nan
dframe

Unnamed: 0,C1,C2,C3,C4,C5,C6,C7
2022-11-22,888,0.099363,555.0,0.76357,0.843187,,
2022-11-23,888,0.837915,0.293135,0.027591,0.922231,,
2022-11-24,888,0.716652,0.900917,0.339055,0.291761,777.0,
2022-11-25,888,0.287608,0.438885,0.661513,0.880717,0.246744,
2022-11-26,888,0.277962,0.63893,0.010426,0.566283,0.257925,
2022-11-27,888,0.67492,0.04521,0.84506,,0.235787,1020.0
2022-11-28,888,0.595329,0.382119,0.8958,0.478124,0.996588,1020.0


In [89]:
# Fill NaN per column: the dict maps column name -> fill value; columns not
# listed (C7) keep their NaN. The result is only displayed, not stored.
dframe.fillna(value = {'C5':123, 'C6': 789})

Unnamed: 0,C1,C2,C3,C4,C5,C6,C7
2022-11-22,888,0.099363,555.0,0.76357,0.843187,789.0,
2022-11-23,888,0.837915,0.293135,0.027591,0.922231,789.0,
2022-11-24,888,0.716652,0.900917,0.339055,0.291761,777.0,
2022-11-25,888,0.287608,0.438885,0.661513,0.880717,0.246744,
2022-11-26,888,0.277962,0.63893,0.010426,0.566283,0.257925,
2022-11-27,888,0.67492,0.04521,0.84506,123.0,0.235787,1020.0
2022-11-28,888,0.595329,0.382119,0.8958,0.478124,0.996588,1020.0


In [90]:
dframe.fillna(value = {'C7': 789}, limit=1)# fill at most one NaN in column C7

Unnamed: 0,C1,C2,C3,C4,C5,C6,C7
2022-11-22,888,0.099363,555.0,0.76357,0.843187,,789.0
2022-11-23,888,0.837915,0.293135,0.027591,0.922231,,
2022-11-24,888,0.716652,0.900917,0.339055,0.291761,777.0,
2022-11-25,888,0.287608,0.438885,0.661513,0.880717,0.246744,
2022-11-26,888,0.277962,0.63893,0.010426,0.566283,0.257925,
2022-11-27,888,0.67492,0.04521,0.84506,,0.235787,1020.0
2022-11-28,888,0.595329,0.382119,0.8958,0.478124,0.996588,1020.0


In [91]:
# Drop every row that contains at least one NaN (result is displayed only;
# dframe itself is unchanged).
dframe.dropna()

Unnamed: 0,C1,C2,C3,C4,C5,C6,C7
2022-11-28,888,0.595329,0.382119,0.8958,0.478124,0.996588,1020.0


In [92]:
# Drop every column that contains at least one NaN (axis=1 = columns).
dframe.dropna(axis=1)

Unnamed: 0,C1,C2,C3,C4
2022-11-22,888,0.099363,555.0,0.76357
2022-11-23,888,0.837915,0.293135,0.027591
2022-11-24,888,0.716652,0.900917,0.339055
2022-11-25,888,0.287608,0.438885,0.661513
2022-11-26,888,0.277962,0.63893,0.010426
2022-11-27,888,0.67492,0.04521,0.84506
2022-11-28,888,0.595329,0.382119,0.8958


In [93]:
dframe

Unnamed: 0,C1,C2,C3,C4,C5,C6,C7
2022-11-22,888,0.099363,555.0,0.76357,0.843187,,
2022-11-23,888,0.837915,0.293135,0.027591,0.922231,,
2022-11-24,888,0.716652,0.900917,0.339055,0.291761,777.0,
2022-11-25,888,0.287608,0.438885,0.661513,0.880717,0.246744,
2022-11-26,888,0.277962,0.63893,0.010426,0.566283,0.257925,
2022-11-27,888,0.67492,0.04521,0.84506,,0.235787,1020.0
2022-11-28,888,0.595329,0.382119,0.8958,0.478124,0.996588,1020.0


In [94]:
# Drop rows that have NaN in the listed columns only; NaN in other columns
# (C7 here) does not cause a row to be dropped.
dframe.dropna(subset = ['C5','C6'])

Unnamed: 0,C1,C2,C3,C4,C5,C6,C7
2022-11-24,888,0.716652,0.900917,0.339055,0.291761,777.0,
2022-11-25,888,0.287608,0.438885,0.661513,0.880717,0.246744,
2022-11-26,888,0.277962,0.63893,0.010426,0.566283,0.257925,
2022-11-28,888,0.595329,0.382119,0.8958,0.478124,0.996588,1020.0


In [95]:
# Fill all remaining NaN with 55, modifying dframe in place so that the
# statistics in the following cells run on complete data.
dframe.fillna(55, inplace=True)
dframe

Unnamed: 0,C1,C2,C3,C4,C5,C6,C7
2022-11-22,888,0.099363,555.0,0.76357,0.843187,55.0,55.0
2022-11-23,888,0.837915,0.293135,0.027591,0.922231,55.0,55.0
2022-11-24,888,0.716652,0.900917,0.339055,0.291761,777.0,55.0
2022-11-25,888,0.287608,0.438885,0.661513,0.880717,0.246744,55.0
2022-11-26,888,0.277962,0.63893,0.010426,0.566283,0.257925,55.0
2022-11-27,888,0.67492,0.04521,0.84506,55.0,0.235787,1020.0
2022-11-28,888,0.595329,0.382119,0.8958,0.478124,0.996588,1020.0


In [96]:
dframe.mean()

C1    888.000000
C2      0.498536
C3     79.671314
C4      0.506145
C5      8.426043
C6    126.962435
C7    330.714286
dtype: float64

In [97]:
dframe.max()

C1     888.000000
C2       0.837915
C3     555.000000
C4       0.895800
C5      55.000000
C6     777.000000
C7    1020.000000
dtype: float64

In [98]:
dframe.min()

C1    888.000000
C2      0.099363
C3      0.045210
C4      0.010426
C5      0.291761
C6      0.235787
C7     55.000000
dtype: float64

In [99]:
# mean, std, var, sum

In [100]:
dframe.describe()

Unnamed: 0,C1,C2,C3,C4,C5,C6,C7
count,7.0,7.0,7.0,7.0,7.0,7.0,7.0
mean,888.0,0.498536,79.671314,0.506145,8.426043,126.962435,330.714286
std,0.0,0.275595,209.600421,0.378544,20.538517,287.791579,470.871785
min,888.0,0.099363,0.04521,0.010426,0.291761,0.235787,55.0
25%,888.0,0.282785,0.337627,0.183323,0.522203,0.252335,55.0
50%,888.0,0.595329,0.438885,0.661513,0.843187,0.996588,55.0
75%,888.0,0.695786,0.769924,0.804315,0.901474,55.0,537.5
max,888.0,0.837915,555.0,0.8958,55.0,777.0,1020.0


In [101]:
# Pairwise correlation between columns. C1 was set to the constant 888
# earlier, and its row and column come out as NaN in the recorded output.
dframe.corr()

Unnamed: 0,C1,C2,C3,C4,C5,C6,C7
C1,,,,,,,
C2,,1.0,-0.638893,-0.203766,0.279042,0.34928,0.338569
C3,,-0.638893,1.0,0.299325,-0.163662,-0.109318,-0.258969
C4,,-0.203766,0.299325,1.0,0.394649,-0.216649,0.657399
C5,,0.279042,-0.163662,0.394649,1.0,-0.201634,0.642369
C6,,0.34928,-0.109318,-0.216649,-0.201634,1.0,-0.299908
C7,,0.338569,-0.258969,0.657399,0.642369,-0.299908,1.0


---

### Df uzerine fonksiyon uygulama

In [102]:
dframe

Unnamed: 0,C1,C2,C3,C4,C5,C6,C7
2022-11-22,888,0.099363,555.0,0.76357,0.843187,55.0,55.0
2022-11-23,888,0.837915,0.293135,0.027591,0.922231,55.0,55.0
2022-11-24,888,0.716652,0.900917,0.339055,0.291761,777.0,55.0
2022-11-25,888,0.287608,0.438885,0.661513,0.880717,0.246744,55.0
2022-11-26,888,0.277962,0.63893,0.010426,0.566283,0.257925,55.0
2022-11-27,888,0.67492,0.04521,0.84506,55.0,0.235787,1020.0
2022-11-28,888,0.595329,0.382119,0.8958,0.478124,0.996588,1020.0


In [103]:
# apply calls the function once per column by default; here the built-in max
# yields one value per column.
dframe.apply(max)

C1     888.000000
C2       0.837915
C3     555.000000
C4       0.895800
C5      55.000000
C6     777.000000
C7    1020.000000
dtype: float64

In [104]:
# Column-wise sum via apply (one result per column).
dframe.apply(sum)

C1    6216.000000
C2       3.489750
C3     557.699196
C4       3.543015
C5      58.982303
C6     888.737044
C7    2315.000000
dtype: float64

In [105]:
# axis=1 switches apply to row-wise operation: one sum per date.
dframe.apply(sum, axis=1)

2022-11-22    1554.706121
2022-11-23    1000.080872
2022-11-24    1722.248384
2022-11-25     945.515467
2022-11-26     944.751526
2022-11-27    1964.800977
2022-11-28    1911.347961
Freq: D, dtype: float64

In [106]:
# Element-wise square root of every cell. NumPy ufuncs accept a DataFrame
# directly and return a DataFrame with the same index and columns, so no
# per-element Python call is needed. This also avoids DataFrame.applymap,
# which is deprecated since pandas 2.1.
np.sqrt(dframe)

Unnamed: 0,C1,C2,C3,C4,C5,C6,C7
2022-11-22,29.799329,0.31522,23.558438,0.873825,0.918252,7.416198,7.416198
2022-11-23,29.799329,0.915377,0.541419,0.166105,0.960328,7.416198,7.416198
2022-11-24,29.799329,0.846553,0.949166,0.582284,0.540149,27.87472,7.416198
2022-11-25,29.799329,0.536291,0.662484,0.813334,0.938465,0.496733,7.416198
2022-11-26,29.799329,0.527221,0.799331,0.102107,0.752518,0.507863,7.416198
2022-11-27,29.799329,0.821535,0.212628,0.919271,7.416198,0.485579,31.937439
2022-11-28,29.799329,0.771576,0.618158,0.946467,0.691465,0.998293,31.937439


In [107]:
# apply with a lambda: x is one whole column (a Series), so this returns the
# minimum of each column.
dframe.apply(lambda x: min(x))

C1    888.000000
C2      0.099363
C3      0.045210
C4      0.010426
C5      0.291761
C6      0.235787
C7     55.000000
dtype: float64

In [108]:
# The lambda returns a Series per column (each value squared), so the result
# is a frame of the same shape as dframe.
dframe.apply(lambda x: x*x)

Unnamed: 0,C1,C2,C3,C4,C5,C6,C7
2022-11-22,788544,0.009873,308025.0,0.58304,0.710965,3025.0,3025.0
2022-11-23,788544,0.702102,0.085928,0.000761,0.85051,3025.0,3025.0
2022-11-24,788544,0.51359,0.811651,0.114958,0.085124,603729.0,3025.0
2022-11-25,788544,0.082718,0.19262,0.437599,0.775663,0.060883,3025.0
2022-11-26,788544,0.077263,0.408232,0.000109,0.320676,0.066525,3025.0
2022-11-27,788544,0.455517,0.002044,0.714126,3025.0,0.055595,1040400.0
2022-11-28,788544,0.354417,0.146015,0.802458,0.228602,0.993188,1040400.0


---

### DataFrame Birlestirme (merge)

In [109]:
# Left-hand table for the merge examples: string ids '1'..'5' with a name each.
daf1 = pd.DataFrame(
    {
        'id': list('12345'),
        'Name': ['Toygar', 'Necmettin', 'Furkan', 'Ali', 'Altun'],
    }
)
daf1

Unnamed: 0,id,Name
0,1,Toygar
1,2,Necmettin
2,3,Furkan
3,4,Ali
4,5,Altun


In [110]:
# Right-hand table for the merge examples: string ids with a score each.
# Only ids '1' and '2' also occur in daf1.
daf2 = pd.DataFrame(
    {
        'id': list('12678'),
        'Score': [40, 60, 80, 90, 70],
    }
)
daf2

Unnamed: 0,id,Score
0,1,40
1,2,60
2,6,80
3,7,90
4,8,70


---

### DataFrame Birlestirme

In [111]:
# NOTE(review): this re-creates daf1 with exactly the same content as the
# definition in the previous section; the cell is redundant.
daf1 = pd.DataFrame({'id':['1','2','3','4','5'],
                     'Name': ['Toygar', 'Necmettin','Furkan','Ali','Altun']})

In [112]:
# NOTE(review): this re-creates daf2 with exactly the same content as the
# definition in the previous section; the cell is redundant.
daf2 = pd.DataFrame({'id': ['1','2','6','7','8'],
                     'Score': [40,60,80,90,70]})

In [115]:
# Inner join on 'id': keeps only the ids that appear in both frames.
pd.merge(daf1,daf2, on='id', how='inner')

Unnamed: 0,id,Name,Score
0,1,Toygar,40
1,2,Necmettin,60


In [117]:
# Outer join on 'id': keeps ids from either frame; values missing on one
# side are NaN.
pd.merge(daf1, daf2, on='id', how='outer')

Unnamed: 0,id,Name,Score
0,1,Toygar,40.0
1,2,Necmettin,60.0
2,3,Furkan,
3,4,Ali,
4,5,Altun,
5,6,,80.0
6,7,,90.0
7,8,,70.0


In [119]:
# Left join: keeps every row of daf1; Score is NaN where daf2 has no
# matching id.
pd.merge(daf1, daf2, on='id', how='left')

Unnamed: 0,id,Name,Score
0,1,Toygar,40.0
1,2,Necmettin,60.0
2,3,Furkan,
3,4,Ali,
4,5,Altun,


In [120]:
# Right join: keeps every row of daf2; Name is NaN where daf1 has no
# matching id.
pd.merge(daf1, daf2, on='id', how='right')

Unnamed: 0,id,Name,Score
0,1,Toygar,40
1,2,Necmettin,60
2,6,,80
3,7,,90
4,8,,70


---

In [122]:
# Load the COVID-19 snapshot and preview the first three rows.
# The path is relative, so the notebook must be run from the directory that
# contains the Data/ folder.
covid = pd.read_csv('Data/covid19_2311.csv')
covid.head(3)

Unnamed: 0,FIPS,Admin2,Province_State,Country_Region,Last_Update,Lat,Long_,Confirmed,Deaths,Recovered,Active,Combined_Key,Incident_Rate,Case_Fatality_Ratio
0,,,,Afghanistan,2022-11-24 04:21:09,33.93911,67.709953,205391.0,7833.0,,,Afghanistan,527.613031,3.813702
1,,,,Albania,2022-11-24 04:21:09,41.1533,20.1683,333293.0,3594.0,,,Albania,11581.520606,1.07833
2,,,,Algeria,2022-11-24 04:21:09,28.0339,1.6596,271041.0,6881.0,,,Algeria,618.094762,2.53873


### Unique (Kac farkli deger var)

In [124]:
covid['Country_Region'].nunique()

177

In [126]:
covid['Country_Region'].unique()

array(['Afghanistan', 'Albania', 'Algeria', 'Andorra', 'Angola',
       'Antarctica', 'Antigua and Barbuda', 'Argentina', 'Armenia', nan,
       'Austria', 'Azerbaijan', 'Bahamas', 'Bahrain', 'Bangladesh',
       'Barbados', 'Belarus', 'Belize', 'Benin', 'Bhutan', 'Bolivia',
       'Bosnia and Herzegovina', 'Botswana', 'Brunei', 'Bulgaria',
       'Burkina Faso', 'Burma', 'Burundi', 'Cabo Verde', 'Cambodia',
       'Cameroon', 'Central African Republic', 'Chad', 'Comoros',
       'Congo (Brazzaville)', 'Congo (Kinshasa)', 'Costa Rica',
       "Cote d'Ivoire", 'Croatia', 'Cuba', 'Cyprus', 'Czechia', 'Denmark',
       'Diamond Princess', 'Djibouti', 'Dominica', 'Dominican Republic',
       'Ecuador', 'Egypt', 'El Salvador', 'Equatorial Guinea', 'Eritrea',
       'Estonia', 'Eswatini', 'Ethiopia', 'Fiji', 'Finland', 'France',
       'Gabon', 'Gambia', 'Georgia', 'Ghana', 'Greece', 'Grenada',
       'Guatemala', 'Guinea', 'Guinea-Bissau', 'Guyana', 'Haiti',
       'Holy See', 'Honduras', '

In [128]:
covid.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4016 entries, 0 to 4015
Data columns (total 14 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   FIPS                 3839 non-null   object 
 1   Admin2               0 non-null      float64
 2   Province_State       0 non-null      float64
 3   Country_Region       177 non-null    object 
 4   Last_Update          177 non-null    object 
 5   Lat                  175 non-null    float64
 6   Long_                175 non-null    float64
 7   Confirmed            177 non-null    float64
 8   Deaths               177 non-null    float64
 9   Recovered            0 non-null      float64
 10  Active               0 non-null      float64
 11  Combined_Key         177 non-null    object 
 12  Incident_Rate        172 non-null    float64
 13  Case_Fatality_Ratio  177 non-null    float64
dtypes: float64(10), object(4)
memory usage: 439.4+ KB


In [130]:
covid['Country_Region'].head(10)

0            Afghanistan
1                Albania
2                Algeria
3                Andorra
4                 Angola
5             Antarctica
6    Antigua and Barbuda
7              Argentina
8                Armenia
9                    NaN
Name: Country_Region, dtype: object

---

### Belli sutunlardan yeni df yaratmak

In [131]:
# New frame holding only the four selected columns (note the double
# brackets: a list of column names).
# NOTE(review): this rebinds df1, which referred to a different example
# frame earlier in the notebook.
df1 = covid[['Country_Region', 'Confirmed', 'Incident_Rate', 'Deaths']]
df1

Unnamed: 0,Country_Region,Confirmed,Incident_Rate,Deaths
0,Afghanistan,205391.0,527.613031,7833.0
1,Albania,333293.0,11581.520606,3594.0
2,Algeria,271041.0,618.094762,6881.0
3,Andorra,46824.0,60601.824888,156.0
4,Angola,104491.0,317.927791,1923.0
...,...,...,...,...
4011,West Bank and Gaza,703036.0,13781.193300,5708.0
4012,Winter Olympics 2022,535.0,,0.0
4013,Yemen,11945.0,40.048994,2159.0
4014,Zambia,333721.0,1815.283936,4019.0


In [132]:
df1.iloc[1:4]

Unnamed: 0,Country_Region,Confirmed,Incident_Rate,Deaths
1,Albania,333293.0,11581.520606,3594.0
2,Algeria,271041.0,618.094762,6881.0
3,Andorra,46824.0,60601.824888,156.0


In [133]:
df1[df1['Country_Region']=='Papua New Guinea']

Unnamed: 0,Country_Region,Confirmed,Incident_Rate,Deaths
485,Papua New Guinea,45819.0,512.114248,668.0


In [134]:
df1.loc[df1['Country_Region']== 'Papua New Guinea']

Unnamed: 0,Country_Region,Confirmed,Incident_Rate,Deaths
485,Papua New Guinea,45819.0,512.114248,668.0


In [135]:
# display() renders several objects from one cell: a caption string followed
# by the first five rows of df1 sorted alphabetically by country.
display('Sorted Data Frame', df1.sort_values(['Country_Region'], ascending=True).head())

'Sorted Data Frame'

Unnamed: 0,Country_Region,Confirmed,Incident_Rate,Deaths
0,Afghanistan,205391.0,527.613031,7833.0
1,Albania,333293.0,11581.520606,3594.0
2,Algeria,271041.0,618.094762,6881.0
3,Andorra,46824.0,60601.824888,156.0
4,Angola,104491.0,317.927791,1923.0


In [136]:
# Distinct country names, keeping the first occurrence of each value (NaN is
# treated as a value too, as the recorded output shows); first ten shown.
df1['Country_Region'].drop_duplicates(keep = 'first').head(10)

0            Afghanistan
1                Albania
2                Algeria
3                Andorra
4                 Angola
5             Antarctica
6    Antigua and Barbuda
7              Argentina
8                Armenia
9                    NaN
Name: Country_Region, dtype: object