# Pandas

In [2]:
import numpy as np
import pandas as pd
import math

In [3]:
import warnings
# NOTE(review): with no category argument this silences every warning for the
# rest of the session, including any pandas/NumPy deprecation warnings that
# later cells may trigger.
warnings.filterwarnings('ignore')

## Seriler

### Array'den Seri olusturma

In [4]:
# Build a Series from a NumPy array holding the integers 1 through 7;
# the Series receives the default 0..6 integer index.
v = np.arange(start=1, stop=8)
s1 = pd.Series(data=v)
s1

0    1
1    2
2    3
3    4
4    5
5    6
6    7
dtype: int64

---

### Temel bilgiler edinme

In [5]:
# Data type of the elements stored in the Series.
s1.dtype

dtype('int64')

In [6]:
# Total bytes used by the underlying data (element count x bytes per element).
s1.nbytes

56

In [7]:
# Size in bytes of a single element of the underlying NumPy array.
s1.values.itemsize

8

In [8]:
# Shape tuple; a Series is one-dimensional, so it has a single entry.
s1.shape

(7,)

In [9]:
# Number of non-null entries in the Series.
s1.count()

7

---

### Seri olusturma

In [10]:
# Create a Series from a list, supplying explicit string labels as the index.
s0 = pd.Series([1,2,3], index=['a','b','c'])
s0

a    1
b    2
c    3
dtype: int64

---

### Index degistirme

In [11]:
# Replace the default integer index with string labels (one label per element).
s1.index = ['a','b','c','d','e','f','g']
s1

a    1
b    2
c    3
d    4
e    5
f    6
g    7
dtype: int64

---

In [14]:
# Ten random floats paired with an explicit integer index 0..9.
# The generator is not seeded, so the values differ between runs.
v2 = np.random.random(size=10)
ind2 = np.arange(10)
s = pd.Series(data=v2, index=ind2)
v2, ind2, s

(array([0.18638463, 0.39409596, 0.4570787 , 0.05945155, 0.47269727,
        0.31618704, 0.98294274, 0.91725525, 0.86333027, 0.37511558]),
 array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
 0    0.186385
 1    0.394096
 2    0.457079
 3    0.059452
 4    0.472697
 5    0.316187
 6    0.982943
 7    0.917255
 8    0.863330
 9    0.375116
 dtype: float64)

---

### Dictionary'den Series olusturma

In [15]:
# Build a Series from a dict: the keys become the index, the values the data.
dict1 = {'a1': 10, 'a2': 20, 'a3': 30, 'a4': 40}
s3 = pd.Series(dict1)
s3

a1    10
a2    20
a3    30
a4    40
dtype: int64

---

In [16]:
# A scalar is repeated for every label of the given index.
pd.Series(99, index=[0,1,2,3,4,5])

0    99
1    99
2    99
3    99
4    99
5    99
dtype: int64

---

### Seriler'de secme

In [18]:
# Display the Series used by the slicing examples in this section.
s

0    0.186385
1    0.394096
2    0.457079
3    0.059452
4    0.472697
5    0.316187
6    0.982943
7    0.917255
8    0.863330
9    0.375116
dtype: float64

In [19]:
# Slice the first three elements (the stop bound is excluded).
s[0:3]

0    0.186385
1    0.394096
2    0.457079
dtype: float64

In [21]:
# Slice from the last element to the end, i.e. only the final element.
s[-1:]

9    0.375116
dtype: float64

---

### Serileri birbirine ekleme

In [23]:
# Take a copy of s1 to use in the append examples.
s2 = s1.copy()
s2

a    1
b    2
c    3
d    4
e    5
f    6
g    7
dtype: int64

In [26]:
# Concatenate s3 onto the end of s2, keeping both sets of index labels.
# Series.append was deprecated in pandas 1.4 and removed in 2.0;
# pd.concat is the supported replacement and gives the same result.
s4 = pd.concat([s2, s3])
s4

a      1
b      2
c      3
d      4
e      5
f      6
g      7
a1    10
a2    20
a3    30
a4    40
dtype: int64

---

### Silme

In [28]:
s4.drop('a4', inplace=False)
# With inplace=False the drop returns a new Series; s4 itself is not modified.

a      1
b      2
c      3
d      4
e      5
f      6
g      7
a1    10
a2    20
a3    30
dtype: int64

In [29]:
# Remove 'a4' permanently: inplace=True modifies s4 itself.
s4.drop('a4', inplace=True)
s4

a      1
b      2
c      3
d      4
e      5
f      6
g      7
a1    10
a2    20
a3    30
dtype: int64

---

### Satir ekleme

In [30]:
# Add a single labelled entry by concatenating a one-element Series.
# Series.append was removed in pandas 2.0; pd.concat is the replacement.
s4 = pd.concat([s4, pd.Series({'a4': 7})])
s4

a      1
b      2
c      3
d      4
e      5
f      6
g      7
a1    10
a2    20
a3    30
a4     7
dtype: int64

---

### Serilerde Operasyonlar ( +-/* )

In [31]:
# Two small integer Series used by the arithmetic examples in this section.
v1, v2 = np.array([10, 20, 30]), np.array([1, 2, 3])
s1, s2 = pd.Series(v1), pd.Series(v2)
s1, s2

(0    10
 1    20
 2    30
 dtype: int64,
 0    1
 1    2
 2    3
 dtype: int64)

### Toplama

In [32]:
# Element-wise addition of the two Series.
s1.add(s2)

0    11
1    22
2    33
dtype: int64

In [37]:
s1.add(9) # adds 9 to every element

0    19
1    29
2    39
dtype: int64

### Cikartma

In [35]:
# Option 1: sub (its result is discarded; only the cell's last expression is displayed)
s1.sub(s2)
# Option 2: subtract, an equivalent spelling
s1.subtract(s2)

0     9
1    18
2    27
dtype: int64

### Carpma

In [38]:
# Option 1: mul (its result is discarded; only the cell's last expression is displayed)
s1.mul(s2)
# Option 2: multiply, an equivalent spelling
s1.multiply(s2)

0    10
1    40
2    90
dtype: int64

### Bolme

In [39]:
# Option 1: div (its result is discarded; only the cell's last expression is displayed)
s1.div(s2)
# Option 2: divide, an equivalent spelling; the result is floating point
s1.divide(s2)

0    10.0
1    10.0
2    10.0
dtype: float64

---

### Hangi degerden kac adet var?

In [40]:
# Series with repeated values, used to demonstrate counting occurrences.
s5 = pd.Series(data=[1, 1, 2, 2, 3, 3], index=list(range(6)))
s5

0    1
1    1
2    2
3    2
4    3
5    3
dtype: int64

In [41]:
# Count how many times each distinct value occurs.
s5.value_counts()

1    2
2    2
3    2
dtype: int64

---

# DataFrames

### DataFrame Olusturma Cesitleri

In [42]:
# A flat list becomes a single-column DataFrame; the column is labelled 0 by default.
lang = ['Java', 'Python', 'C', 'C++']
df = pd.DataFrame(lang)
df

Unnamed: 0,0
0,Java
1,Python
2,C
3,C++


In [43]:
# Add a new column (labelled 1) by assigning a list with one value per row.
rating = [1,2,3,4]
df[1] = rating
df

Unnamed: 0,0,1
0,Java,1
1,Python,2
2,C,3
3,C++,4


In [44]:
# Rename the columns by assigning a new list of labels.
df.columns=['Language','Rating']
df

Unnamed: 0,Language,Rating
0,Java,1
1,Python,2
2,C,3
3,C++,4


In [45]:
# A list of dicts: each dict is one row and the keys become column names.
data = [{'a':1, 'b':2}, {'a':5,'b':10,'c':20}]

df2 = pd.DataFrame(data) # created without an explicit index
# Explicit row labels; `columns` selects which keys are kept.
df3 = pd.DataFrame(data, index = ['row1', 'row2'], columns=['a','b'])
df4 = pd.DataFrame(data, index = ['row1', 'row2'], columns=['a','b','c'])
# 'd' is not a key in any dict, so that column holds only missing values.
df5 = pd.DataFrame(data, index = ['row1', 'row2'], columns=['a','b','c','d'])

In [46]:
# Default integer row labels; the key missing from the first dict shows as NaN.
df2

Unnamed: 0,a,b,c
0,1,2,
1,5,10,20.0


In [47]:
# Only columns 'a' and 'b' were requested.
df3

Unnamed: 0,a,b
row1,1,2
row2,5,10


In [48]:
# Columns 'a', 'b' and 'c' with the custom row labels.
df4

Unnamed: 0,a,b,c
row1,1,2,
row2,5,10,20.0


In [49]:
# Includes the requested column 'd', which has no data.
df5

Unnamed: 0,a,b,c,d
row1,1,2,,
row2,5,10,20.0,


In [76]:
# Build a DataFrame from a dict of Series: keys become columns, and the rows
# are the union of the Series indexes (label 'd' is missing from 'A').
dict1 = {'A' : pd.Series([1,2,3], index= ['a','b','c']),
         'B' : pd.Series([1,2,3,4], index= ['a','b','c','d'])}

df1 = pd.DataFrame(dict1)
df1

Unnamed: 0,A,B
a,1.0,1
b,2.0,2
c,3.0,3
d,,4


---

### Datetime index

In [51]:
# Daily dates between start and end (both endpoints appear in the result).
dates = pd.date_range(start = '2022-11-22', end='2022-11-28')
dates

DatetimeIndex(['2022-11-22', '2022-11-23', '2022-11-24', '2022-11-25',
               '2022-11-26', '2022-11-27', '2022-11-28'],
              dtype='datetime64[ns]', freq='D')

In [52]:
dates = pd.date_range('today', periods=7) # start from the current timestamp and generate 7 daily entries
dates

DatetimeIndex(['2023-11-18 02:23:02.234530', '2023-11-19 02:23:02.234530',
               '2023-11-20 02:23:02.234530', '2023-11-21 02:23:02.234530',
               '2023-11-22 02:23:02.234530', '2023-11-23 02:23:02.234530',
               '2023-11-24 02:23:02.234530'],
              dtype='datetime64[ns]', freq='D')

In [54]:
# Seven consecutive daily dates beginning at the given start date.
dates = pd.date_range(start='2022-11-22', periods=7)
dates

DatetimeIndex(['2022-11-22', '2022-11-23', '2022-11-24', '2022-11-25',
               '2022-11-26', '2022-11-27', '2022-11-28'],
              dtype='datetime64[ns]', freq='D')

---

### Dates verisini dataframe'in indexine atma

In [57]:
# 7x7 array of random floats (unseeded, so values differ between runs).
M = np.random.random((7,7))

In [59]:
# Use the dates as the row index; the columns get default integer labels.
dframe = pd.DataFrame(M, index= dates)
dframe

Unnamed: 0,0,1,2,3,4,5,6
2022-11-22,0.168278,0.65666,0.709764,0.549976,0.674829,0.981423,0.053513
2022-11-23,0.049464,0.293011,0.805784,0.554302,0.473879,0.943695,0.481103
2022-11-24,0.373527,0.540612,0.98927,0.574825,0.515888,0.876071,0.849748
2022-11-25,0.298593,0.457289,0.277082,0.790385,0.845525,0.711473,0.638476
2022-11-26,0.078289,0.451734,0.486143,0.048876,0.991927,0.313061,0.126083
2022-11-27,0.535833,0.080405,0.618045,0.028939,0.497498,0.023586,0.840253
2022-11-28,0.649039,0.332083,0.898225,0.127568,0.837417,0.087713,0.809583


In [60]:
# Rename the columns.
dframe.columns = ['C1','C2','C3','C4','C5','C6','C7']
dframe

Unnamed: 0,C1,C2,C3,C4,C5,C6,C7
2022-11-22,0.168278,0.65666,0.709764,0.549976,0.674829,0.981423,0.053513
2022-11-23,0.049464,0.293011,0.805784,0.554302,0.473879,0.943695,0.481103
2022-11-24,0.373527,0.540612,0.98927,0.574825,0.515888,0.876071,0.849748
2022-11-25,0.298593,0.457289,0.277082,0.790385,0.845525,0.711473,0.638476
2022-11-26,0.078289,0.451734,0.486143,0.048876,0.991927,0.313061,0.126083
2022-11-27,0.535833,0.080405,0.618045,0.028939,0.497498,0.023586,0.840253
2022-11-28,0.649039,0.332083,0.898225,0.127568,0.837417,0.087713,0.809583


In [61]:
# Row labels of the frame.
dframe.index

DatetimeIndex(['2022-11-22', '2022-11-23', '2022-11-24', '2022-11-25',
               '2022-11-26', '2022-11-27', '2022-11-28'],
              dtype='datetime64[ns]', freq='D')

In [62]:
# Column labels of the frame.
dframe.columns

Index(['C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'C7'], dtype='object')

In [63]:
# Data type of each column.
dframe.dtypes

C1    float64
C2    float64
C3    float64
C4    float64
C5    float64
C6    float64
C7    float64
dtype: object

---

### Siralama ascending&descending

In [67]:
dframe.sort_values(by='C1') # sort rows by column C1 in ascending order

Unnamed: 0,C1,C2,C3,C4,C5,C6,C7
2022-11-23,0.049464,0.293011,0.805784,0.554302,0.473879,0.943695,0.481103
2022-11-26,0.078289,0.451734,0.486143,0.048876,0.991927,0.313061,0.126083
2022-11-22,0.168278,0.65666,0.709764,0.549976,0.674829,0.981423,0.053513
2022-11-25,0.298593,0.457289,0.277082,0.790385,0.845525,0.711473,0.638476
2022-11-24,0.373527,0.540612,0.98927,0.574825,0.515888,0.876071,0.849748
2022-11-27,0.535833,0.080405,0.618045,0.028939,0.497498,0.023586,0.840253
2022-11-28,0.649039,0.332083,0.898225,0.127568,0.837417,0.087713,0.809583


In [68]:
dframe.sort_values(by='C1',ascending=False) # sort rows by column C1 in descending order

Unnamed: 0,C1,C2,C3,C4,C5,C6,C7
2022-11-28,0.649039,0.332083,0.898225,0.127568,0.837417,0.087713,0.809583
2022-11-27,0.535833,0.080405,0.618045,0.028939,0.497498,0.023586,0.840253
2022-11-24,0.373527,0.540612,0.98927,0.574825,0.515888,0.876071,0.849748
2022-11-25,0.298593,0.457289,0.277082,0.790385,0.845525,0.711473,0.638476
2022-11-22,0.168278,0.65666,0.709764,0.549976,0.674829,0.981423,0.053513
2022-11-26,0.078289,0.451734,0.486143,0.048876,0.991927,0.313061,0.126083
2022-11-23,0.049464,0.293011,0.805784,0.554302,0.473879,0.943695,0.481103


---

### Sutun silme

In [69]:
# Display df1 before deleting a column from it.
df1

Unnamed: 0,A,B
a,1.0,1
b,2.0,2
c,3.0,3
d,,4


In [77]:
# Deletion method 1: the del statement
del df1['B'] # removes column 'B' from df1 in place
df1

Unnamed: 0,A
a,1.0
b,2.0
c,3.0
d,


In [78]:
# Display df5 before removing a column from it.
df5

Unnamed: 0,a,b,c,d
row1,1,2,,
row2,5,10,20.0,


In [79]:
# Deletion method 2: pop removes column 'c' from df5 and returns it as a Series.
df5.pop('c')

row1     NaN
row2    20.0
Name: c, dtype: float64

In [83]:
# Rebuild the two-column frame under a new name for the next deletion example.
dict1 = {'A' : pd.Series([1,2,3], index= ['a','b','c']),
         'B' : pd.Series([1,2,3,4], index= ['a','b','c','d'])}

df12 = pd.DataFrame(dict1)
df12

Unnamed: 0,A,B
a,1.0,1
b,2.0,2
c,3.0,3
d,,4


In [84]:
# Deletion method 3: drop with axis=1 targets columns; inplace=True modifies df12 itself.
df12.drop(['A'], axis=1, inplace=True)
df12

Unnamed: 0,B
a,1
b,2
c,3
d,4


---

### Satir silme

In [90]:
# 30-row frame: C1 is evenly spaced from 10 to 100, C2 holds random
# integers drawn from [10, 100) (unseeded, so values differ between runs).
col1 = np.linspace(start=10, stop=100, num=30)
col2 = np.random.randint(low=10, high=100, size=30)
df10 = pd.DataFrame(data={'C1': col1, 'C2': col2})
df10

Unnamed: 0,C1,C2
0,10.0,24
1,13.103448,88
2,16.206897,67
3,19.310345,76
4,22.413793,91
5,25.517241,54
6,28.62069,47
7,31.724138,57
8,34.827586,54
9,37.931034,48


In [91]:
# Drop the rows labelled 17, 18 and 19 (axis=0 targets rows); drop returns a
# new frame here, so the result is assigned back to df10.
df10 = df10.drop([17,18,19], axis = 0)
df10

Unnamed: 0,C1,C2
0,10.0,24
1,13.103448,88
2,16.206897,67
3,19.310345,76
4,22.413793,91
5,25.517241,54
6,28.62069,47
7,31.724138,57
8,34.827586,54
9,37.931034,48


In [92]:
# Drop the row labelled 15, modifying df10 in place.
df10.drop([15], axis=0, inplace=True)
df10

Unnamed: 0,C1,C2
0,10.0,24
1,13.103448,88
2,16.206897,67
3,19.310345,76
4,22.413793,91
5,25.517241,54
6,28.62069,47
7,31.724138,57
8,34.827586,54
9,37.931034,48


In [93]:
# Drop the row at position 5 by looking up its label through df10.index[5].
df10.drop(df10.index[5], inplace=True)
df10

Unnamed: 0,C1,C2
0,10.0,24
1,13.103448,88
2,16.206897,67
3,19.310345,76
4,22.413793,91
6,28.62069,47
7,31.724138,57
8,34.827586,54
9,37.931034,48
10,41.034483,55


In [94]:
# Keep everything from position 3 onward (discards the first three rows).
df10 = df10.iloc[3:,]
df10

Unnamed: 0,C1,C2
3,19.310345,76
4,22.413793,91
6,28.62069,47
7,31.724138,57
8,34.827586,54
9,37.931034,48
10,41.034483,55
11,44.137931,79
12,47.241379,43
13,50.344828,82


In [95]:
# Discard the last four rows.
df10 = df10.iloc[:-4,]
df10

Unnamed: 0,C1,C2
3,19.310345,76
4,22.413793,91
6,28.62069,47
7,31.724138,57
8,34.827586,54
9,37.931034,48
10,41.034483,55
11,44.137931,79
12,47.241379,43
13,50.344828,82


In [96]:
# Keep only the first ten rows.
df10 = df10.iloc[:10,]
df10

Unnamed: 0,C1,C2
3,19.310345,76
4,22.413793,91
6,28.62069,47
7,31.724138,57
8,34.827586,54
9,37.931034,48
10,41.034483,55
11,44.137931,79
12,47.241379,43
13,50.344828,82


In [98]:
# Index labels of the rows whose C2 equals 54, as a plain list.
df10.index[df10['C2']==54].tolist()

[8]

In [99]:
# Drop the rows whose C2 equals 54, in place, using their index labels.
df10.drop(df10.index[df10['C2']==54].tolist(), axis=0, inplace=True)
df10

Unnamed: 0,C1,C2
3,19.310345,76
4,22.413793,91
6,28.62069,47
7,31.724138,57
9,37.931034,48
10,41.034483,55
11,44.137931,79
12,47.241379,43
13,50.344828,82


In [100]:
# Same idea via a boolean-filtered frame: drop the rows whose C2 equals 79.
df10 = df10.drop(df10[df10['C2']==79].index)
df10

Unnamed: 0,C1,C2
3,19.310345,76
4,22.413793,91
6,28.62069,47
7,31.724138,57
9,37.931034,48
10,41.034483,55
12,47.241379,43
13,50.344828,82


In [103]:
# Keep only the rows whose C2 is not 43.
df10 = df10[df10.C2 != 43]
df10

Unnamed: 0,C1,C2
3,19.310345,76
4,22.413793,91
6,28.62069,47
7,31.724138,57
9,37.931034,48
10,41.034483,55
13,50.344828,82


In [105]:
# Remove the rows whose C2 is 76 or 91 (~ negates the isin mask).
df10 = df10[~(df10.C2.isin([76,91]))]
df10

Unnamed: 0,C1,C2
6,28.62069,47
7,31.724138,57
9,37.931034,48
10,41.034483,55
13,50.344828,82


In [106]:
# Keep only the rows whose C2 is 48 or 55.
df10 = df10[(df10.C2.isin([48,55]))]
df10

Unnamed: 0,C1,C2
9,37.931034,48
10,41.034483,55


In [107]:
# Rebuild the two-column frame under a new name for the row-deletion example.
dict1 = {'A' : pd.Series([1,2,3], index= ['a','b','c']),
         'B' : pd.Series([1,2,3,4], index= ['a','b','c','d'])}

df11 = pd.DataFrame(dict1)
df11

Unnamed: 0,A,B
a,1.0,1
b,2.0,2
c,3.0,3
d,,4


In [108]:
# Drop the row labelled 'd', modifying df11 in place.
df11.drop('d',axis=0, inplace=True)
df11

Unnamed: 0,A,B
a,1.0,1
b,2.0,2
c,3.0,3


In [110]:
# Small frame with mixed column types, used for multi-condition row deletion.
df13 = pd.DataFrame({'ID' : [1,2,3,4],
                     'Name' : ['Berna', 'Efkan', 'Hilal', 'Dogu'],
                     'location' : ['UK', 'US', 'Austria', 'Spain']})
df13

Unnamed: 0,ID,Name,location
0,1,Berna,UK
1,2,Efkan,US
2,3,Hilal,Austria
3,4,Dogu,Spain


In [112]:
# Collect the index of rows matching all three conditions, then drop them in place.
ind = df13[((df13.ID == 4) & (df13.Name == 'Dogu')& (df13.location == 'Spain'))].index
df13.drop(ind, inplace=True)
df13

Unnamed: 0,ID,Name,location
0,1,Berna,UK
1,2,Efkan,US
2,3,Hilal,Austria


---

### Veri Secme

In [113]:
# Display the frame used by the selection examples in this section.
dframe

Unnamed: 0,C1,C2,C3,C4,C5,C6,C7
2022-11-22,0.168278,0.65666,0.709764,0.549976,0.674829,0.981423,0.053513
2022-11-23,0.049464,0.293011,0.805784,0.554302,0.473879,0.943695,0.481103
2022-11-24,0.373527,0.540612,0.98927,0.574825,0.515888,0.876071,0.849748
2022-11-25,0.298593,0.457289,0.277082,0.790385,0.845525,0.711473,0.638476
2022-11-26,0.078289,0.451734,0.486143,0.048876,0.991927,0.313061,0.126083
2022-11-27,0.535833,0.080405,0.618045,0.028939,0.497498,0.023586,0.840253
2022-11-28,0.649039,0.332083,0.898225,0.127568,0.837417,0.087713,0.809583


In [114]:
# Select rows with a date-string slice on the DatetimeIndex; both endpoints are included.
dframe['2022-11-22': '2022-11-24']

Unnamed: 0,C1,C2,C3,C4,C5,C6,C7
2022-11-22,0.168278,0.65666,0.709764,0.549976,0.674829,0.981423,0.053513
2022-11-23,0.049464,0.293011,0.805784,0.554302,0.473879,0.943695,0.481103
2022-11-24,0.373527,0.540612,0.98927,0.574825,0.515888,0.876071,0.849748


In [115]:
# All rows, only columns C1 and C7.
dframe.loc[:, ['C1', 'C7']]

Unnamed: 0,C1,C7
2022-11-22,0.168278,0.053513
2022-11-23,0.049464,0.481103
2022-11-24,0.373527,0.849748
2022-11-25,0.298593,0.638476
2022-11-26,0.078289,0.126083
2022-11-27,0.535833,0.840253
2022-11-28,0.649039,0.809583


In [116]:
# Boolean mask: keep the rows where C1 is greater than 0.5.
dframe[dframe['C1']> 0.5]

Unnamed: 0,C1,C2,C3,C4,C5,C6,C7
2022-11-27,0.535833,0.080405,0.618045,0.028939,0.497498,0.023586,0.840253
2022-11-28,0.649039,0.332083,0.898225,0.127568,0.837417,0.087713,0.809583


In [119]:
# Combine two conditions with & (each condition wrapped in parentheses).
dframe[(dframe['C1']>0.2)&(dframe['C4']>0.5)]

Unnamed: 0,C1,C2,C3,C4,C5,C6,C7
2022-11-24,0.373527,0.540612,0.98927,0.574825,0.515888,0.876071,0.849748
2022-11-25,0.298593,0.457289,0.277082,0.790385,0.845525,0.711473,0.638476


In [120]:
# Read a single cell by position (row 0, column 0) with one indexer.
# The chained form iloc[0][0] first builds a row Series and then relies on
# integer-as-position lookup on its string labels, which newer pandas deprecates.
dframe.iloc[0, 0]

0.16827758101135848

In [121]:
# All rows, first three columns by position.
dframe.iloc[:, 0:3]

Unnamed: 0,C1,C2,C3
2022-11-22,0.168278,0.65666,0.709764
2022-11-23,0.049464,0.293011,0.805784
2022-11-24,0.373527,0.540612,0.98927
2022-11-25,0.298593,0.457289,0.277082
2022-11-26,0.078289,0.451734,0.486143
2022-11-27,0.535833,0.080405,0.618045
2022-11-28,0.649039,0.332083,0.898225


---

### Deger Atama

In [122]:
# Assign the scalar 888 to every row of column C1.
dframe['C1'] = 888
dframe

Unnamed: 0,C1,C2,C3,C4,C5,C6,C7
2022-11-22,888,0.65666,0.709764,0.549976,0.674829,0.981423,0.053513
2022-11-23,888,0.293011,0.805784,0.554302,0.473879,0.943695,0.481103
2022-11-24,888,0.540612,0.98927,0.574825,0.515888,0.876071,0.849748
2022-11-25,888,0.457289,0.277082,0.790385,0.845525,0.711473,0.638476
2022-11-26,888,0.451734,0.486143,0.048876,0.991927,0.313061,0.126083
2022-11-27,888,0.080405,0.618045,0.028939,0.497498,0.023586,0.840253
2022-11-28,888,0.332083,0.898225,0.127568,0.837417,0.087713,0.809583


In [123]:
# Set C6 to 777 for the first three rows.
# .loc is label-based and the index holds dates, so the positions 0:3 are
# translated to their date labels first. A bare integer slice in .loc on a
# DatetimeIndex only worked in pandas 1.x behind a FutureWarning and raises
# TypeError from pandas 2.0 onward.
dframe.loc[dframe.index[0:3], 'C6'] = 777
dframe

Unnamed: 0,C1,C2,C3,C4,C5,C6,C7
2022-11-22,888,0.65666,0.709764,0.549976,0.674829,777.0,0.053513
2022-11-23,888,0.293011,0.805784,0.554302,0.473879,777.0,0.481103
2022-11-24,888,0.540612,0.98927,0.574825,0.515888,777.0,0.849748
2022-11-25,888,0.457289,0.277082,0.790385,0.845525,0.711473,0.638476
2022-11-26,888,0.451734,0.486143,0.048876,0.991927,0.313061,0.126083
2022-11-27,888,0.080405,0.618045,0.028939,0.497498,0.023586,0.840253
2022-11-28,888,0.332083,0.898225,0.127568,0.837417,0.087713,0.809583


In [124]:
# Set a single cell by position: row 0, column 2 (C3).
dframe.iloc[0,2]=333
dframe

Unnamed: 0,C1,C2,C3,C4,C5,C6,C7
2022-11-22,888,0.65666,333.0,0.549976,0.674829,777.0,0.053513
2022-11-23,888,0.293011,0.805784,0.554302,0.473879,777.0,0.481103
2022-11-24,888,0.540612,0.98927,0.574825,0.515888,777.0,0.849748
2022-11-25,888,0.457289,0.277082,0.790385,0.845525,0.711473,0.638476
2022-11-26,888,0.451734,0.486143,0.048876,0.991927,0.313061,0.126083
2022-11-27,888,0.080405,0.618045,0.028939,0.497498,0.023586,0.840253
2022-11-28,888,0.332083,0.898225,0.127568,0.837417,0.087713,0.809583


In [125]:
# Overwrite the same cell with a new value.
dframe.iloc[0, 2] = 555
dframe

Unnamed: 0,C1,C2,C3,C4,C5,C6,C7
2022-11-22,888,0.65666,555.0,0.549976,0.674829,777.0,0.053513
2022-11-23,888,0.293011,0.805784,0.554302,0.473879,777.0,0.481103
2022-11-24,888,0.540612,0.98927,0.574825,0.515888,777.0,0.849748
2022-11-25,888,0.457289,0.277082,0.790385,0.845525,0.711473,0.638476
2022-11-26,888,0.451734,0.486143,0.048876,0.991927,0.313061,0.126083
2022-11-27,888,0.080405,0.618045,0.028939,0.497498,0.023586,0.840253
2022-11-28,888,0.332083,0.898225,0.127568,0.837417,0.087713,0.809583


---

### Null degerler ile ilgilenme

In [127]:
# Inject missing values by row position: all of C7, the first two rows of C6,
# and the row at position 5 of C5.
# Positions are translated to date labels via dframe.index because integer
# slices in .loc on a DatetimeIndex raise TypeError from pandas 2.0 onward.
# np.nan replaces the np.NaN alias, which NumPy 2.0 removed.
dframe.loc[dframe.index[0:8], 'C7'] = np.nan
dframe.loc[dframe.index[0:2], 'C6'] = np.nan
dframe.loc[dframe.index[5:6], 'C5'] = np.nan
dframe

Unnamed: 0,C1,C2,C3,C4,C5,C6,C7
2022-11-22,888,0.65666,555.0,0.549976,0.674829,,
2022-11-23,888,0.293011,0.805784,0.554302,0.473879,,
2022-11-24,888,0.540612,0.98927,0.574825,0.515888,777.0,
2022-11-25,888,0.457289,0.277082,0.790385,0.845525,0.711473,
2022-11-26,888,0.451734,0.486143,0.048876,0.991927,0.313061,
2022-11-27,888,0.080405,0.618045,0.028939,,0.023586,
2022-11-28,888,0.332083,0.898225,0.127568,0.837417,0.087713,


In [129]:
dframe.notna()  # True where the value is not NaN

Unnamed: 0,C1,C2,C3,C4,C5,C6,C7
2022-11-22,True,True,True,True,True,False,False
2022-11-23,True,True,True,True,True,False,False
2022-11-24,True,True,True,True,True,True,False
2022-11-25,True,True,True,True,True,True,False
2022-11-26,True,True,True,True,True,True,False
2022-11-27,True,True,True,True,False,True,False
2022-11-28,True,True,True,True,True,True,False


In [130]:
dframe.isna()  # True where the value is NaN

Unnamed: 0,C1,C2,C3,C4,C5,C6,C7
2022-11-22,False,False,False,False,False,True,True
2022-11-23,False,False,False,False,False,True,True
2022-11-24,False,False,False,False,False,False,True
2022-11-25,False,False,False,False,False,False,True
2022-11-26,False,False,False,False,False,False,True
2022-11-27,False,False,False,False,True,False,True
2022-11-28,False,False,False,False,False,False,True


In [131]:
# Fill every NaN with 1020; fillna returns a new frame here, so it is assigned back.
dframe = dframe.fillna(1020)
dframe

Unnamed: 0,C1,C2,C3,C4,C5,C6,C7
2022-11-22,888,0.65666,555.0,0.549976,0.674829,1020.0,1020.0
2022-11-23,888,0.293011,0.805784,0.554302,0.473879,1020.0,1020.0
2022-11-24,888,0.540612,0.98927,0.574825,0.515888,777.0,1020.0
2022-11-25,888,0.457289,0.277082,0.790385,0.845525,0.711473,1020.0
2022-11-26,888,0.451734,0.486143,0.048876,0.991927,0.313061,1020.0
2022-11-27,888,0.080405,0.618045,0.028939,1020.0,0.023586,1020.0
2022-11-28,888,0.332083,0.898225,0.127568,0.837417,0.087713,1020.0


In [132]:
# Inject missing values again by row position: the first five rows of C7,
# the first two rows of C6, and the row at position 5 of C5.
# Positions are translated to date labels via dframe.index because integer
# slices in .loc on a DatetimeIndex raise TypeError from pandas 2.0 onward.
# np.nan replaces the np.NaN alias, which NumPy 2.0 removed.
dframe.loc[dframe.index[0:5], 'C7'] = np.nan
dframe.loc[dframe.index[0:2], 'C6'] = np.nan
dframe.loc[dframe.index[5:6], 'C5'] = np.nan
dframe

Unnamed: 0,C1,C2,C3,C4,C5,C6,C7
2022-11-22,888,0.65666,555.0,0.549976,0.674829,,
2022-11-23,888,0.293011,0.805784,0.554302,0.473879,,
2022-11-24,888,0.540612,0.98927,0.574825,0.515888,777.0,
2022-11-25,888,0.457289,0.277082,0.790385,0.845525,0.711473,
2022-11-26,888,0.451734,0.486143,0.048876,0.991927,0.313061,
2022-11-27,888,0.080405,0.618045,0.028939,,0.023586,1020.0
2022-11-28,888,0.332083,0.898225,0.127568,0.837417,0.087713,1020.0


In [133]:
# Fill NaN per column: C5 with 123 and C6 with 789. The result is only
# displayed; dframe itself is not changed.
dframe.fillna(value = {'C5':123, 'C6': 789})

Unnamed: 0,C1,C2,C3,C4,C5,C6,C7
2022-11-22,888,0.65666,555.0,0.549976,0.674829,789.0,
2022-11-23,888,0.293011,0.805784,0.554302,0.473879,789.0,
2022-11-24,888,0.540612,0.98927,0.574825,0.515888,777.0,
2022-11-25,888,0.457289,0.277082,0.790385,0.845525,0.711473,
2022-11-26,888,0.451734,0.486143,0.048876,0.991927,0.313061,
2022-11-27,888,0.080405,0.618045,0.028939,123.0,0.023586,1020.0
2022-11-28,888,0.332083,0.898225,0.127568,0.837417,0.087713,1020.0


In [134]:
dframe.fillna(value = {'C7': 789}, limit=1)# fill only one NaN in column C7

Unnamed: 0,C1,C2,C3,C4,C5,C6,C7
2022-11-22,888,0.65666,555.0,0.549976,0.674829,,789.0
2022-11-23,888,0.293011,0.805784,0.554302,0.473879,,
2022-11-24,888,0.540612,0.98927,0.574825,0.515888,777.0,
2022-11-25,888,0.457289,0.277082,0.790385,0.845525,0.711473,
2022-11-26,888,0.451734,0.486143,0.048876,0.991927,0.313061,
2022-11-27,888,0.080405,0.618045,0.028939,,0.023586,1020.0
2022-11-28,888,0.332083,0.898225,0.127568,0.837417,0.087713,1020.0


In [136]:
# Drop every row that contains at least one NaN (result is only displayed).
dframe.dropna()

Unnamed: 0,C1,C2,C3,C4,C5,C6,C7
2022-11-28,888,0.332083,0.898225,0.127568,0.837417,0.087713,1020.0


In [138]:
# Drop every column that contains at least one NaN (result is only displayed).
dframe.dropna(axis=1)

Unnamed: 0,C1,C2,C3,C4
2022-11-22,888,0.65666,555.0,0.549976
2022-11-23,888,0.293011,0.805784,0.554302
2022-11-24,888,0.540612,0.98927,0.574825
2022-11-25,888,0.457289,0.277082,0.790385
2022-11-26,888,0.451734,0.486143,0.048876
2022-11-27,888,0.080405,0.618045,0.028939
2022-11-28,888,0.332083,0.898225,0.127568


In [139]:
# dframe still contains its NaN values: the dropna calls returned new frames.
dframe

Unnamed: 0,C1,C2,C3,C4,C5,C6,C7
2022-11-22,888,0.65666,555.0,0.549976,0.674829,,
2022-11-23,888,0.293011,0.805784,0.554302,0.473879,,
2022-11-24,888,0.540612,0.98927,0.574825,0.515888,777.0,
2022-11-25,888,0.457289,0.277082,0.790385,0.845525,0.711473,
2022-11-26,888,0.451734,0.486143,0.048876,0.991927,0.313061,
2022-11-27,888,0.080405,0.618045,0.028939,,0.023586,1020.0
2022-11-28,888,0.332083,0.898225,0.127568,0.837417,0.087713,1020.0


In [140]:
# Drop rows that have NaN in C5 or C6; NaN in other columns is ignored.
dframe.dropna(subset = ['C5','C6'])

Unnamed: 0,C1,C2,C3,C4,C5,C6,C7
2022-11-24,888,0.540612,0.98927,0.574825,0.515888,777.0,
2022-11-25,888,0.457289,0.277082,0.790385,0.845525,0.711473,
2022-11-26,888,0.451734,0.486143,0.048876,0.991927,0.313061,
2022-11-28,888,0.332083,0.898225,0.127568,0.837417,0.087713,1020.0


In [141]:
# Fill all remaining NaN values with 55, modifying dframe in place.
dframe.fillna(55, inplace=True)
dframe

Unnamed: 0,C1,C2,C3,C4,C5,C6,C7
2022-11-22,888,0.65666,555.0,0.549976,0.674829,55.0,55.0
2022-11-23,888,0.293011,0.805784,0.554302,0.473879,55.0,55.0
2022-11-24,888,0.540612,0.98927,0.574825,0.515888,777.0,55.0
2022-11-25,888,0.457289,0.277082,0.790385,0.845525,0.711473,55.0
2022-11-26,888,0.451734,0.486143,0.048876,0.991927,0.313061,55.0
2022-11-27,888,0.080405,0.618045,0.028939,55.0,0.023586,1020.0
2022-11-28,888,0.332083,0.898225,0.127568,0.837417,0.087713,1020.0


In [142]:
# Mean of each column.
dframe.mean()

C1    888.000000
C2      0.401685
C3     79.867793
C4      0.382124
C5      8.477067
C6    126.876548
C7    330.714286
dtype: float64

In [144]:
# Maximum of each column.
dframe.max()

C1     888.000000
C2       0.656660
C3     555.000000
C4       0.790385
C5      55.000000
C6     777.000000
C7    1020.000000
dtype: float64

In [145]:
# Minimum of each column.
dframe.min()

C1    888.000000
C2      0.080405
C3      0.277082
C4      0.028939
C5      0.473879
C6      0.023586
C7     55.000000
dtype: float64

In [146]:
# Other aggregations to try in the same way: mean, std, var, sum

In [147]:
# Summary statistics per column: count, mean, std, min, quartiles and max.
dframe.describe()

Unnamed: 0,C1,C2,C3,C4,C5,C6,C7
count,7.0,7.0,7.0,7.0,7.0,7.0,7.0
mean,888.0,0.401685,79.867793,0.382124,8.477067,126.876548,330.714286
std,0.0,0.186987,209.513754,0.306111,20.515529,287.835602,470.871785
min,888.0,0.080405,0.277082,0.028939,0.473879,0.023586,55.0
25%,888.0,0.312547,0.552094,0.088222,0.595359,0.200387,55.0
50%,888.0,0.451734,0.805784,0.549976,0.837417,0.711473,55.0
75%,888.0,0.49895,0.943748,0.564563,0.918726,55.0,537.5
max,888.0,0.65666,555.0,0.790385,55.0,777.0,1020.0


In [148]:
# Pairwise correlation between columns. C1 is constant (all 888), so its
# correlations come out as NaN.
dframe.corr()

Unnamed: 0,C1,C2,C3,C4,C5,C6,C7
C1,,,,,,,
C2,,1.0,0.601298,0.547616,-0.757696,0.359319,-0.714014
C3,,0.601298,1.0,0.241657,-0.167835,-0.109429,-0.257941
C4,,0.547616,0.241657,1.0,-0.512988,0.318894,-0.678131
C5,,-0.757696,-0.167835,-0.512988,1.0,-0.199333,0.647372
C6,,0.359319,-0.109429,0.318894,-0.199333,1.0,-0.300988
C7,,-0.714014,-0.257941,-0.678131,0.647372,-0.300988,1.0


---

### Df uzerine fonksiyon uygulama

In [149]:
# Display the frame that the apply examples in this section operate on.
dframe

Unnamed: 0,C1,C2,C3,C4,C5,C6,C7
2022-11-22,888,0.65666,555.0,0.549976,0.674829,55.0,55.0
2022-11-23,888,0.293011,0.805784,0.554302,0.473879,55.0,55.0
2022-11-24,888,0.540612,0.98927,0.574825,0.515888,777.0,55.0
2022-11-25,888,0.457289,0.277082,0.790385,0.845525,0.711473,55.0
2022-11-26,888,0.451734,0.486143,0.048876,0.991927,0.313061,55.0
2022-11-27,888,0.080405,0.618045,0.028939,55.0,0.023586,1020.0
2022-11-28,888,0.332083,0.898225,0.127568,0.837417,0.087713,1020.0


In [150]:
# Apply the built-in max to each column.
dframe.apply(max)

C1     888.000000
C2       0.656660
C3     555.000000
C4       0.790385
C5      55.000000
C6     777.000000
C7    1020.000000
dtype: float64

In [151]:
# Apply the built-in sum to each column.
dframe.apply(sum)

C1    6216.000000
C2       2.811794
C3     559.074550
C4       2.674870
C5      59.339466
C6     888.135833
C7    2315.000000
dtype: float64

In [152]:
# axis=1 applies the function across each row instead of down each column.
dframe.apply(sum, axis=1)

2022-11-22    1554.881465
2022-11-23    1000.126975
2022-11-24    1722.620596
2022-11-25     946.081754
2022-11-26     945.291741
2022-11-27    1963.750975
2022-11-28    1910.283007
Freq: D, dtype: float64

In [153]:
# Element-wise square root of every cell.
# NumPy ufuncs accept a DataFrame directly and return a DataFrame, so no
# per-element Python call is needed. DataFrame.applymap is deprecated since
# pandas 2.1, and its replacement DataFrame.map does not exist before 2.1.
np.sqrt(dframe)

Unnamed: 0,C1,C2,C3,C4,C5,C6,C7
2022-11-22,29.799329,0.810346,23.558438,0.741603,0.82148,7.416198,7.416198
2022-11-23,29.799329,0.541305,0.897655,0.744514,0.688389,7.416198,7.416198
2022-11-24,29.799329,0.735263,0.994621,0.758172,0.718253,27.87472,7.416198
2022-11-25,29.799329,0.676231,0.526386,0.889036,0.919524,0.843488,7.416198
2022-11-26,29.799329,0.672112,0.69724,0.221078,0.995955,0.559519,7.416198
2022-11-27,29.799329,0.283559,0.786158,0.170114,7.416198,0.153578,31.937439
2022-11-28,29.799329,0.576266,0.947747,0.357166,0.915105,0.296165,31.937439


In [154]:
# A lambda works as well: each column is passed in as x, giving its minimum.
dframe.apply(lambda x: min(x))

C1    888.000000
C2      0.080405
C3      0.277082
C4      0.028939
C5      0.473879
C6      0.023586
C7     55.000000
dtype: float64

In [155]:
# Each column is passed in as a Series, so x*x squares every element.
dframe.apply(lambda x: x*x)

Unnamed: 0,C1,C2,C3,C4,C5,C6,C7
2022-11-22,788544,0.431203,308025.0,0.302473,0.455395,3025.0,3025.0
2022-11-23,788544,0.085855,0.649287,0.30725,0.224561,3025.0,3025.0
2022-11-24,788544,0.292262,0.978655,0.330424,0.26614,603729.0,3025.0
2022-11-25,788544,0.209113,0.076775,0.624709,0.714912,0.506193,3025.0
2022-11-26,788544,0.204064,0.236335,0.002389,0.98392,0.098007,3025.0
2022-11-27,788544,0.006465,0.38198,0.000837,3025.0,0.000556,1040400.0
2022-11-28,788544,0.110279,0.806809,0.016274,0.701268,0.007694,1040400.0


---

### DataFrame Birlestirme (merge)

In [156]:
# First frame for the merge examples: string ids '1'-'5', each with a name.
daf1 = pd.DataFrame(
    data={
        'id': ['1', '2', '3', '4', '5'],
        'Name': ['Toygar', 'Necmettin', 'Furkan', 'Ali', 'Altun'],
    }
)
daf1

Unnamed: 0,id,Name
0,1,Toygar
1,2,Necmettin
2,3,Furkan
3,4,Ali
4,5,Altun


In [157]:
# Second frame for the merge examples: ids '1' and '2' also occur in daf1,
# while '6', '7' and '8' occur only here.
daf2 = pd.DataFrame(
    data={
        'id': ['1', '2', '6', '7', '8'],
        'Score': [40, 60, 80, 90, 70],
    }
)
daf2

Unnamed: 0,id,Score
0,1,40
1,2,60
2,6,80
3,7,90
4,8,70
