In [1]:
import pandas as pd
import numpy as np

### DataFrame oluşturma

In [2]:
data = [2, 4, 18, 25, 55]
data

[2, 4, 18, 25, 55]

In [3]:
# Liste ile dataframe elde etme
pd.DataFrame(data, columns=['column1'])

Unnamed: 0,column1
0,2
1,4
2,18
3,25
4,55


In [4]:
# Liste ile series elde etme
pd.Series(data = data, name = "column_1")

0     2
1     4
2    18
3    25
4    55
Name: column_1, dtype: int64

In [5]:
# Np Array ile dataframe elde etme 
data = np.arange(1, 60, 5).reshape(3, 4)
data

array([[ 1,  6, 11, 16],
       [21, 26, 31, 36],
       [41, 46, 51, 56]])

In [6]:
df = pd.DataFrame(data=data, columns=['var1','var2','var3','var4'])
df

Unnamed: 0,var1,var2,var3,var4
0,1,6,11,16
1,21,26,31,36
2,41,46,51,56


In [7]:
# Sözlük yapısı ile dataframe elde etme 
s1 = np.random.randint(2, 15, size=4)
s2 = np.random.randint(3, 25, size=4)
s3 = np.random.randint(4, 35, size=4)

In [8]:
myDict= {'var1':s1, 'var2':s2, 'var3':s3}

In [9]:
df = pd.DataFrame(myDict)
df

Unnamed: 0,var1,var2,var3
0,2,22,15
1,4,5,33
2,6,20,9
3,2,8,24


### Dataframe Bazı Özellikleri Fonsiyonları

In [10]:
df.head(2)

Unnamed: 0,var1,var2,var3
0,2,22,15
1,4,5,33


In [11]:
df.tail(2)

Unnamed: 0,var1,var2,var3
2,6,20,9
3,2,8,24


In [12]:
df.sample(2)

Unnamed: 0,var1,var2,var3
1,4,5,33
3,2,8,24


In [13]:
df.columns

Index(['var1', 'var2', 'var3'], dtype='object')

In [14]:
df.index

RangeIndex(start=0, stop=4, step=1)

In [15]:
df.rename(columns={"var1": "new1", "var2": "new1"},index={1: "x", 2: "y"},inplace=True)
df

Unnamed: 0,new1,new1.1,var3
0,2,22,15
x,4,5,33
y,6,20,9
3,2,8,24


In [16]:
df.shape

(4, 3)

In [17]:
df.size

12

In [18]:
df.ndim

2

In [19]:
df.values

array([[ 2, 22, 15],
       [ 4,  5, 33],
       [ 6, 20,  9],
       [ 2,  8, 24]])

### Pandas DataFrame İndeksleme, Dilimleme ve Seçim

In [20]:
from numpy import random

np.random.seed(101)

df = pd.DataFrame(np.random.randint(0,100, size =(5,4)), index = 'A B C D E'.split(), columns = 'W X Y Z'.split())
df

Unnamed: 0,W,X,Y,Z
A,95,11,81,70
B,63,87,75,9
C,77,40,4,63
D,40,60,92,64
E,5,12,93,40


In [21]:
df['Y']

A    81
B    75
C     4
D    92
E    93
Name: Y, dtype: int32

In [22]:
# SQL Syntax 

df.Y

A    81
B    75
C     4
D    92
E    93
Name: Y, dtype: int32

In [23]:
type(df['Y'])

pandas.core.series.Series

In [24]:
df[['Y']]

Unnamed: 0,Y
A,81
B,75
C,4
D,92
E,93


In [25]:
type(df[['Y']])

pandas.core.frame.DataFrame

In [26]:
df[['W',"X"]]

Unnamed: 0,W,X
A,95,11
B,63,87
C,77,40
D,40,60
E,5,12


In [27]:
df[:"A"]

Unnamed: 0,W,X,Y,Z
A,95,11,81,70


In [28]:
# it searches index to find ["B" : "D"]
df['B':'D']

Unnamed: 0,W,X,Y,Z
B,63,87,75,9
C,77,40,4,63
D,40,60,92,64


In [29]:
df["A":"C"][["Y", "Z"]]

Unnamed: 0,Y,Z
A,81,70
B,75,9
C,4,63


### Pandas DataFrame Sütün Ekleme, Çıkarma

In [30]:
df

Unnamed: 0,W,X,Y,Z
A,95,11,81,70
B,63,87,75,9
C,77,40,4,63
D,40,60,92,64
E,5,12,93,40


In [31]:
# Yeni sütün ekleme
df['F'] = df['X'] + df['Z']
df

Unnamed: 0,W,X,Y,Z,F
A,95,11,81,70,81
B,63,87,75,9,96
C,77,40,4,63,103
D,40,60,92,64,124
E,5,12,93,40,52


In [32]:
# Yeni sütün ekleme
df["K"] = np.arange(5)
df

Unnamed: 0,W,X,Y,Z,F,K
A,95,11,81,70,81,0
B,63,87,75,9,96,1
C,77,40,4,63,103,2
D,40,60,92,64,124,3
E,5,12,93,40,52,4


In [33]:
# Sütün atma atama veya inplace=True yapmadığımız için df değişmedi
df.drop('F', axis=1)

Unnamed: 0,W,X,Y,Z,K
A,95,11,81,70,0
B,63,87,75,9,1
C,77,40,4,63,2
D,40,60,92,64,3
E,5,12,93,40,4


In [34]:
df.drop(["F", "K"], axis=1)

Unnamed: 0,W,X,Y,Z
A,95,11,81,70
B,63,87,75,9
C,77,40,4,63
D,40,60,92,64
E,5,12,93,40


In [35]:
df.drop(columns = ["F", "K"])

Unnamed: 0,W,X,Y,Z
A,95,11,81,70
B,63,87,75,9
C,77,40,4,63
D,40,60,92,64
E,5,12,93,40


In [36]:
df

Unnamed: 0,W,X,Y,Z,F,K
A,95,11,81,70,81,0
B,63,87,75,9,96,1
C,77,40,4,63,103,2
D,40,60,92,64,124,3
E,5,12,93,40,52,4


In [37]:
# Yapılan değişikliği kalıca hale getirdik
df.drop(columns = ["F", "K"], inplace=True)

In [38]:
# Satır atma
df.drop('A', axis=0)

Unnamed: 0,W,X,Y,Z
B,63,87,75,9
C,77,40,4,63
D,40,60,92,64
E,5,12,93,40


In [39]:
df.drop(index=['B'])

Unnamed: 0,W,X,Y,Z
A,95,11,81,70
C,77,40,4,63
D,40,60,92,64
E,5,12,93,40


### Pandas DataFrame Loc Ve İloc

In [40]:
import seaborn as sns

In [41]:
tips = sns.load_dataset("tips")
tips

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.50,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4
...,...,...,...,...,...,...,...
239,29.03,5.92,Male,No,Sat,Dinner,3
240,27.18,2.00,Female,Yes,Sat,Dinner,2
241,22.67,2.00,Male,Yes,Sat,Dinner,2
242,17.82,1.75,Male,No,Sat,Dinner,2


#### Tek değer çekme 

In [42]:
# loc[row_label, column_label]
# iloc[row_position, column_position]

In [43]:
tips.loc[4,"total_bill"]

24.59

In [44]:
tips.iloc[4, 0]

24.59

#### Birden çok satır veya sütun seçmek 

In [45]:
# Multiple rows
tips.loc[[1, 4], 'total_bill']

1    10.34
4    24.59
Name: total_bill, dtype: float64

In [46]:
#Multiple columns
tips.iloc[0, [0, 1]]


total_bill    16.99
tip            1.01
Name: 0, dtype: object

In [47]:
tips.loc[[1, 4], ['total_bill', 'tip']]

Unnamed: 0,total_bill,tip
1,10.34,1.66
4,24.59,3.61


In [48]:
tips.iloc[[1, 4], [0, 1]]


Unnamed: 0,total_bill,tip
1,10.34,1.66
4,24.59,3.61


In [49]:
tips.loc[1:4, 'total_bill']

1    10.34
2    21.01
3    23.68
4    24.59
Name: total_bill, dtype: float64

In [50]:
tips.loc[1:4]["total_bill"]

1    10.34
2    21.01
3    23.68
4    24.59
Name: total_bill, dtype: float64

In [51]:
rows = [1, 4]
cols = ['total_bill','tip']
tips.loc[rows, cols]

Unnamed: 0,total_bill,tip
1,10.34,1.66
4,24.59,3.61


In [52]:
rows = [1, 4]
cols = [0, 1]
tips.iloc[rows, cols]

Unnamed: 0,total_bill,tip
1,10.34,1.66
4,24.59,3.61


#### Dilim aracılığıyla bir veri aralığı seçme (start: stop:step)

In [53]:
# Slicing column labels

tips.loc[ 1:4, 'total_bill':'day']

Unnamed: 0,total_bill,tip,sex,smoker,day
1,10.34,1.66,Male,No,Sun
2,21.01,3.5,Male,No,Sun
3,23.68,3.31,Male,No,Sun
4,24.59,3.61,Female,No,Sun


In [54]:
tips.iloc[ 1:4, 0:5]

Unnamed: 0,total_bill,tip,sex,smoker,day
1,10.34,1.66,Male,No,Sun
2,21.01,3.5,Male,No,Sun
3,23.68,3.31,Male,No,Sun


In [55]:
tips.iloc[ 1:20:2, ::2]

Unnamed: 0,total_bill,sex,day,size
1,10.34,Male,Sun,3
3,23.68,Male,Sun,2
5,25.29,Male,Sun,4
7,26.88,Male,Sun,4
9,14.78,Male,Sun,2
11,35.26,Female,Sun,4
13,18.43,Male,Sun,4
15,21.58,Male,Sun,2
17,16.29,Male,Sun,3
19,20.65,Male,Sat,3


#### Koşullar üzerinden seçim ve çağrılabilir

In [56]:
# One condition loc
tips.loc[tips.total_bill > 35, :]

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
11,35.26,5.0,Female,No,Sun,Dinner,4
23,39.42,7.58,Male,No,Sat,Dinner,4
56,38.01,3.0,Male,Yes,Sat,Dinner,4
59,48.27,6.73,Male,No,Sat,Dinner,4
95,40.17,4.73,Male,Yes,Fri,Dinner,4
102,44.3,2.5,Female,Yes,Sat,Dinner,3
112,38.07,4.0,Male,No,Sun,Dinner,3
142,41.19,5.0,Male,No,Thur,Lunch,5
156,48.17,5.0,Male,No,Sun,Dinner,6
170,50.81,10.0,Male,Yes,Sat,Dinner,3


In [57]:
## multiple conditions loc

tips.loc[(tips.total_bill > 35) & (tips.sex == 'Female'), ['total_bill','sex']]     

Unnamed: 0,total_bill,sex
11,35.26,Female
102,44.3,Female
197,43.11,Female
238,35.83,Female


In [58]:
# Getting ValueError
tips.iloc[tips.total_bill > 35, :]

NotImplementedError: iLocation based boolean indexing on an integer type is not available

In [59]:
# Single condition
tips.iloc[list(tips.total_bill > 35)]

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
11,35.26,5.0,Female,No,Sun,Dinner,4
23,39.42,7.58,Male,No,Sat,Dinner,4
56,38.01,3.0,Male,Yes,Sat,Dinner,4
59,48.27,6.73,Male,No,Sat,Dinner,4
95,40.17,4.73,Male,Yes,Fri,Dinner,4
102,44.3,2.5,Female,Yes,Sat,Dinner,3
112,38.07,4.0,Male,No,Sun,Dinner,3
142,41.19,5.0,Male,No,Thur,Lunch,5
156,48.17,5.0,Male,No,Sun,Dinner,6
170,50.81,10.0,Male,Yes,Sat,Dinner,3


In [60]:
## multiple conditions
tips.iloc[ list((tips.total_bill > 40) & (tips.sex == 'Female')), :]    

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
102,44.3,2.5,Female,Yes,Sat,Dinner,3
197,43.11,5.0,Female,Yes,Thur,Lunch,4


#### Fonksiyon ile seçim 

In [62]:
# Selecting columns
tips.loc[:, lambda tips: ['sex', 'total_bill','tip']]

Unnamed: 0,sex,total_bill,tip
0,Female,16.99,1.01
1,Male,10.34,1.66
2,Male,21.01,3.50
3,Male,23.68,3.31
4,Female,24.59,3.61
...,...,...,...
239,Male,29.03,5.92
240,Female,27.18,2.00
241,Male,22.67,2.00
242,Male,17.82,1.75


In [64]:
# With condition
tips.loc[lambda tips: tips.total_bill > 40, :]

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
59,48.27,6.73,Male,No,Sat,Dinner,4
95,40.17,4.73,Male,Yes,Fri,Dinner,4
102,44.3,2.5,Female,Yes,Sat,Dinner,3
142,41.19,5.0,Male,No,Thur,Lunch,5
156,48.17,5.0,Male,No,Sun,Dinner,6
170,50.81,10.0,Male,Yes,Sat,Dinner,3
182,45.35,3.5,Male,Yes,Sun,Dinner,3
184,40.55,3.0,Male,Yes,Sun,Dinner,2
197,43.11,5.0,Female,Yes,Thur,Lunch,4
212,48.33,9.0,Male,No,Sat,Dinner,4


In [65]:
tips.iloc[lambda tips: [0,1,2], :]

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3


In [67]:
tips.iloc[lambda tips: list(tips.total_bill > 40), :]

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
59,48.27,6.73,Male,No,Sat,Dinner,4
95,40.17,4.73,Male,Yes,Fri,Dinner,4
102,44.3,2.5,Female,Yes,Sat,Dinner,3
142,41.19,5.0,Male,No,Thur,Lunch,5
156,48.17,5.0,Male,No,Sun,Dinner,6
170,50.81,10.0,Male,Yes,Sat,Dinner,3
182,45.35,3.5,Male,Yes,Sun,Dinner,3
184,40.55,3.0,Male,Yes,Sun,Dinner,2
197,43.11,5.0,Female,Yes,Thur,Lunch,4
212,48.33,9.0,Male,No,Sat,Dinner,4


###  Pandas DataFrame Reset İndex Ve Set İndex

In [None]:
tips

In [None]:
tips.set_index('day')

In [None]:
tips.set_index('day',inplace=True)
tips.reset_index()