# Pandas Overview

* Series
* DataFrames
* Missing Data
* GroupBy
* Merging, Joining and Concatenating
* Operations
* Data Input and Output

### Basic Attributes
* .sort_index
* .sort_values
* .isin
* .index
* .keys
* .values
* .items

In [16]:
import numpy as np
import pandas as pd

## Creating a Pandas Series with Basic Format

In [17]:
pd.Series([10, 88, 3, 4, 5])

0    10
1    88
2     3
3     4
4     5
dtype: int64

In [18]:
ser = pd.Series([10, 88, 3, 4, 5])
ser

0    10
1    88
2     3
3     4
4     5
dtype: int64

## Basic Attributes

In [19]:
type(ser)

pandas.core.series.Series

In [20]:
ser.dtype

dtype('int64')

In [21]:
ser.size

5

In [22]:
ser.ndim

1

In [23]:
ser.values

array([10, 88,  3,  4,  5], dtype=int64)

In [25]:
type(ser.values)

numpy.ndarray

In [26]:
# Walk the underlying NumPy array and print one element per line.
for element in ser.values:
    print(element)

10
88
3
4
5


In [27]:
list(ser.values)  # materialise the underlying NumPy array as a plain Python list

[10, 88, 3, 4, 5]

In [30]:
ser.head(3)

0    10
1    88
2     3
dtype: int64

In [31]:
ser.tail()

0    10
1    88
2     3
3     4
4     5
dtype: int64

In [32]:
pd.Series(list("clarusway"))  # one Series element per character of the string

0    c
1    l
2    a
3    r
4    u
5    s
6    w
7    a
8    y
dtype: object

## Creating Pandas Series Using a list, numpy-array or dict

In [33]:
labels = ["a", "b", "c"]

my_list = [10, 20, 30]

arr = np.array([10, 20, 30])

d = {"a" : 10, "b" : 20, "c" : 30}

## using list

In [34]:
pd.Series(data = my_list)

0    10
1    20
2    30
dtype: int64

In [35]:
pd.Series(data = my_list, index = labels)

a    10
b    20
c    30
dtype: int64

In [36]:
pd.Series(my_list, labels)

a    10
b    20
c    30
dtype: int64

## using numpy arrays

In [37]:
arr

array([10, 20, 30])

In [38]:
pd.Series(arr)

0    10
1    20
2    30
dtype: int32

In [39]:
pd.Series(arr, labels)

a    10
b    20
c    30
dtype: int32

## using dictionary

In [41]:
d  # dict keys become the index labels, dict values become the data

{'a': 10, 'b': 20, 'c': 30}

In [42]:
pd.Series(d)

a    10
b    20
c    30
dtype: int64

In [44]:
pd.Series(d, ["w","c","z"])  # a requested label that exists in the dict gets its value; a label that does not exist gets NaN

w     NaN
c    30.0
z     NaN
dtype: float64

In [49]:
a = np.nan  # assign NaN, i.e. a missing ("empty") value

In [50]:
a

nan

## Data in Series

In [51]:
labels

['a', 'b', 'c']

In [52]:
pd.Series(data  = labels)

0    a
1    b
2    c
dtype: object

In [54]:
pd.Series([sum, print, len])  # even function objects can be stored; shown only to demonstrate how flexible a Series is

0      <built-in function sum>
1    <built-in function print>
2      <built-in function len>
dtype: object

In [55]:
mix_data = [1,"cat",True]

In [56]:
pd.Series(mix_data)

0       1
1     cat
2    True
dtype: object

## Sorting by Index | Sorting by Values | Checking Whether Data Is in a pd.Series

In [57]:
s = pd.Series(["p", "q", "r", "s"], index=[3, 2, 4, 5])

In [63]:
s

3    p
2    q
4    r
5    s
dtype: object

In [62]:
s.sort_index()  # returns the Series sorted by index label; each value stays with its label and s itself is not changed

2    q
3    p
4    r
5    s
dtype: object

In [65]:
s.sort_index(ascending=False)  # sorted by index label in descending order

5    s
4    r
3    p
2    q
dtype: object

In [66]:
s.sort_index(inplace=True)  # inplace=True mutates s itself, so the sorted order persists (same effect as s = s.sort_index())

In [67]:
s

2    q
3    p
4    r
5    s
dtype: object

In [69]:
t = pd.Series(["p", "q", "r", "s"], index=[3, 2, 4, np.nan])

In [72]:
t.sort_index()  # by default the NaN label is placed last

2.0    q
3.0    p
4.0    r
NaN    s
dtype: object

In [73]:
t.sort_index(na_position="first") # the NaN label is placed first

NaN    s
2.0    q
3.0    p
4.0    r
dtype: object

In [76]:
arrays = [np.array(["xx", "xx", "ff", "ff",
                   "bb", "bb", "br", "br"]),                    # two arrays -> a two-level (hierarchical) index
         np.array(["two", "one", "two", "one",
                   "two", "one", "two", "one"])]

In [77]:
y = pd.Series([2,3,4,5,6,7,8,9], index=arrays)

In [78]:
y.sort_index(level=1)  # levels are 0-based: level=1 sorts by the SECOND array ("one"/"two"); ties are then ordered by the other level

bb  one    7
br  one    9
ff  one    5
xx  one    3
bb  two    6
br  two    8
ff  two    4
xx  two    2
dtype: int64

In [79]:
s = pd.Series([np.nan, 2,4,10,7])
s

0     NaN
1     2.0
2     4.0
3    10.0
4     7.0
dtype: float64

In [83]:
s.sort_values(ascending=True)  # sorting by value moves the index labels along with their values; NaN goes last

1     2.0
2     4.0
4     7.0
3    10.0
0     NaN
dtype: float64

In [86]:
s.sort_values(ascending=False, inplace=True)  # in place, so s itself is changed; NaN still goes last

In [87]:
s

3    10.0
4     7.0
2     4.0
1     2.0
0     NaN
dtype: float64

In [88]:
s.sort_values(na_position="first")  # this time NaN is placed first

0     NaN
1     2.0
2     4.0
4     7.0
3    10.0
dtype: float64

In [89]:
s = pd.Series(["dog", "cow", "dog", "cat", "dog", "lion"], name="animal")

In [90]:
s

0     dog
1     cow
2     dog
3     cat
4     dog
5    lion
Name: animal, dtype: object

In [91]:
s.isin(["cow", "dog"])  # isin = "is in": element-wise test of whether each value is in the given list

0     True
1     True
2     True
3    False
4     True
5    False
Name: animal, dtype: bool

In [92]:
s.isin(["dog"])

0     True
1    False
2     True
3    False
4     True
5    False
Name: animal, dtype: bool

## Indexing Pandas Series

In [93]:
ser1 = pd.Series([1,2,3,4], index=["USA", "Germany", "USSR", "Japan"])

In [94]:
ser1

USA        1
Germany    2
USSR       3
Japan      4
dtype: int64

In [95]:
ser2 = pd.Series([1,2,5,4], index=["USA", "Germany", "Italy", "Japan"])

In [96]:
ser2

USA        1
Germany    2
Italy      5
Japan      4
dtype: int64

In [99]:
ser1["USA"]  # the value stored under the index label "USA" (not an index number)

1

In [105]:
ser1.iloc[0]  # value at position 0; .iloc is explicit, because a bare integer key on a string-labelled Series is deprecated (pandas >= 2.1)

1

In [104]:
labels

['a', 'b', 'c']

In [101]:
ser3 = pd.Series(data=labels)

In [102]:
ser3

0    a
1    b
2    c
dtype: object

In [103]:
ser3[0]

'a'

In [106]:
ser1

USA        1
Germany    2
USSR       3
Japan      4
dtype: int64

In [107]:
ser2

USA        1
Germany    2
Italy      5
Japan      4
dtype: int64

In [110]:
ser1 + ser2  # adds values aligned on index label; the result holds the UNION of labels, and labels present in only one Series give NaN (not an inner join)

Germany    4.0
Italy      NaN
Japan      8.0
USA        2.0
USSR       NaN
dtype: float64

## Indexing Examples

In [111]:
a = np.array([1,2,33,444,75])

In [112]:
panser = pd.Series(a)
panser

0      1
1      2
2     33
3    444
4     75
dtype: int32

In [113]:
panser[0]

1

In [114]:
panser[1:3]

1     2
2    33
dtype: int32

In [115]:
panser = pd.Series([121, 200, 150, 99], index=["ali", "veli", "ayşe", "nur"])

In [116]:
panser

ali     121
veli    200
ayşe    150
nur      99
dtype: int64

In [117]:
panser["ali"]

121

In [118]:
panser.iloc[0]  # value at position 0; .iloc is explicit, because a bare integer key on a string-labelled Series is deprecated (pandas >= 2.1)

121

In [120]:
panser[["veli", "nur"]]  # "fancy" indexing: select several entries with a list of labels

veli    200
nur      99
dtype: int64

In [122]:
panser["veli": "nur"]  # with labels this is a slice, and the stop label "nur" IS included. CAREFUL!

veli    200
ayşe    150
nur      99
dtype: int64

In [124]:
panser[0:3]  # integer slice is positional with an exclusive stop, so "nur" (position 3) is not included this time

ali     121
veli    200
ayşe    150
dtype: int64

In [125]:
panser["ali" : "nur"]  # slicing by labels includes the stop label; slicing by integer positions goes only up to stop-1

ali     121
veli    200
ayşe    150
nur      99
dtype: int64

## Several Selecting Attributes

In [126]:
panser.index

Index(['ali', 'veli', 'ayşe', 'nur'], dtype='object')

In [127]:
panser.keys  # no parentheses: this displays the bound method itself; panser.keys() would return the index

<bound method Series.keys of ali     121
veli    200
ayşe    150
nur      99
dtype: int64>

In [128]:
panser.values

array([121, 200, 150,  99], dtype=int64)

In [133]:
panser.items  # no parentheses: this displays the bound method itself rather than calling it

<bound method Series.items of ali     121
veli    200
ayşe    150
nur      99
dtype: int64>

In [134]:
panser.items()  # calling it returns a zip object, whose repr does not reveal the pairs

<zip at 0x24b1aa851c0>

In [136]:
list(panser.items())  # list() consumes the iterator and shows the (label, value) pairs

[('ali', 121), ('veli', 200), ('ayşe', 150), ('nur', 99)]

In [138]:
# Print every entry of the Series as "label - value", one per line.
for label, val in panser.items():
    print(label, "-", val)

ali - 121
veli - 200
ayşe - 150
nur - 99


In [139]:
"mehmet" in panser

False

In [140]:
121 in panser  # `in` checks the index labels (not the values); True only if the label exists, so this is False

False

In [147]:
121 in panser.values  # True, because here we search the values rather than the index

True

In [142]:
"ali" in panser  # ali index'te old. için True

True

In [143]:
"nur" in panser

True

In [144]:
"gulnur" in panser

False

In [148]:
99 in panser.values

True

In [149]:
500 in panser.values

False

In [150]:
panser["veli"] = 571
panser

ali     121
veli    571
ayşe    150
nur      99
dtype: int64

In [151]:
panser > 130

ali     False
veli     True
ayşe     True
nur     False
dtype: bool

In [152]:
panser[panser > 130]  # boolean mask: returns only the entries where the condition is True

veli    571
ayşe    150
dtype: int64

In [155]:
panser[[False, True, True, False]]  # an explicit boolean list works as a mask too: only the positions marked True are returned

veli    571
ayşe    150
dtype: int64

## DataFrames

In [156]:
datam = [1, 2, 39, 67, 90]

In [157]:
datam

[1, 2, 39, 67, 90]

In [158]:
pd.DataFrame(datam, columns=["column_name"])

Unnamed: 0,column_name
0,1
1,2
2,39
3,67
4,90


### Creating a DataFrame using NumPy Arrays

In [159]:
m = np.arange(1,10).reshape(3,3)

In [160]:
m

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [162]:
df = pd.DataFrame(m, columns = ["var1", "var2", "var3"])

df

Unnamed: 0,var1,var2,var3
0,1,2,3
1,4,5,6
2,7,8,9


In [163]:
df.head()

Unnamed: 0,var1,var2,var3
0,1,2,3
1,4,5,6
2,7,8,9


In [164]:
df.tail()

Unnamed: 0,var1,var2,var3
0,1,2,3
1,4,5,6
2,7,8,9


In [165]:
df.columns

Index(['var1', 'var2', 'var3'], dtype='object')

In [166]:
df.columns = ["new1", "new2", "new3"]

df

Unnamed: 0,new1,new2,new3
0,1,2,3
1,4,5,6
2,7,8,9


In [167]:
type(df)

pandas.core.frame.DataFrame

In [168]:
df.shape

(3, 3)

In [169]:
df.size

9

In [170]:
df.values

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [171]:
type(df.values)

numpy.ndarray


### Creating a DataFrame using a dict

In [172]:
# Three arrays of 5 random integers each, drawn from 0..9.
# NOTE(review): no random seed is set in the visible cells, so these values (and every
# output derived from them below) will differ on each fresh run.
s1 = np.random.randint(10, size=5)
s2 = np.random.randint(10, size=5)
s3 = np.random.randint(10, size=5)

In [173]:
s1

array([2, 2, 4, 5, 7])

In [174]:
s2

array([6, 4, 3, 7, 5])

In [175]:
s3

array([8, 0, 0, 7, 0])

In [176]:
myDict = {"var1" : s1, "var2" : s2, "var3" : s3}

In [177]:
myDict

{'var1': array([2, 2, 4, 5, 7]),
 'var2': array([6, 4, 3, 7, 5]),
 'var3': array([8, 0, 0, 7, 0])}

In [178]:
df1 = pd.DataFrame(myDict)

In [181]:
df1

Unnamed: 0,var1,var2,var3
0,2,6,8
1,2,4,0
2,4,3,0
3,5,7,7
4,7,5,0


In [184]:
df1[1:3]

## df1[1] --> a DataFrame cannot be indexed like this (it raises an error); use a slice, as above, to get rows

Unnamed: 0,var1,var2,var3
1,2,4,0
2,4,3,0


In [187]:
df1.index

RangeIndex(start=0, stop=5, step=1)

In [188]:
df1

Unnamed: 0,var1,var2,var3
0,2,6,8
1,2,4,0
2,4,3,0
3,5,7,7
4,7,5,0


In [186]:
[i for i in df1.index]

[0, 1, 2, 3, 4]

In [189]:
df1.index = ["a", "b", "c", "d","e"]  # replace the row labels of the index

df1

Unnamed: 0,var1,var2,var3
a,2,6,8
b,2,4,0
c,4,3,0
d,5,7,7
e,7,5,0


In [190]:
df1["b" : "d"]  # row slice by index labels; the stop label "d" is included

Unnamed: 0,var1,var2,var3
b,2,4,0
c,4,3,0
d,5,7,7


In [194]:
"var2" in df1  # column name üzerinden yapabiliriz.

True

In [196]:
7 in df1  # `in` does not look at the cell contents, so this is False even though 7 occurs in the data

False