In [1]:
import pandas as pd
import numpy as np

# Pandas

## Series

In [2]:
# Population figures (in millions) stored as a float Series; no index is
# given, so pandas assigns the default integer index.
g7_pop = pd.Series(
    data=[
        35.467,
        63.951,
        80.940,
        60.665,
    ]
)

In [3]:
# Display the Series: the left column is the automatically assigned integer
# index, the right column holds the values.
g7_pop

0    35.467
1    63.951
2    80.940
3    60.665
dtype: float64

In [4]:
# Give the Series a descriptive name; it appears in the repr next to the dtype.
# NOTE(review): "millioms" looks like a typo for "millions"; the string is left
# unchanged here because the recorded outputs in this notebook show it verbatim.
g7_pop.name = "G7 Population in millioms"

In [5]:
# Display again: the repr footer now includes the Name set on the Series.
g7_pop

0    35.467
1    63.951
2    80.940
3    60.665
Name: G7 Population in millioms, dtype: float64

In [6]:
# A Series holds a single dtype, so the singular `dtype` attribute is the
# idiomatic accessor; `dtypes` is the DataFrame-style spelling and returns the
# same object for a Series.
g7_pop.dtype

dtype('float64')

In [7]:
# to_numpy() is the accessor pandas recommends over `.values` for obtaining the
# underlying data as a NumPy array.
g7_pop.to_numpy()

array([35.467, 63.951, 80.94 , 60.665])

In [8]:
# Label lookup: with the default RangeIndex the label 0 addresses the first
# element.
g7_pop[0]

35.467

In [9]:
# Inspect the index: no labels were supplied at construction, so it is a
# RangeIndex.
g7_pop.index

RangeIndex(start=0, stop=4, step=1)

In [12]:
# Replace the integer index with country labels, one label per value and in
# the same order as the values.
g7_pop.index = ['Canada', 'France', 'Germany', 'Italy']

In [13]:
# Display the Series: country labels now replace the integer index.
g7_pop

Canada     35.467
France     63.951
Germany    80.940
Italy      60.665
Name: G7 Population in millioms, dtype: float64

In [14]:
# Build a Series from a mapping: the keys become the index labels and the
# values become the data.
pd.Series(dict(a=20, b=30, c=50), name='dict example')

a    20
b    30
c    50
Name: dict example, dtype: int64

In [16]:
# Build a Series from a plain list and attach the index labels explicitly.
pd.Series([20, 30, 40], index=['a', 'b', 'c'], name='list example')

a    20
b    30
c    40
Name: list example, dtype: int64

In [17]:
# Passing an existing Series together with an index creates a new Series
# object containing only the requested labels (a sub-series).
pd.Series(g7_pop, index = ['France', 'Germany'])

France     63.951
Germany    80.940
Name: G7 Population in millioms, dtype: float64

In [18]:
# Labels that do not exist in the source Series ('Turkey' here) are filled
# with NaN.
pd.Series(g7_pop, index = ['France', 'Germany', 'Turkey'])

France     63.951
Germany    80.940
Turkey        NaN
Name: G7 Population in millioms, dtype: float64

In [19]:
# Look up a single value by its index label.
g7_pop['Germany']

80.94

In [20]:
# iloc ("integer location") selects by position, regardless of the index
# labels; position 0 is the first element.
g7_pop.iloc[0]

35.467

In [21]:
# Passing a list of labels returns a Series with those entries, in the order
# requested.
g7_pop[['Italy', 'France']]

Italy     60.665
France    63.951
Name: G7 Population in millioms, dtype: float64

In [22]:
# Element-wise comparison: yields a boolean Series with the same index.
g7_pop > 70

Canada     False
France     False
Germany     True
Italy      False
Name: G7 Population in millioms, dtype: bool

In [23]:
# Boolean-mask selection: keep only the entries whose mask value is True.
g7_pop[g7_pop > 70]

Germany    80.94
Name: G7 Population in millioms, dtype: float64

In [24]:
# Arithmetic mean of all values in the Series.
g7_pop.mean()

60.25575

In [None]:
## Boolean masks are combined with the bitwise operators (&, |, ~); the keywords and/or/not are not used for this

In [27]:
# Combine two boolean masks with | (element-wise OR). Each comparison must be
# parenthesized because | binds more tightly than >.
# NOTE(review): std()/2 is non-negative, so the second condition is implied by
# the first and the OR does not change the selection here; the cell only
# illustrates the syntax.
g7_pop[(g7_pop > g7_pop.mean()) | (g7_pop > (g7_pop.std()/2 + g7_pop.mean())) ]

France     63.951
Germany    80.940
Italy      60.665
Name: G7 Population in millioms, dtype: float64

In [29]:
# Assigning through a label overwrites that entry in place.
g7_pop['Canada'] = 40.5

In [30]:
# Display the Series to confirm that the Canada entry was modified.
g7_pop

Canada     40.500
France     63.951
Germany    80.940
Italy      60.665
Name: G7 Population in millioms, dtype: float64

In [31]:
# Positional assignment: -1 addresses the last element (Italy in this Series).
g7_pop.iloc[-1] = 50

In [32]:
# Label-based slice: unlike positional slices, both endpoints are included.
g7_pop["France" : "Italy"]

France     63.951
Germany    80.940
Italy      50.000
Name: G7 Population in millioms, dtype: float64

In [33]:
# Aggregations can be applied directly to a slice; here, the largest value
# between France and Italy (inclusive).
g7_pop["France" : "Italy"].max()

80.94

## Dataframe

In [None]:
# A DataFrame is a table formed by combining several Series.
# It is a higher-level object than a Series.

In [34]:
# Build the country table column by column. Every list holds one value per
# country, in the same row order across all columns.
df = pd.DataFrame(
    {
        'Population': [35.467, 63.951, 80.94, 60.665, 127.061, 64.511, 318.523],
        'GDP': [1785387, 2833687, 3874437, 2167744, 4602367, 2950039, 17348075],
        'Surface Area': [9984670, 640679, 357114, 301336, 377930, 242495, 9525067],
        'HDI': [0.913, 0.888, 0.916, 0.873, 0.891, 0.907, 0.915],
        'Continent': ['America', 'Europe', 'Europe', 'Europe', 'Asia', 'Europe', 'America'],
    },
    # Explicit column order; it matches the insertion order of the dict above.
    columns=['Population', 'GDP', 'Surface Area', 'HDI', 'Continent'],
)

In [35]:
# Display the frame: rows carry the default integer index 0..6.
df

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
0,35.467,1785387,9984670,0.913,America
1,63.951,2833687,640679,0.888,Europe
2,80.94,3874437,357114,0.916,Europe
3,60.665,2167744,301336,0.873,Europe
4,127.061,4602367,377930,0.891,Asia
5,64.511,2950039,242495,0.907,Europe
6,318.523,17348075,9525067,0.915,America


In [36]:
# Label the rows with country names, one per row and in row order.
df.index = ['Canada', 'France', 'Germany', 'Italy', 'Japan', 'United Kingdom', 'United States']


In [37]:
# Display the frame: rows are now labeled by country.
df

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
Canada,35.467,1785387,9984670,0.913,America
France,63.951,2833687,640679,0.888,Europe
Germany,80.94,3874437,357114,0.916,Europe
Italy,60.665,2167744,301336,0.873,Europe
Japan,127.061,4602367,377930,0.891,Asia
United Kingdom,64.511,2950039,242495,0.907,Europe
United States,318.523,17348075,9525067,0.915,America


In [38]:
# The column labels, returned as an Index object.
df.columns

Index(['Population', 'GDP', 'Surface Area', 'HDI', 'Continent'], dtype='object')

In [47]:
# Print a structural summary: index range, per-column non-null counts and
# dtypes, and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 7 entries, Canada to United States
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Population    7 non-null      float64
 1   GDP           7 non-null      int64  
 2   Surface Area  7 non-null      int64  
 3   HDI           7 non-null      float64
 4   Continent     7 non-null      object 
dtypes: float64(2), int64(2), object(1)
memory usage: 336.0+ bytes


In [44]:
# The row labels, returned as an Index object.
df.index

Index(['Canada', 'France', 'Germany', 'Italy', 'Japan', 'United Kingdom',
       'United States'],
      dtype='object')

In [41]:
# Without the call parentheses this only displays the bound method object
# (whose repr embeds the frame); no statistics are computed.
df.describe

<bound method NDFrame.describe of                 Population       GDP  Surface Area    HDI Continent
Canada              35.467   1785387       9984670  0.913   America
France              63.951   2833687        640679  0.888    Europe
Germany             80.940   3874437        357114  0.916    Europe
Italy               60.665   2167744        301336  0.873    Europe
Japan              127.061   4602367        377930  0.891      Asia
United Kingdom      64.511   2950039        242495  0.907    Europe
United States      318.523  17348075       9525067  0.915   America>

In [42]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric
# columns; the non-numeric Continent column is left out.
df.describe()

Unnamed: 0,Population,GDP,Surface Area,HDI
count,7.0,7.0,7.0,7.0
mean,107.302571,5080248.0,3061327.0,0.900429
std,97.24997,5494020.0,4576187.0,0.016592
min,35.467,1785387.0,242495.0,0.873
25%,62.308,2500716.0,329225.0,0.8895
50%,64.511,2950039.0,377930.0,0.907
75%,104.0005,4238402.0,5082873.0,0.914
max,318.523,17348080.0,9984670.0,0.916


In [45]:
# Total number of cells: rows times columns (7 * 5).
df.size

35

In [46]:
# Dimensions as a (rows, columns) tuple.
df.shape

(7, 5)

### loc -> label based location
### iloc -> integer based location

In [48]:
# Select one row by its label; the row comes back as a Series whose index is
# the column names.
df.loc["Canada"]

Population       35.467
GDP             1785387
Surface Area    9984670
HDI               0.913
Continent       America
Name: Canada, dtype: object

In [50]:
# Select one row by integer position; position 0 is the same row as 'Canada'.
df.iloc[0]

Population       35.467
GDP             1785387
Surface Area    9984670
HDI               0.913
Continent       America
Name: Canada, dtype: object

In [51]:
# Indexing with a single column name returns that column as a Series.
df["Continent"]

Canada            America
France             Europe
Germany            Europe
Italy              Europe
Japan                Asia
United Kingdom     Europe
United States     America
Name: Continent, dtype: object

In [53]:
# Convert the single-column Series back into a one-column DataFrame.
df["Continent"].to_frame()

Unnamed: 0,Continent
Canada,America
France,Europe
Germany,Europe
Italy,Europe
Japan,Asia
United Kingdom,Europe
United States,America


In [57]:
# Selecting with a list of column names already returns a DataFrame rather
# than a Series, so to_frame() is not needed.
df[["Continent", "GDP"]]

Unnamed: 0,Continent,GDP
Canada,America,1785387
France,Europe,2833687
Germany,Europe,3874437
Italy,Europe,2167744
Japan,Asia,4602367
United Kingdom,Europe,2950039
United States,America,17348075


In [58]:
# Slicing with [] selects rows by position with an exclusive end, so this
# returns the rows at positions 1 and 2. df.iloc[1:3] is the explicit
# equivalent.
df[1:3]

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
France,63.951,2833687,640679,0.888,Europe
Germany,80.94,3874437,357114,0.916,Europe


In [59]:
# Label-based slicing includes the end label, so the 'Italy' row is part of
# the result.
df.loc['France': 'Italy']

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
France,63.951,2833687,640679,0.888,Europe
Germany,80.94,3874437,357114,0.916,Europe
Italy,60.665,2167744,301336,0.873,Europe


In [60]:
# Row slice plus a single column label: the selection is kept in `a` so its
# type and dtype can be inspected in the following cells.
a = df.loc['France': 'Italy', 'GDP']

In [61]:
# type() is a Python built-in, so it reports the full import path of the class:
# the selection is a Series defined inside the pandas package.
type(a)

pandas.core.series.Series

In [62]:
# `a` is a Series, so the singular `dtype` attribute is the idiomatic accessor;
# it returns the same object as `dtypes` does for a Series.
a.dtype

dtype('int64')

In [64]:
# Row slice plus a list of column labels: returns a DataFrame with the columns
# in the order listed.
df.loc['France': 'Italy',[ 'GDP', 'Population']]

Unnamed: 0,GDP,Population
France,2833687,63.951
Germany,3874437,80.94
Italy,2167744,60.665


In [65]:
# Boolean Series marking the rows whose Population exceeds 70.
df['Population'] > 70

Canada            False
France            False
Germany            True
Italy             False
Japan              True
United Kingdom    False
United States      True
Name: Population, dtype: bool

In [66]:
# Use the boolean mask with loc to keep only the rows where it is True.
df.loc[df['Population'] > 70]

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
Germany,80.94,3874437,357114,0.916,Europe
Japan,127.061,4602367,377930,0.891,Asia
United States,318.523,17348075,9525067,0.915,America


In [68]:
# drop returns a new DataFrame without the 'Canada' row; df itself is left
# unchanged because the result is not assigned.
df.drop('Canada')

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
France,63.951,2833687,640679,0.888,Europe
Germany,80.94,3874437,357114,0.916,Europe
Italy,60.665,2167744,301336,0.873,Europe
Japan,127.061,4602367,377930,0.891,Asia
United Kingdom,64.511,2950039,242495,0.907,Europe
United States,318.523,17348075,9525067,0.915,America


In [69]:
# Display df to confirm that 'Canada' is still present in the original frame.
df

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
Canada,35.467,1785387,9984670,0.913,America
France,63.951,2833687,640679,0.888,Europe
Germany,80.94,3874437,357114,0.916,Europe
Italy,60.665,2167744,301336,0.873,Europe
Japan,127.061,4602367,377930,0.891,Asia
United Kingdom,64.511,2950039,242495,0.907,Europe
United States,318.523,17348075,9525067,0.915,America


In [70]:
# Some DataFrame methods modify the frame in place while others return a new object; check which one applies before using a method.
# Assigning every modified result to a brand-new DataFrame variable is the wrong approach.
# You can apply the desired change and chain further methods on the result, then display it directly.
# No new variable is needed just to view that state; if you want to keep it, assign it back over the original.
# e.g. df.drop(...).to_...(...)

In [72]:
# axis='columns' makes drop remove columns instead of rows; the result is a
# new DataFrame and df is not modified.
df.drop(['Population', 'HDI'], axis = 'columns')

Unnamed: 0,GDP,Surface Area,Continent
Canada,1785387,9984670,America
France,2833687,640679,Europe
Germany,3874437,357114,Europe
Italy,2167744,301336,Europe
Japan,4602367,377930,Asia
United Kingdom,2950039,242495,Europe
United States,17348075,9525067,America


In [73]:
# Arithmetic on a DataFrame is applied element-wise; this produces a new frame
# and leaves df untouched.
df[['Population', 'HDI']]/100

Unnamed: 0,Population,HDI
Canada,0.35467,0.00913
France,0.63951,0.00888
Germany,0.8094,0.00916
Italy,0.60665,0.00873
Japan,1.27061,0.00891
United Kingdom,0.64511,0.00907
United States,3.18523,0.00915


In [75]:
# Language per country, keyed by the same country labels used as df's index;
# only three of the seven countries are covered.
langs = pd.Series(
    {'France': 'French', 'Germany': 'German', 'Italy': 'Italian'},
    name='Language',
)

In [76]:
# Column assignment modifies the DataFrame in place. Values are aligned on the
# index, so countries absent from `langs` get missing values.
# NOTE(review): the key is lowercase 'language', unlike the Series name
# 'Language'; this column is dropped again a few cells below.
df['language'] = langs

In [77]:
df

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent,language
Canada,35.467,1785387,9984670,0.913,America,
France,63.951,2833687,640679,0.888,Europe,French
Germany,80.94,3874437,357114,0.916,Europe,German
Italy,60.665,2167744,301336,0.873,Europe,Italian
Japan,127.061,4602367,377930,0.891,Asia,
United Kingdom,64.511,2950039,242495,0.907,Europe,
United States,318.523,17348075,9525067,0.915,America,


In [78]:
# Add the column again under the capitalized name; column labels are
# case-sensitive, so 'language' and 'Language' now coexist.
df["Language"] = langs
df

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent,language,Language
Canada,35.467,1785387,9984670,0.913,America,,
France,63.951,2833687,640679,0.888,Europe,French,French
Germany,80.94,3874437,357114,0.916,Europe,German,German
Italy,60.665,2167744,301336,0.873,Europe,Italian,Italian
Japan,127.061,4602367,377930,0.891,Asia,,
United Kingdom,64.511,2950039,242495,0.907,Europe,,
United States,318.523,17348075,9525067,0.915,America,,


In [82]:
# Remove the redundant lowercase column. drop returns a new frame, so the
# result is assigned back to df to keep the change.
# NOTE(review): re-running this cell on its own will fail once 'language' is
# gone, because drop raises on missing labels by default.
df = df.drop(['language'], axis="columns")

In [83]:
# Display df: only the capitalized 'Language' column remains.
df

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent,Language
Canada,35.467,1785387,9984670,0.913,America,
France,63.951,2833687,640679,0.888,Europe,French
Germany,80.94,3874437,357114,0.916,Europe,German
Italy,60.665,2167744,301336,0.873,Europe,Italian
Japan,127.061,4602367,377930,0.891,Asia,
United Kingdom,64.511,2950039,242495,0.907,Europe,
United States,318.523,17348075,9525067,0.915,America,


In [86]:
# Deprecated API: DataFrame.append was deprecated in pandas 1.4 and removed in
# pandas 2.0, so this cell only runs on older pandas versions; use pd.concat
# instead. The Series name ('Turkey') becomes the label of the appended row,
# and columns missing from the Series are filled with NaN.
# NOTE(review): the key 'Langage' is misspelled, so the value lands in a new
# 'Langage' column instead of the existing 'Language' column.
df.append(
    pd.Series(
        {
        'Population': 0,
         'Continent': 'Asia', 
         'Langage': 'Turkish'
        }, name = 'Turkey')
)

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent,Language,Langage
Canada,35.467,1785387.0,9984670.0,0.913,America,,
France,63.951,2833687.0,640679.0,0.888,Europe,French,
Germany,80.94,3874437.0,357114.0,0.916,Europe,German,
Italy,60.665,2167744.0,301336.0,0.873,Europe,Italian,
Japan,127.061,4602367.0,377930.0,0.891,Asia,,
United Kingdom,64.511,2950039.0,242495.0,0.907,Europe,,
United States,318.523,17348075.0,9525067.0,0.915,America,,
Turkey,0.0,,,,Asia,,Turkish


In [87]:
# The row to add, as a Series: its keys map to column names and its name
# ('Turkey') will become the row label.
# NOTE(review): the key 'Langage' is misspelled; when combined with df it
# creates a separate 'Langage' column rather than filling 'Language'.
new_row= pd.Series(
        {
        'Population': 0,
         'Continent': 'Asia', 
         'Langage': 'Turkish'
        }, name = 'Turkey')

In [92]:
# Replacement for the removed DataFrame.append: turn the Series into a one-row
# frame and concatenate it below df.
# to_frame().T produces object dtype in every column, which would degrade
# numeric columns such as Population to object after the concat;
# infer_objects() restores proper dtypes first.
# ignore_index=False (the default) keeps the 'Turkey' row label.
pd.concat([df, new_row.to_frame().T.infer_objects()], ignore_index = False)

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent,Language,Langage
Canada,35.467,1785387.0,9984670.0,0.913,America,,
France,63.951,2833687.0,640679.0,0.888,Europe,French,
Germany,80.94,3874437.0,357114.0,0.916,Europe,German,
Italy,60.665,2167744.0,301336.0,0.873,Europe,Italian,
Japan,127.061,4602367.0,377930.0,0.891,Asia,,
United Kingdom,64.511,2950039.0,242495.0,0.907,Europe,,
United States,318.523,17348075.0,9525067.0,0.915,America,,
Turkey,0.0,,,,Asia,,Turkish


In [None]:
## use concat instead of append.