Pandas DataFrame, bir Excel tablosuna benzer: sütun adları, satır indeksleri ve karışık veri tipleri (sayı, string, tarih) barındırır.

In [1]:
import pandas as pd

# Build a small student table column by column: every keyword below becomes
# a column name and every list holds that column's values in row order.
data = dict(
    name=["Nilay", "Emre", "Ayşe", "Ali", "Zeynep"],
    age=[22, 25, 23, 24, 22],
    score=[88, 92, 95, 70, 85],
    department=["CS", "CS", "Math", "Physics", "Math"],
)

# Columns follow the insertion order of the mapping above.
df = pd.DataFrame(data)
print(df)


     name  age  score department
0   Nilay   22     88         CS
1    Emre   25     92         CS
2    Ayşe   23     95       Math
3     Ali   24     70    Physics
4  Zeynep   22     85       Math


df.head() → varsayılan olarak ilk 5 satırı gösterir (hızlı göz atma için); df.head(n) ile satır sayısı değiştirilebilir.

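df.info() → sütun tiplerini, her sütunda kaç tane null olmayan (non-null) değer olduğunu, DataFrame boyutunu (satır/sütun sayısı) ve bellek kullanımını gösterir. Raporu kendisi ekrana yazdırır ve None döndürür.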

df.describe() → sayısal sütunların istatistik özetini çıkarır (ortalama, min, max, std...).

In [2]:
# Quick first look: head() returns the first rows (5 by default).
print("İlk 5 satır:")
print(df.head())

# info() writes its report straight to stdout and returns None, so it is
# called directly; wrapping it in print() would emit a stray "None" line.
print("\nBilgi:")
df.info()

# describe() summarises the numeric columns (count, mean, std, quartiles...).
print("\nİstatistik özet:")
print(df.describe())


İlk 5 satır:
     name  age  score department
0   Nilay   22     88         CS
1    Emre   25     92         CS
2    Ayşe   23     95       Math
3     Ali   24     70    Physics
4  Zeynep   22     85       Math

Bilgi:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   name        5 non-null      object
 1   age         5 non-null      int64 
 2   score       5 non-null      int64 
 3   department  5 non-null      object
dtypes: int64(2), object(2)
memory usage: 292.0+ bytes
None

İstatistik özet:
            age      score
count   5.00000   5.000000
mean   23.20000  86.000000
std     1.30384   9.721111
min    22.00000  70.000000
25%    22.00000  85.000000
50%    23.00000  88.000000
75%    24.00000  92.000000
max    25.00000  95.000000


In [None]:
# Distinct department labels, followed by how many distinct labels there are.
department_col = df["department"]
print(department_col.unique())
print(department_col.nunique())


['CS' 'Math' 'Physics']
3


In [4]:
# Summary of the text (object-dtype) columns: count, unique, top, freq.
text_summary = df.describe(include=["object"])
print(text_summary)

# Number of rows that fall into each department.
department_counts = df["department"].value_counts()
print(department_counts)


         name department
count       5          5
unique      5          3
top     Nilay         CS
freq        1          2
department
CS         2
Math       2
Physics    1
Name: count, dtype: int64


In [5]:
import pandas as pd
import numpy as np

# One (student, course, score, year) tuple per exam result.
columns = ("student", "course", "score", "year")
records = [
    ("Nilay", "Math", 90, 2023),
    ("Emre", "CS", 85, 2023),
    ("Ayşe", "Physics", 70, 2023),
    ("Ali", "CS", 88, 2024),
    ("Nilay", "Math", 95, 2024),
    ("Emre", "CS", 92, 2024),
    ("Zeynep", "Math", 60, 2023),
    ("Ali", "Physics", 75, 2024),
]

# Transpose the row tuples into a {column name: list of values} mapping.
# Note: this rebinds `data` and `df`, replacing the table built in the first cell.
data = {column: list(values) for column, values in zip(columns, zip(*records))}

df = pd.DataFrame(data)
print(df)


  student   course  score  year
0   Nilay     Math     90  2023
1    Emre       CS     85  2023
2    Ayşe  Physics     70  2023
3     Ali       CS     88  2024
4   Nilay     Math     95  2024
5    Emre       CS     92  2024
6  Zeynep     Math     60  2023
7     Ali  Physics     75  2024


In [6]:
# Describe the numeric columns first, then the text (object-dtype) columns.
for dtype_group in ("number", "object"):
    print(df.describe(include=[dtype_group]))


           score         year
count   8.000000     8.000000
mean   81.875000  2023.500000
std    12.275847     0.534522
min    60.000000  2023.000000
25%    73.750000  2023.000000
50%    86.500000  2023.500000
75%    90.500000  2024.000000
max    95.000000  2024.000000
       student course
count        8      8
unique       5      3
top      Nilay   Math
freq         2      3


In [7]:
# Row counts per distinct value, for the student column and then the course column.
for column_name in ("student", "course"):
    print(df[column_name].value_counts())


student
Nilay     2
Emre      2
Ali       2
Ayşe      1
Zeynep    1
Name: count, dtype: int64
course
Math       3
CS         3
Physics    2
Name: count, dtype: int64


In [8]:
# Mean score per student, then mean score per course.
for group_key in ("student", "course"):
    print(df.groupby(group_key)["score"].mean())

# Mean, highest and lowest score within each year.
yearly_scores = df.groupby("year")["score"]
print(yearly_scores.agg(["mean", "max", "min"]))


student
Ali       81.5
Ayşe      70.0
Emre      88.5
Nilay     92.5
Zeynep    60.0
Name: score, dtype: float64
course
CS         88.333333
Math       81.666667
Physics    72.500000
Name: score, dtype: float64
       mean  max  min
year                 
2023  76.25   90   60
2024  87.50   95   75


In [9]:
# Rows whose score is strictly above 85.
scored_above_85 = df["score"].gt(85)
print(df[scored_above_85])

# CS rows from 2024: both conditions must hold, so the two masks are AND-ed.
cs_in_2024 = df["course"].eq("CS") & df["year"].eq(2024)
print(df[cs_in_2024])


  student course  score  year
0   Nilay   Math     90  2023
3     Ali     CS     88  2024
4   Nilay   Math     95  2024
5    Emre     CS     92  2024
  student course  score  year
3     Ali     CS     88  2024
5    Emre     CS     92  2024


In [10]:
student_col = df["student"]
# Which students appear in the table.
print(student_col.unique())
# How many different students there are.
print(student_col.nunique())


['Nilay' 'Emre' 'Ayşe' 'Ali' 'Zeynep']
5


In [None]:
# All rows ordered from the highest score to the lowest.
by_score_desc = df.sort_values(by="score", ascending=False)
print(by_score_desc)

# Sort on two keys: year ascending first, then score descending within each
# year. `ascending` takes one flag per key; False means largest-to-smallest.
by_year_then_score = df.sort_values(by=["year", "score"], ascending=[True, False])
print(by_year_then_score)


  student   course  score  year
4   Nilay     Math     95  2024
5    Emre       CS     92  2024
0   Nilay     Math     90  2023
3     Ali       CS     88  2024
1    Emre       CS     85  2023
7     Ali  Physics     75  2024
2    Ayşe  Physics     70  2023
6  Zeynep     Math     60  2023
  student   course  score  year
0   Nilay     Math     90  2023
1    Emre       CS     85  2023
2    Ayşe  Physics     70  2023
6  Zeynep     Math     60  2023
4   Nilay     Math     95  2024
5    Emre       CS     92  2024
3     Ali       CS     88  2024
7     Ali  Physics     75  2024


In [12]:
# Mean score per course, restricted to the rows from 2024.
rows_2024 = df[df["year"].eq(2024)]
print(rows_2024.groupby("course")["score"].mean())


course
CS         90.0
Math       95.0
Physics    75.0
Name: score, dtype: float64


In [13]:
# Mean score for every (year, course) combination.
mean_by_year_course = df.groupby(by=["year", "course"])["score"].mean()
print(mean_by_year_course)


year  course 
2023  CS         85.0
      Math       75.0
      Physics    70.0
2024  CS         90.0
      Math       95.0
      Physics    75.0
Name: score, dtype: float64


In [14]:
# A single column label selects one column (a Series).
print(df.loc[:, "score"])
# A list of labels selects several columns (a DataFrame).
print(df.loc[:, ["student", "score"]])


0    90
1    85
2    70
3    88
4    95
5    92
6    60
7    75
Name: score, dtype: int64
  student  score
0   Nilay     90
1    Emre     85
2    Ayşe     70
3     Ali     88
4   Nilay     95
5    Emre     92
6  Zeynep     60
7     Ali     75


In [None]:
# loc selects by label (index label or column name): the row labelled 0.
print(df.loc[0, :])
# iloc selects by integer position (0, 1, 2, ...): position 2 is the THIRD row.
print(df.iloc[2, :])


student    Nilay
course      Math
score         90
year        2023
Name: 0, dtype: object
student       Ayşe
course     Physics
score           70
year          2023
Name: 2, dtype: object


In [16]:
# Label-based slice: loc includes BOTH endpoints, so this returns the rows
# labelled 0, 1 and 2, restricted to the two named columns.
print(df.loc[:2, ["student", "score"]])
# Position-based slice: iloc excludes the stop value, so this returns row
# positions 0-2 and column positions 1-2.
print(df.iloc[:3, [1, 2]])


  student  score
0   Nilay     90
1    Emre     85
2    Ayşe     70
    course  score
0     Math     90
1       CS     85
2  Physics     70


Önce eksik veri (NaN) oluştur

In [17]:
import numpy as np

# An int64 column cannot hold NaN. Cast "score" to float explicitly before
# inserting the missing value instead of relying on implicit upcasting, which
# recent pandas versions deprecate for setitem-style assignments.
df["score"] = df["score"].astype("float64")

# Turn the score in the row labelled 2 (Ayşe) into a missing value.
df.loc[2, "score"] = np.nan
print(df)


  student   course  score  year
0   Nilay     Math   90.0  2023
1    Emre       CS   85.0  2023
2    Ayşe  Physics    NaN  2023
3     Ali       CS   88.0  2024
4   Nilay     Math   95.0  2024
5    Emre       CS   92.0  2024
6  Zeynep     Math   60.0  2023
7     Ali  Physics   75.0  2024


Sonra doldur

In [18]:
# Eksikleri 0 ile doldur
print(df.fillna(0))

# Eksikleri ortalama ile doldur
print(df.fillna(df["score"].mean()))


  student   course  score  year
0   Nilay     Math   90.0  2023
1    Emre       CS   85.0  2023
2    Ayşe  Physics    0.0  2023
3     Ali       CS   88.0  2024
4   Nilay     Math   95.0  2024
5    Emre       CS   92.0  2024
6  Zeynep     Math   60.0  2023
7     Ali  Physics   75.0  2024
  student   course      score  year
0   Nilay     Math  90.000000  2023
1    Emre       CS  85.000000  2023
2    Ayşe  Physics  83.571429  2023
3     Ali       CS  88.000000  2024
4   Nilay     Math  95.000000  2024
5    Emre       CS  92.000000  2024
6  Zeynep     Math  60.000000  2023
7     Ali  Physics  75.000000  2024


Yeni sütun ekleme (assign)

In [19]:
# assign() returns a new frame carrying the extra column; `df` is not modified.
# The callable receives the frame being assigned to and adds 5 to every score.
df2 = df.assign(score_bonus=lambda frame: frame["score"] + 5)
print(df2.head())


  student   course  score  year  score_bonus
0   Nilay     Math   90.0  2023         95.0
1    Emre       CS   85.0  2023         90.0
2    Ayşe  Physics    NaN  2023          NaN
3     Ali       CS   88.0  2024         93.0
4   Nilay     Math   95.0  2024        100.0
