In [1]:
import pandas as pd
import numpy as np

# Seed NumPy's legacy global RNG so the synthetic data below is reproducible.
np.random.seed(42)

n = 1000  # number of synthetic rows

# Deliberately messy synthetic data: city names carry stray whitespace, mixed
# case and Turkish dotted/dotless "i" variants; prices are strings and include
# an unparseable "N/A" placeholder.
# NOTE: the order of the np.random calls determines the generated values.
df = pd.DataFrame({
    "user_id": np.random.randint(1, 300, n),
    "city": np.random.choice(
        [" İstanbul", "ANKARA ", "Izmir", "Bursa ", "istanbul"], n
    ),
    "price": np.random.choice(
        ["100", "200", "300", "N/A", "400"], n
    ),
    "rating": np.round(np.random.uniform(1, 5, n), 1)
})

# --- DAY07 CLEANING ---

# "N/A" cannot be parsed, so errors="coerce" turns it into NaN; the NaNs are
# then imputed with the median of the successfully parsed prices.
df["price"] = pd.to_numeric(df["price"], errors="coerce")
df["price"] = df["price"].fillna(df["price"].median())

# Collapse all spellings of a city into one lowercase ASCII-"i" form.
# regex=False makes both replacements literal regardless of the pandas
# version's default for the `regex` parameter.
df["city"] = (
    df["city"]
    .str.strip()
    .str.lower()
    # Dotless "ı" (U+0131) -> plain "i".
    .str.replace("ı", "i", regex=False)
    # str.lower() maps "İ" (U+0130) to "i" followed by a combining dot above
    # (U+0307). The escape keeps that otherwise invisible mark explicit.
    .str.replace("i\u0307", "i", regex=False)
)

df.head()


Unnamed: 0,user_id,city,price,rating
0,103,izmir,400.0,4.9
1,271,ankara,300.0,2.2
2,107,istanbul,200.0,4.8
3,72,izmir,100.0,2.6
4,189,ankara,400.0,2.5


In [2]:
# Preview the first rows whose normalised city is "istanbul".
df.loc[df["city"].eq("istanbul")].head(5)


Unnamed: 0,user_id,city,price,rating
2,107,istanbul,200.0,4.8
7,122,istanbul,200.0,1.6
11,152,istanbul,200.0,4.3
12,131,istanbul,400.0,2.8
19,22,istanbul,100.0,4.9


In [3]:
# Preview the first rows priced strictly above 300.
df.loc[df["price"].gt(300)].head(5)


Unnamed: 0,user_id,city,price,rating
0,103,izmir,400.0,4.9
4,189,ankara,400.0,2.5
8,215,ankara,400.0,3.2
12,131,istanbul,400.0,2.8
15,294,ankara,400.0,1.8


In [4]:
# Rows that are both in "istanbul" and priced strictly above 300.
df.loc[
    df["city"].eq("istanbul") & df["price"].gt(300)
].head(5)


Unnamed: 0,user_id,city,price,rating
12,131,istanbul,400.0,2.8
43,264,istanbul,400.0,4.5
66,53,istanbul,400.0,4.9
78,63,istanbul,400.0,1.2
81,163,istanbul,400.0,3.0


In [5]:
# Rows whose city is either "istanbul" or "ankara". isin() replaces a chain of
# `==` comparisons joined with `|` and scales to more cities by extending the
# list.
df[df["city"].isin(["istanbul", "ankara"])].head()


Unnamed: 0,user_id,city,price,rating
1,271,ankara,300.0,2.2
2,107,istanbul,200.0,4.8
4,189,ankara,400.0,2.5
5,21,ankara,300.0,2.4
7,122,istanbul,200.0,1.6


In [6]:
# Cheapest rows first. Many rows share the same price, so the order among
# tied rows is whatever the default sort algorithm produces.
df.sort_values(by="price", ascending=True).head(5)


Unnamed: 0,user_id,city,price,rating
999,144,istanbul,100.0,5.0
674,126,bursa,100.0,3.9
670,34,istanbul,100.0,4.7
667,184,bursa,100.0,2.7
434,90,ankara,100.0,3.1


In [7]:
# Highest-rated rows first.
df.sort_values(by="rating", ascending=False).head(5)



Unnamed: 0,user_id,city,price,rating
999,144,istanbul,100.0,5.0
357,279,istanbul,400.0,5.0
97,214,istanbul,400.0,5.0
28,175,istanbul,300.0,5.0
304,104,ankara,300.0,5.0


In [8]:
# Most expensive "istanbul" rows: filter first, then sort descending by price.
(
    df.loc[df["city"].eq("istanbul")]
    .sort_values(by="price", ascending=False)
    .head(5)
)


Unnamed: 0,user_id,city,price,rating
675,235,istanbul,400.0,1.0
280,280,istanbul,400.0,2.4
273,220,istanbul,400.0,4.2
272,172,istanbul,400.0,4.1
271,17,istanbul,400.0,1.1


In [9]:
# Average price for each normalised city.
(
    df
    .groupby(by="city")["price"]
    .mean()
)


city
ankara      267.289720
bursa       252.040816
istanbul    262.034739
izmir       268.449198
Name: price, dtype: float64

In [10]:
# Re-coerce "price" to a numeric dtype. The cleaning step in the first cell
# already converted and imputed this column, so when the notebook is run top
# to bottom this leaves the values unchanged.
df["price"] = pd.to_numeric(df.loc[:, "price"], errors="coerce")


In [11]:
# Number of non-null "user_id" entries per city. This is a row count: repeated
# user ids are each counted.
(
    df
    .groupby(by="city")["user_id"]
    .count()
)


city
ankara      214
bursa       196
istanbul    403
izmir       187
Name: user_id, dtype: int64

In [12]:
# Per-city summary table built with named aggregations.
# NOTE(review): "user_count" uses "count", i.e. the number of rows per city.
# user_id values repeat in this data, so this is not the number of distinct
# users; switch to "nunique" if distinct users are intended.
df.groupby(by="city").agg(
    avg_price=pd.NamedAgg(column="price", aggfunc="mean"),
    max_price=pd.NamedAgg(column="price", aggfunc="max"),
    avg_rating=pd.NamedAgg(column="rating", aggfunc="mean"),
    user_count=pd.NamedAgg(column="user_id", aggfunc="count"),
)


Unnamed: 0_level_0,avg_price,max_price,avg_rating,user_count
city,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
ankara,267.28972,400.0,2.909813,214
bursa,252.040816,400.0,2.960714,196
istanbul,262.034739,400.0,3.032506,403
izmir,268.449198,400.0,3.080214,187


In [13]:
# Mean and median price side by side for each city.
(
    df
    .groupby(by="city")["price"]
    .agg(["mean", "median"])
)


Unnamed: 0_level_0,mean,median
city,Unnamed: 1_level_1,Unnamed: 2_level_1
ankara,267.28972,300.0
bursa,252.040816,300.0
istanbul,262.034739,300.0
izmir,268.449198,300.0
