**Imports**

In [None]:
import matplotlib.pyplot as plt
import missingno as msno
import numpy as np
import pandas as pd
import plotly.express as px
import seaborn as sns

from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import accuracy_score as ass
from sklearn.metrics import confusion_matrix
from sklearn.metrics import make_scorer
from sklearn.metrics import r2_score
from sklearn.model_selection import cross_validate
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import BernoulliNB
from sklearn.naive_bayes import GaussianNB
from sklearn.naive_bayes import MultinomialNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

# Linear Discriminant Analysis libraries
from sklearn.covariance import LedoitWolf
from sklearn.covariance import MinCovDet
from sklearn.covariance import OAS
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

**Specify File Paths**

In [None]:
# Location of the input CSV and of the output folder (output_dir is not
# referenced by the visible cells).
output_dir = "/kaggle/working/"
data_dir = "/kaggle/input/uncovering-factors-that-affect-used-car-prices/autos.csv"

**Read Data**

In [None]:
# Load the CSV at data_dir into the working frame.
data = pd.read_csv(data_dir)

In [None]:
msno.matrix(data)
# Show where values are missing in the data.

In [None]:
# Check the missing values per column.
msno.bar(data, sort='ascending')

In [None]:
# Basic cleaning: replace missing values, keep plausible prices and
# registration years, and fix column dtypes.
# NOTE(review): the blanket fillna(0) also overwrites missing `gearbox` and
# `model` values (and puts the integer 0 into text columns), so the NaN-based
# gearbox imputation in a later cell has nothing left to impute. Several later
# cells rely on the resulting "0" category, so it is kept here - confirm intent.
data = data.fillna(0)

valid_rows = (
    data["price"].gt(0)
    & data["price"].lt(10**5)
    & data["yearOfRegistration"].gt(1900)
    & data["yearOfRegistration"].le(2023)
)
# .copy() gives an independent frame, so the column assignments below do not
# write into a filtered slice (which triggers SettingWithCopyWarning).
data = data.loc[valid_rows].copy()

data["dateCrawled"] = pd.to_datetime(data["dateCrawled"])
data["yearOfRegistration"] = data["yearOfRegistration"].astype(int)
data["price"] = data["price"].astype(int)

In [None]:
msno.matrix(data)
# Show the missing values again after the cleaning step.

In [None]:
msno.bar(data, sort='ascending')

In [None]:
def _first_mode(values):
    """Return the most frequent value of `values`, or NaN when it has no mode."""
    modes = values.mode()
    return modes.iloc[0] if not modes.empty else np.nan


# Most common gearbox for every known model; used to fill missing gearboxes.
gearbox_by_model = (
    data.loc[data["model"].notna()]
    .groupby("model")["gearbox"]
    .agg(_first_mode)
)
model_gearbox = data["model"].map(gearbox_by_model)

# Rows with a missing gearbox take their model's most common gearbox
# (stays NaN when the model is missing as well) ...
data["gearbox"] = data["gearbox"].where(data["gearbox"].notna(), model_gearbox)
# ... and anything still missing falls back to the overall most common gearbox.
data["gearbox"] = data["gearbox"].fillna(_first_mode(data["gearbox"]))
del gearbox_by_model, model_gearbox

# The merge used previously returned a fresh 0..n-1 index; keep that behavior.
data = data.reset_index(drop=True)

# Later cells compare `gearbox` with the labels 'automatik' / 'manuell', so the
# labels are kept and the integer codes are stored in a separate column.
data["gearbox_code"] = pd.factorize(data["gearbox"])[0]

**Data Description**

In [None]:
# Quick overview: column names, container type, shape, then the first rows.
print(data.columns)
print(type(data))
print("Data shape is :", data.shape)
data.head()

In [None]:
# Drop the first column by position (the original note calls it the index column).
# NOTE(review): a positional drop is not idempotent - re-running this cell
# removes one more column and shifts the positional selections used later.
data = data.iloc[:, 1:]
print(data.columns)
print("new data shape is :", data.shape)
data.head()

In [None]:
# Bar chart of the per-brand mean of every numeric column.
# The axes are created explicitly and handed to pandas; a bare plt.figure()
# would stay empty because DataFrame.plot opens its own figure.
fig, ax = plt.subplots(figsize=(30, 10))
# numeric_only=True: text columns cannot be averaged (pandas >= 2.0 raises).
data_mean = data.groupby(data.brand).mean(numeric_only=True)
data_mean.plot.bar(ax=ax)
ax.set_title("Autos Data Setini İnceliyoruz")
ax.set_xlabel("Brands")
ax.set_ylabel("Values")
plt.show()


# To look at the distribution (Gaussian?) of the columns:
# data.plot.kde()

In [None]:
data.corr()
# Sayısal değişkenler arasında korelasyon olup olmadığını gösterme.
# Korelasyon katsayısı = -1 < c < 1
# c = -1 (ters yönlü mükemmel ilişki)
# c = 0 (ilişki yok)
# c = 1 (aynı yönlü mükemmel ilişki)
# c = 0.8 (aynı yönlü iyi ilişki)
# c = -0.8 (ters yönlü iyi ilişki)

In [None]:
corr = data.corr()
figure, axis=plt.subplots(figsize=(10,10))
sns.heatmap(corr)
# Korelasyon katsayılarını daha iyi okuyabilmek için ısı haritası çizme

In [None]:
# heat map
data.corr().style.background_gradient('Blues')

In [None]:
# Two-dimensional scatter plots, coloured by brand.
# relplot is a figure-level function: it always creates its own figure, so the
# size is set through height/aspect (10 x 3 -> 30 x 10 inches). A preceding
# plt.figure() only produced an extra empty figure per plot.
scatter_pairs = [
    ("yearOfRegistration", "powerPS"),
    ("monthOfRegistration", "powerPS"),
    ("kilometer", "powerPS"),
    ("price", "powerPS"),
    ("postalCode", "powerPS"),
    ("postalCode", "price"),
    ("postalCode", "nrOfPictures"),
]
for x_col, y_col in scatter_pairs:
    sns.relplot(data=data, x=x_col, y=y_col, hue="brand", height=10, aspect=3)
    plt.show()

In [None]:
# Sınıflara göre data dağılımını görme.
# sns.pairplot(data,hue ="brand")

In [None]:
# One box plot per numeric column, split by brand.
boxplot_columns = (
    "price",
    "yearOfRegistration",
    "monthOfRegistration",
    "powerPS",
    "kilometer",
    "postalCode",
)
for column in boxplot_columns:
    plt.figure(figsize=(30, 10))
    sns.boxplot(data=data, x="brand", y=column)
    plt.show()

In [None]:
# Split the frame into the feature matrix x and the target y (by position).
# NOTE(review): the positions are assumed to pick the numeric features and the
# brand column - verify against data.columns.
x = data.iloc[:, [7, 9, 11, 12, 17, 18]]
y = data.iloc[:, 14:15]

# LabelEncoder expects a 1-D target, so the one-column frame is flattened.
le = LabelEncoder()
y_trasform = le.fit_transform(y.to_numpy().ravel())

# NOTE(review): train_size=0.33 trains on one third and evaluates on two
# thirds of the rows - confirm that test_size=0.33 was not intended.
x_train, x_test, y_train, y_test = train_test_split(
    x, y_trasform, train_size=0.33, random_state=0
)

knc = KNeighborsClassifier(n_neighbors=3, algorithm='auto')
knc.fit(x_train, y_train)
pred = knc.predict(x_test)

ac_sc = accuracy_score(y_test, pred)
print('Başarı oranı: ', ac_sc)

# The confusion matrix gets its own label (it was printed as the accuracy).
cm = confusion_matrix(y_test, pred)
print('Karışıklık matrisi:\n', cm)

# First feature against the encoded target, then the encoded target in row order.
plt.figure(figsize=(30, 10))
plt.scatter(x.iloc[:, 0], y_trasform, color='blue')
plt.show()
plt.figure(figsize=(30, 10))
plt.plot(y_trasform, color='green')
plt.show()

In [None]:
# Feature matrix and target for the cross-validation cells below.
x = data.iloc[:, [7, 9, 11, 12, 17, 18]]
# The target is selected as a 1-D Series; a one-column frame makes
# scikit-learn warn about a column-vector y on every fit.
y = data.iloc[:, 14]

In [None]:
# Candidate classifiers that the following cells compare with cross-validation.
k_nn = KNeighborsClassifier(n_neighbors=8, metric="chebyshev")
logi = LogisticRegression(random_state=5)
# Seeded like the other stochastic estimators so repeated runs give the same
# tree (max_features="sqrt" samples features at random).
DT = DecisionTreeClassifier(max_features="sqrt", random_state=0)
SDF = SGDClassifier(penalty="l2", random_state=10)
# NOTE(review): `degree` only affects the polynomial kernel; with "rbf" it has no effect.
S_VC = SVC(degree=3, C=8, kernel="rbf")
RF = RandomForestClassifier(n_estimators=78, criterion="gini", random_state=0)
Bayes = GaussianNB()
MBayes = MultinomialNB()
BBayes = BernoulliNB()
LDA = LinearDiscriminantAnalysis(solver="eigen")
# Flat list that later cells extend with alternating "<name> :" labels and scores.
Result = []

In [None]:
# SDF
# cv_sonuc= cross_validate(SDF, x, y, cv=5 , scoring='accuracy')
# res=cv_sonuc['test_score'].mean()

# print("Accuracy of SDF: ", res*100, "%")
# Result.append( "SDF :")
# Result.append(res)

In [None]:
# KNN
cv_sonuc= cross_validate(k_nn, x, y, cv=5 , scoring='accuracy')
res=cv_sonuc['test_score'].mean()

print("Accuracy of KNN: ", res*100, "%")
Result.append( "KNN :")
Result.append(res)

In [None]:
# LOGISTIC REGRESSION
cv_sonuc= cross_validate(logi, x, y, cv=5 , scoring='accuracy')
res=cv_sonuc['test_score'].mean()

print("Accuracy of Logistic Regression: ", res*100, "%")
Result.append( "LR :")
Result.append(res)

In [None]:
# Decision Tree
cv_sonuc= cross_validate(DT, x, y, cv=5 , scoring='accuracy')
res=cv_sonuc['test_score'].mean()

print("Accuracy of Decision Tree: ", res*100, "%")
Result.append( "DT :")
Result.append(res)

In [None]:
# Support Vector Classifier
# cv_sonuc= cross_validate(S_VC, x, y, cv=5 , scoring='accuracy')
# res=cv_sonuc['test_score'].mean()

# print("Accuracy of Support Vector Classifier: ", res*100, "%")
# Result.append( "SVC :")
# Result.append(res)

In [None]:
# Linear Discriminant Analysis: mean accuracy over 5 folds
cv_sonuc = cross_validate(LDA, x, y, scoring='accuracy', cv=5)
res = cv_sonuc['test_score'].mean()
print("Accuracy of Linear Discriminant Analysis: ", res*100, "%")
Result.extend(["LDA :", res])

In [None]:
# One-hot encode vehicleType (all levels) and fuelType (first level dropped).
data_dummy = pd.get_dummies(
    pd.get_dummies(data, columns=['vehicleType'], prefix='vehicleType', prefix_sep='.'),
    columns=['fuelType'],
    prefix='fuelType',
    prefix_sep='.',
    drop_first=True,
)

In [None]:
# Price density curves, split once by brand and once by gearbox.
price_kde_kwargs = dict(data=data_dummy, x='price', kind='kde')
sns.displot(hue='brand', **price_kde_kwargs)
sns.displot(hue='gearbox', **price_kde_kwargs)

In [None]:
# Absolute correlation of every numeric column with price, strongest first.
# Only numeric columns are correlated; text columns make corr() raise on pandas >= 2.0.
temp = (
    data.select_dtypes(include="number")
    .corr()["price"]
    .abs()
    .sort_values(ascending=False)
)
temp

In [None]:
data

In [None]:
data.isna().sum()
# Count the missing values in each column of the data frame.

In [None]:
data.info()
# Show the dtype of each column of the data frame.

In [None]:
data.describe()
# Basic summary statistics of the numeric columns.

In [None]:
data.describe().T
# The same summary statistics, transposed (one row per column).

In [None]:
data.head(10)
# First 10 rows of the data frame.

In [None]:
data.columns

In [None]:
data.shape
 # Number of observations (rows) and features (columns) in the data frame.

In [None]:
# Standard deviation of every numeric column within each vehicle type.
# Numeric columns are selected explicitly: std() cannot be computed on text
# columns (pandas >= 2.0 raises).
numeric_cols = [
    col for col in data.select_dtypes(include="number").columns if col != "vehicleType"
]
stdData = data.groupby("vehicleType")[numeric_cols].std()
# Tick labels come from the groups that are actually present, so they cannot
# get out of step with the rows of stdData.
features = [str(label) for label in stdData.index]

fig, ax = plt.subplots(figsize=(30, 10))
lines = ax.plot(features, stdData.to_numpy(), marker="o")
ax.legend(lines, stdData.columns)  # one line per numeric column
ax.set_xlabel('vehicle type')
ax.set_ylabel('average standard deviation')
plt.show()

In [None]:
# Standard deviation of every numeric column within each gearbox group.
# The grouping column is left out of the value columns, and only numeric
# columns are used (std() on text columns raises on pandas >= 2.0).
numeric_cols = [
    col for col in data.select_dtypes(include="number").columns if col != "gearbox"
]
stdData = data.groupby("gearbox")[numeric_cols].std()
# Tick labels come from the groups that are actually present, so they always
# match the rows of stdData whatever values `gearbox` holds.
features = [str(label) for label in stdData.index]

fig, ax = plt.subplots(figsize=(30, 10))
lines = ax.plot(features, stdData.to_numpy(), marker="o")
ax.legend(lines, stdData.columns)  # one line per numeric column
ax.set_xlabel('gear box')
ax.set_ylabel('average standard deviation')
plt.show()

In [None]:
data.groupby(["vehicleType"]).mean()

In [None]:
data.groupby(["brand"]).mean()

In [None]:
data.groupby(["model"]).mean()

In [None]:
data.groupby(["gearbox"]).mean()

In [None]:
data.groupby(["fuelType"]).mean()

In [None]:
data.groupby(["powerPS"]).mean()

In [None]:
data.groupby(["brand"])["price"].std()

In [None]:
data.groupby(["vehicleType"])["price"].std()

In [None]:
data.groupby(["model"])["price"].std()

In [None]:
print(data["powerPS"].max())
print(data["price"].max())
print(data["yearOfRegistration"].max())
print(data["kilometer"].max())
print(data["monthOfRegistration"].max())

In [None]:
print(data["powerPS"].min())
print(data["price"].min())
print(data["yearOfRegistration"].min())
print(data["kilometer"].min())
print(data["monthOfRegistration"].min())

In [None]:
print("Result 1: ")
data[(data["powerPS"] > 10000) & (data["brand"] == "bmw")]


In [None]:
print("Result 2: ")
data[(data["kilometer"] > 140000) & (data["vehicleType"] == "bus")]

In [None]:
print("Result 3: ")
data[(data["powerPS"] > 19000) & (data["gearbox"] == "automatik") & (data["brand"] == "mercedes_benz")]

In [None]:
print("Result 4: ")
data[(data["powerPS"] > 2000) & (data["yearOfRegistration"] > 2015) & (data["price"] > 7000) &  (data["brand"] == "bmw")]

In [None]:
print("Result 5: ")
data[(data["kilometer"] < 7000) & (data["yearOfRegistration"] > 2018) & (data["price"] > 6000)]

In [None]:
sns.lmplot(x = 'price', y = 'powerPS', fit_reg = False, hue = 'kilometer', data = data)

In [None]:
print(sns.violinplot(y = "price", data=data, color="green"))
# Normal dağılımda mod, medyan ve aritmetik ortalama birbirine eşittir.
# Eğrinin maksimum noktası aritmetik ortalamadır (dolayısıyla mod ve medyandır).
# Eğri aritmetik ortalamaya göre simetriktir.

In [None]:
sns.distplot(data["price"], bins=16, color="black");
# distplot çizdirme

In [None]:
print(sns.violinplot(y = "powerPS", data=data, color="green"))

In [None]:
sns.distplot(data["powerPS"], bins=16, color="black");
# distplot çizdirme

In [None]:
print(sns.violinplot(y = "yearOfRegistration", data=data, color="green"))

In [None]:
sns.distplot(data["yearOfRegistration"], bins=16, color="black");
# distplot çizdirme

In [None]:
print(sns.violinplot(y = "kilometer", data=data, color="green"))

In [None]:
sns.distplot(data["kilometer"], bins=16, color="black");
# distplot çizdirme

In [None]:
print(sns.violinplot(y = "postalCode", data=data, color="green"))

In [None]:
sns.distplot(data["postalCode"], bins=16, color="black");
# distplot çizdirme

**value_counts()**

In [None]:
name_count = data['name'].value_counts()
print(name_count)

print("\nNumber of Unique Values:")
print(data["name"].nunique())

print("\nUnique Values:")
data.name.unique()

In [None]:
price_count = data['price'].value_counts()
print(price_count)

print("\nNumber of Unique Values:")
print(data["price"].nunique())

print("\nUnique Values:")
data.price.unique()

In [None]:
vehicleType_count = data['vehicleType'].value_counts()
print(vehicleType_count)

print("\nNumber of Unique Values:")
print(data["vehicleType"].nunique())

print("\nUnique Values:")
data.vehicleType.unique()

In [None]:
vehicleType_graph = px.bar(x=vehicleType_count.index, y=vehicleType_count.values, labels={'x':'Vehicle Type', 'y':'Number of Cars'})
vehicleType_graph.show()

vehicleType_count.plot.bar()
plt.show()

plt.figure(figsize=(30,10))
sns.countplot(x='vehicleType', data=data)
plt.show()

plt.figure(figsize=(30,10))
sns.violinplot(x = "vehicleType", y = "price", data=data);
plt.show()

In [None]:
brand_count = data['brand'].value_counts()
print(brand_count)

print("\nNumber of Unique Values:")
print(data["brand"].nunique())

print("\nUnique Values:")
data.brand.unique()

In [None]:
brand_graph = px.bar(x=brand_count.index, y=brand_count.values, labels={'x':'Brand', 'y':'Number of Cars'})
brand_graph.show()

brand_count.plot.bar()
plt.show()

plt.figure(figsize=(30,10))
sns.countplot(x='brand', data=data)
plt.show()

plt.figure(figsize=(30,10))
sns.violinplot(x = "brand", y = "price", data=data);
plt.show()

In [None]:
model_count = data['model'].value_counts()
print(model_count)

print("\nNumber of Unique Values:")
print(data["model"].nunique())

print("\nUnique Values:")
data.model.unique()

In [None]:
fuelType_count = data['fuelType'].value_counts()
print(fuelType_count)

print("\nNumber of Unique Values:")
print(data["fuelType"].nunique())

print("\nUnique Values:")
data.fuelType.unique()

In [None]:
fuelType_graph = px.bar(x=fuelType_count.index, y=fuelType_count.values, labels={'x':'Fuel Type', 'y':'Number of Cars'})
fuelType_graph.show()

fuelType_count.plot.bar()
plt.show()

plt.figure(figsize=(30,10))
sns.countplot(x='fuelType', data=data)
plt.show()

plt.figure(figsize=(30,10))
sns.violinplot(x = "fuelType", y = "price", data=data);
plt.show()

In [None]:
yearOfRegistration_count = data['yearOfRegistration'].value_counts()
print(yearOfRegistration_count)

print("\nNumber of Unique Values:")
print(data["yearOfRegistration"].nunique())

print("\nUnique Values:")
data.yearOfRegistration.unique()

In [None]:
powerPS_count = data['powerPS'].value_counts()
print(powerPS_count)

print("\nNumber of Unique Values:")
print(data["powerPS"].nunique())

print("\nUnique Values:")
data.powerPS.unique()

In [None]:
gearbox_count = data['gearbox'].value_counts()
print(gearbox_count)

print("\nNumber of Unique Values:")
print(data["gearbox"].nunique())

print("\nUnique Values:")
data.gearbox.unique()

In [None]:
gearbox_graph = px.bar(x=gearbox_count.index, y=gearbox_count.values, labels={'x':'Gear Box', 'y':'Number of Cars'})
gearbox_graph.show()

gearbox_count.plot.bar()
plt.show()

plt.figure(figsize=(30,10))
sns.countplot(x='gearbox', data=data)
plt.show()

plt.figure(figsize=(30,10))
sns.violinplot(x = "gearbox", y = "price", data=data);
plt.show()

In [None]:
abtest_count = data['abtest'].value_counts()
print(abtest_count)

print("\nNumber of Unique Values:")
print(data["abtest"].nunique())

print("\nUnique Values:")
data.abtest.unique()

In [None]:
abtest_graph = px.bar(x=abtest_count.index, y=abtest_count.values, labels={'x':'Abtest', 'y':'Number of Cars'})
abtest_graph.show()

abtest_count.plot.bar()
plt.show()

plt.figure(figsize=(30,10))
sns.countplot(x='abtest', data=data)
plt.show()

plt.figure(figsize=(30,10))
sns.violinplot(x = "abtest", y = "price", data=data);
plt.show()

In [None]:
data_volkswagen = data.loc[data.brand == 'volkswagen']

In [None]:
# options 1
volkswagen_counts = data_volkswagen['model'].value_counts().plot(kind='bar')

In [None]:
# options 2
for brand in data['brand'].unique():
    data_brand = data.loc[data.brand == brand]
    model_counts = data_brand.groupby('model').size()
    plt.figure()
    model_counts.plot(kind='bar')
    plt.title(f'Models for {brand}')

In [None]:
# The five most frequent brands.
top_brands = data['brand'].value_counts().nlargest(5)

# Model counts per top brand, one column per brand.
# The frame is built from a dict of Series so its index is the union of all
# models. Assigning the columns one by one to an empty frame aligns every
# brand to the first brand's models and silently drops the others.
data_models = pd.DataFrame({
    brand: data.loc[data['brand'] == brand, 'model'].value_counts()
    for brand in top_brands.index
})

fig, ax = plt.subplots(1, len(top_brands), figsize=(25, 5), squeeze=False)
ax = ax.ravel()

for i, brand in enumerate(top_brands.index):
    # Drop the models that belong to other brands (NaN after the union) and
    # keep the most-frequent-first ordering of value_counts().
    brand_models = data_models[brand].dropna().sort_values(ascending=False)
    brand_models.plot.bar(ax=ax[i])
    ax[i].set_title(brand)

# Show the plot
plt.show()

In [None]:
# For each top brand, find the model with the highest count.
top_models = {brand: data_models[brand].idxmax() for brand in top_brands.index}
print(top_models)

In [None]:
print("Automatik Models:")
automatik_data = data[data['gearbox'] == 'automatik']
model_counts = automatik_data['model'].value_counts()
print(model_counts)
print("\n\nManuell Models:")
manuell_data = data[data['gearbox'] == 'manuell']
model_counts = manuell_data['model'].value_counts()
print(model_counts)

In [None]:
print("Benzin Models:")
benzin_data = data[data['fuelType'] == 'benzin']
model_counts = benzin_data['model'].value_counts()
print(model_counts)
print("\n\nDiesel Models:")
diesel_data = data[data['fuelType'] == 'diesel']
model_counts = diesel_data['model'].value_counts()
print(model_counts)
print("\n\nLpg Models:")
lpg_data = data[data['fuelType'] == 'lpg']
model_counts = lpg_data['model'].value_counts()
print(model_counts)
print("\n\nCng Models:")
cng_data = data[data['fuelType'] == 'cng']
model_counts = cng_data['model'].value_counts()
print(model_counts)

In [None]:
print("Limousine Models:")
limousine_data = data[data['vehicleType'] == 'limousine']
model_counts = limousine_data['model'].value_counts()
print(model_counts)
print("\n\nKleinwagen Models:")
kleinwagen_data = data[data['vehicleType'] == 'kleinwagen']
model_counts = kleinwagen_data['model'].value_counts()
print(model_counts)
print("\n\nKombi Models:")
kombi_data = data[data['vehicleType'] == 'kombi']
model_counts = kombi_data['model'].value_counts()
print(model_counts)
print("\n\nBus Models:")
bus_data = data[data['vehicleType'] == 'bus']
model_counts = bus_data['model'].value_counts()
print(model_counts)
print("\n\nCabrio Models:")
cabrio_data = data[data['vehicleType'] == 'cabrio']
model_counts = cabrio_data['model'].value_counts()
print(model_counts)
print("\n\nCoupe Models:")
coupe_data = data[data['vehicleType'] == 'coupe']
model_counts = coupe_data['model'].value_counts()
print(model_counts)
print("\n\nSuv Models:")
suv_data = data[data['vehicleType'] == 'suv']
model_counts = suv_data['model'].value_counts()
print(model_counts)
print("\n\nAndere Models:")
andere_data = data[data['vehicleType'] == 'andere']
model_counts = andere_data['model'].value_counts()
print(model_counts)

In [None]:
# Histogram of registration years (counts). histplot replaces the deprecated
# distplot(..., kde=False); its default bin rule may differ slightly.
plt.figure(figsize=(20, 10))
sns.histplot(data['yearOfRegistration'])
plt.xlabel('Year of Registration')
plt.ylabel('Number of Cars')
plt.title('Distribution of Year of Registration for Used Cars')
plt.show()

In [None]:
current_year = 2023
data['age'] = current_year - data['yearOfRegistration']
plt.figure(figsize=(10, 5))
sns.scatterplot(x='age', y='powerPS', data=data)
plt.show()

In [None]:
plt.figure(figsize=(10, 5))
sns.scatterplot(x='powerPS', y='price', data=data)
plt.show()

In [None]:
current_year = 2023
data['age'] = current_year - data['yearOfRegistration']
plt.figure(figsize=(10, 5))
sns.scatterplot(x='age', y='kilometer', data=data)
plt.show()

In [None]:
current_year = 2023
data['age'] = current_year - data['yearOfRegistration']
plt.figure(figsize=(10, 5))
sns.scatterplot(x='age', y='price', data=data)
plt.show()

In [None]:
plt.figure(figsize=(10, 5))
sns.scatterplot(x='postalCode', y='price', data=data)
plt.show()

In [None]:
plt.figure(figsize=(30, 10))
sns.barplot(x='brand', y='price', data=data)
plt.show()

plt.figure(figsize=(30, 10))
sns.boxplot(x='brand', y='price', data=data)
plt.show()

In [None]:
plt.figure(figsize=(30, 10))
sns.barplot(x='vehicleType', y='price', data=data)
plt.show()

plt.figure(figsize=(10, 5))
sns.boxplot(x='vehicleType', y='price', data=data)
plt.show()

In [None]:
plt.figure(figsize=(30, 10))
sns.barplot(x='gearbox', y='price', data=data)
plt.show()

plt.figure(figsize=(10, 5))
sns.boxplot(x='gearbox', y='price', data=data)
plt.show()

In [None]:
plt.figure(figsize=(30, 10))
sns.barplot(x='fuelType', y='price', data=data)
plt.show()

plt.figure(figsize=(10, 5))
sns.boxplot(x='fuelType', y='price', data=data)
plt.show()

In [None]:
# Kilometer against price: scatter points with a regression fit drawn on top.
plt.figure(figsize=(10, 5))
for plot_fn in (sns.scatterplot, sns.regplot):
    plot_fn(data=data, x="kilometer", y="price")
plt.show()

In [None]:
data = data.sort_values(by='powerPS', ascending=False)
top_10_cars = data.head(10)
print(top_10_cars[['name','powerPS']])

In [None]:
data = data.sort_values(by='kilometer', ascending=False)
top_10_cars = data.head(10)
print(top_10_cars[['name','kilometer']])

In [None]:
data = data.sort_values(by='price', ascending=False)
top_10_cars = data.head(10)
print(top_10_cars[['name','price']])

In [None]:
data.seller.unique()

In [None]:
# Fiyat sütununu sayısal bir türe dönüştürme
seller_data = data[data['seller'].isin(['privat', 'gewerblich'])]
seller_data['price'] = pd.to_numeric(seller_data['price'])

# Her satıcı türü için ortalama fiyatı hesaplama
mean_prices = seller_data.groupby('seller')['price'].mean()
print(mean_prices)