In [None]:
import pandas as pd

# Load the raw used-car sales data.
raw_df = pd.read_csv("used_car_sales.csv")

# Keep only vehicles that were actually sold. The explicit .copy() makes the
# result an independent frame, so the column assignments below cannot trigger
# pandas' SettingWithCopyWarning.
sold_df = raw_df.loc[raw_df["Car Sale Status"] == "Sold"].copy()

# Parse the date columns; values that cannot be parsed become NaT
# (errors="coerce"). Note: neither column is listed in `features`, so the
# column selection at the end of this cell drops them again.
sold_df["Sold Date"] = pd.to_datetime(sold_df["Sold Date"], errors="coerce")
sold_df["Purchased Date"] = pd.to_datetime(sold_df["Purchased Date"], errors="coerce")

# Model inputs and prediction target.
features = ["Manufactured Year", "Mileage-KM", "Engine Power-HP", "Price-$", "Sales Rating"]
target = "Sold Price-$"

# Keep only the modelling columns and drop every row with a missing value.
df = sold_df[features + [target]].dropna()


In [None]:
from sklearn.model_selection import train_test_split

# Separate the model inputs from the prediction target.
X, y = df[features], df[target]

# Hold out 20 % of the rows for evaluation; the fixed random_state makes the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [None]:
from sklearn.linear_model import LinearRegression

# Fit a linear regression on the training split. `fit` returns the estimator
# itself, so construction and training can be chained.
model = LinearRegression().fit(X_train, y_train)
model  # last expression: display the fitted estimator as the cell output


In [None]:
from sklearn.metrics import mean_absolute_error, r2_score

# Predict on the held-out test split and score the predictions.
y_pred = model.predict(X_test)

mae, r2 = mean_absolute_error(y_test, y_pred), r2_score(y_test, y_pred)

# Report both metrics rounded to two decimals.
print(f"MAE: {mae:.2f}")
print(f"R² Score: {r2:.2f}")


In [None]:
# Spot-check the model on the first row of the test split.
sample = X_test.iloc[0]          # Series, used only for the readable dict output
true_value = y_test.iloc[0]

# Predict from a one-row DataFrame (list indexer -> DataFrame) rather than a
# list wrapping the Series: the model was fitted on a DataFrame, and a plain
# list discards the column names, which makes scikit-learn warn that
# "X does not have valid feature names" and skips its column-order check.
sample_frame = X_test.iloc[[0]]
predicted = model.predict(sample_frame)[0]

print("Eingabedaten:", sample.to_dict())
print(f"Tatsächlicher Verkaufspreis: {true_value}")
print(f"Vorhergesagter Verkaufspreis: {predicted:.2f}")
