In [None]:
from matplotlib import pyplot as plt


# Bereinigungsfunktion
def clean_row(row):
    try:
        row["discounted_price"] = float(row["discounted_price"].replace("₹", "").replace(",", ""))
        row["actual_price"] = float(row["actual_price"].replace("₹", "").replace(",", ""))
        row["discount_percentage"] = int(row["discount_percentage"].replace("%", ""))

        # Prüfen, ob "rating" ein String ist, bevor .replace() genutzt wird
        if isinstance(row["rating"], str):
            row["rating"] = float(row["rating"].replace("|", "").strip())
        else:
            row["rating"] = float(row["rating"])  # Falls es bereits ein Float ist

        row["rating_count"] = int(row["rating_count"].replace(",", "")) if pd.notnull(row["rating_count"]) else 0
        return row
    except Exception as e:
        print(f"❌ Fehler in Zeile mit product_id {row.product_id}: {e}")
        print(row)  # Problematische Zeile ausgeben
        return None


# clean the whole data
def clean_data(df):
    if df.empty:
         return "Der DataFrame ist leer, keine Daten verfügbar."
    return df.apply(clean_row, axis=1).dropna().reset_index(drop=True)


In [None]:
import pandas as pd

df = clean_data(pd.read_csv('amazon.csv'))


def top_discounted_products(df):
    # Ordered py percentage desc
    df_sorted = df.sort_values(by="discount_percentage", ascending=False)

    # select top 10
    df_top10 = df_sorted.head(10)

    # return needed data
    df_top10 = df_top10[["product_name", "actual_price", "discounted_price", "discount_percentage"]]

    return df_top10


print(top_discounted_products(df))


In [None]:
import pandas as pd

# Daten bereinigen und einlesen
df = clean_data(pd.read_csv('amazon.csv'))


# Durchschnittliche Bewertung berechnen
def average_rating(df):
    return df["rating"].mean(skipna=True)


print(average_rating(df))

In [None]:
import pandas as pd

df = clean_data(pd.read_csv('amazon.csv'))

def top_discounted_product(df):
    data = df.sort_values(by="discount_percentage", ascending=False).head(1)[["product_name", "actual_price", "discounted_price", "discount_percentage"]].iloc[0]

    # Ergebnis formatieren
    return {
        "product_name": data["product_name"],
        "actual_price": data["actual_price"],
        "discounted_price": data["discounted_price"],
        "discount_percentage": data["discount_percentage"]
    }


print(top_discounted_product(df))


In [None]:
import pandas as pd

df = clean_data(pd.read_csv('amazon.csv'))

def top_reviews(df):
    # Produkt mit den meisten Bewertungen finden
    top_product = df.loc[df["rating_count"].idxmax()]  # Zeile mit maximalen Bewertungen

    # Ergebnisse formatieren
    return {
        "product_name": top_product["product_name"],
        "number_of_reviews": top_product["rating_count"],
    }


print(top_reviews(df))


In [None]:
import pandas as pd

df = clean_data(pd.read_csv('amazon.csv'))

bedingung = (df["rating_count"] > 10) & (df["rating_count"] < 20)
ergebnisA = df.loc[bedingung, ["product_id", "rating_count"]]


print(ergebnisA)


In [None]:
import pandas as pd

df = clean_data(pd.read_csv('amazon.csv'))

bedingung = (df["rating"] >= 4.0) & (df["discount_percentage"] >= 50)
ergebnis = df.loc[bedingung, ["product_name", "discount_percentage",  "rating"]]

print(ergebnis)

In [None]:
import pandas as pd

df = clean_data(pd.read_csv('amazon.csv'))


def most_reviewed_product(df):
    # Produkt mit der höchsten Anzahl von Bewertungen holen
    top_product = df.loc[df["rating_count"].idxmax()]

    # Ergebnis als Dictionary formatieren
    return {
        "product_name": top_product["product_name"],
        "rating_count": top_product["rating_count"],
        "rating": top_product["rating"]
    }


# Formatierte Ausgabe
result = most_reviewed_product(df)
print("Produkt mit den meisten Bewertungen:")
print(result)

In [None]:
import pandas as pd

df = clean_data(pd.read_csv('amazon.csv'))


def most_category_products(df):
    # Gruppiere nach Kategorie und zähle die Produktanzahl
    category_counts = df.groupby("category")["product_id"].count()

    # Ermittel die Kategorie mit der höchsten Anzahl von Produkten
    top_category = category_counts.idxmax()
    number_of_products = category_counts.max()

    # Filtere den DataFrame für die Top-Kategorie
    top_category_data = df[df["category"] == top_category]

    # Berechne die durchschnittliche Bewertung und den Rabattanteil
    average_rating = top_category_data["rating"].mean(skipna=True)
    average_discount = top_category_data["discount_percentage"].mean(skipna=True)

    # Rückgabe der Ergebnisse
    return {
        "category": top_category,
        "number_of_products": number_of_products,
        "average_rating": average_rating,
        "average_discount": average_discount
    }


print(most_category_products(df))

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

df = clean_data(pd.read_csv('amazon.csv'))

category_counts = df["category"].value_counts()

plt.figure(figsize=(12, 6))
category_counts.plot(kind="bar", color="lightblue", edgecolor="black")
plt.title("Produktanzahl pro Kategorie")
plt.xlabel("Kategorien")
plt.ylabel("Anzahl der Produkte")
plt.xticks(rotation=45, ha="right")
plt.tight_layout()
plt.show()

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

df = clean_data(pd.read_csv('amazon.csv'))

category_counts = df.groupby("category")["product_id"].count().sort_values(ascending=False)
plt.figure(figsize=(12, 8))
category_counts.plot(kind="bar", color="skyblue", edgecolor="black")
plt.title("Produktverteilung nach Kategorien")
plt.xlabel("Kategorie")
plt.ylabel("Anzahl der Produkte")
plt.xticks(rotation=45, ha="right")
plt.tight_layout()
plt.show()


In [None]:
import pandas as pd

df = clean_data(pd.read_csv('amazon.csv'))


def top_discounted_products(df):
    df["discount"] = df["actual_price"] - df["discounted_price"]
     # Auswahl der Top-10 Produkte
    top_10 = df.sort_values(by="discount", ascending=False).head(10)

    # Rückgabe als Liste von Dictionaries (besseres Format)
    return top_10[["product_name", "category", "actual_price", "discounted_price", "discount"]].to_dict(
        orient="records")


print(top_discounted_products(df))

In [72]:
def binary_search(arr, target):
    left, right = 0, len(arr) - 1

    while left <= right:
        mid = (left + right) // 2

        if arr[mid] == target:
            return mid
        elif arr[mid] < target:
            left = mid + 1
        else:
            right = mid - 1

    return -1

# Example usage
arr = [11, 3, 5, 7, 9, 1]
target = 1
result = binary_search(arr, target)
print("Element found at index:", result) if result != -1 else print("Element not found")

Element not found
