## Replace Null Values by Mean Values

In [2]:
import pandas as pd
import numpy as np
# Load the intermediate cleaned dataset; the cells below fill nulls in this frame in place.
input_csv = '../Grunddatein/Zwischendatein/CleanedDataComplete.csv'
df = pd.read_csv(input_csv)

### Seller Sterne

In [3]:
    # Flag Amazon sellers once and reuse the mask everywhere below.
    # na=False makes rows with a missing 'sellerName' count as non-Amazon;
    # without it str.contains yields NaN for those rows, which breaks both the
    # `~` negation and boolean indexing.
    is_amazon = df['sellerName'].str.contains('amazon', case=False, na=False)

    # Rows whose 'sellerName' does not contain 'amazon'
    no_amazon_df = df[~is_amazon]

    # Mean of 'seller_sterne' over the non-Amazon rows (nulls are skipped by .mean())
    average_sterne = no_amazon_df['seller_sterne'].mean()
    print(average_sterne)
    # Snap the mean to the nearest 0.5 step so the fill value matches the rating grid
    rounded_sterne = round(average_sterne * 2) / 2
    print(rounded_sterne)

    # Fill missing ratings: rounded mean for non-Amazon rows, 5.0 for Amazon rows.
    # The two masks are disjoint, so computing the null mask once is equivalent
    # to re-evaluating it between the assignments.
    missing_sterne = df['seller_sterne'].isnull()
    df.loc[missing_sterne & ~is_amazon, 'seller_sterne'] = rounded_sterne
    df.loc[missing_sterne & is_amazon, 'seller_sterne'] = 5.0

    # Show the distinct 'seller_sterne' values after the replacement, in ascending order
    new_distinct_seller_sterne = df['seller_sterne'].unique()
    new_distinct_seller_sterne = np.sort(new_distinct_seller_sterne)
    print("Distinct values for seller_sterne after replacement:", [f"{value:.1f}" for value in new_distinct_seller_sterne])

4.3098022738892325
4.5
Distinct values for seller_sterne after replacement: ['1.0', '1.5', '2.0', '2.5', '3.0', '3.5', '4.0', '4.5', '5.0']


### Seller Bewertung

In [4]:
# Mean and standard deviation of 'sellerbewertung', rounded to two decimals.
# NOTE(review): unlike the seller_sterne cell, the mean here is taken over ALL
# rows (Amazon included) while only non-Amazon rows are filled - confirm this
# asymmetry is intended.
mean_score = round(df['sellerbewertung'].mean(), 2)
sd_score = round(df['sellerbewertung'].std(), 2)

# na=False treats rows with a missing 'sellerName' as non-Amazon; without it
# str.contains returns NaN for those rows and the `~` negation fails.
is_amazon_seller = df['sellerName'].str.contains('amazon', case=False, na=False)

# Fill missing scores for non-Amazon sellers only; Amazon rows keep their nulls.
df.loc[df['sellerbewertung'].isnull() & ~is_amazon_seller, 'sellerbewertung'] = mean_score

### FBA Delivery Price and Delivery Duration

In [None]:
# The original cell selected the subset via 'Fulfillment_type' but built the
# assignment masks via 'fulfilment_type'; the two spellings differ, so one of
# them raised KeyError. Resolve whichever spelling the frame actually has and
# use it consistently (falls through to a KeyError if neither exists).
fulfilment_col = 'Fulfillment_type' if 'Fulfillment_type' in df.columns else 'fulfilment_type'
is_fba = df[fulfilment_col] == 'FBA'

# Summary statistics over the FBA rows only, rounded to two decimals
sub_df = df[is_fba]
mean_price = round(sub_df['lieferpreis'].mean(), 2)
standard_deviation = round(sub_df['lieferpreis'].std(), 2)
mean_delivery_time = round(sub_df['date_diff'].mean(), 2)
delivery_time_sd = round(sub_df['date_diff'].std(), 2)

print(f"Mean Delivery Price: {mean_price}")
print(f"Standard Deviation: {standard_deviation}")

# FBA rows with no delivery duration get the FBA means for both columns.
# The mask is captured before the assignments so both use the same rows.
# NOTE(review): 'lieferpreis' is overwritten wherever 'date_diff' is null, even
# if a price is present, and null prices on rows with a date_diff stay null -
# confirm that keying the price fill on 'date_diff' is intended.
fba_missing_duration = df['date_diff'].isnull() & is_fba
df.loc[fba_missing_duration, 'lieferpreis'] = mean_price
df.loc[fba_missing_duration, 'date_diff'] = mean_delivery_time

### FBM Delivery Price and Delivery Duration

In [None]:
# The original cell selected the subset via 'Fulfillment_type' but built the
# assignment masks via 'fulfilment_type'; the two spellings differ, so one of
# them raised KeyError. Resolve whichever spelling the frame actually has and
# use it consistently (falls through to a KeyError if neither exists).
fulfilment_col = 'Fulfillment_type' if 'Fulfillment_type' in df.columns else 'fulfilment_type'
is_fbm = df[fulfilment_col] == 'FBM'

# Summary statistics over the FBM rows only, rounded to two decimals
sub_df = df[is_fbm]
mean_price = round(sub_df['lieferpreis'].mean(), 2)
standard_deviation = round(sub_df['lieferpreis'].std(), 2)
mean_delivery_time = round(sub_df['date_diff'].mean(), 2)
delivery_time_sd = round(sub_df['date_diff'].std(), 2)

print(f"Mean Delivery Price: {mean_price}")
print(f"Standard Deviation: {standard_deviation}")

# FBM rows with no delivery duration get the FBM means for both columns.
# The mask is captured before the assignments so both use the same rows.
# NOTE(review): 'lieferpreis' is overwritten wherever 'date_diff' is null, even
# if a price is present, and null prices on rows with a date_diff stay null -
# confirm that keying the price fill on 'date_diff' is intended.
fbm_missing_duration = df['date_diff'].isnull() & is_fbm
df.loc[fbm_missing_duration, 'lieferpreis'] = mean_price
df.loc[fbm_missing_duration, 'date_diff'] = mean_delivery_time

In [None]:
# Write the imputed frame back to disk without the index column.
# This is the same path the notebook loads from, so the input file is replaced
# and a re-run starts from already-imputed data.
output_csv = '../Grunddatein/Zwischendatein/CleanedDataComplete.csv'
df.to_csv(output_csv, index=False)