In [None]:
!pip install chardet

In [None]:
import numpy as np
import pandas as pd
import chardet
import matplotlib.pyplot as plt
import re
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error

import ydata_profiling as pp

In [None]:
# Read the raw bytes once so chardet can guess the file's text encoding.
with open("Watches Bags Accessories.csv", 'rb') as f:
    rawdata = f.read()

# chardet.detect returns a dict; its 'encoding' entry is handed straight to pandas.
result = chardet.detect(rawdata)
encoding = result['encoding']
df = pd.read_csv("Watches Bags Accessories.csv", encoding=encoding)


In [None]:
# Preview the first rows of the freshly loaded frame.
df.head()

In [None]:
# Summarise the frame: column names, non-null counts and dtypes.
df.info()

In [None]:
# Build a ydata_profiling report for df; as the cell's last expression it is what Jupyter displays.
pp.ProfileReport(df)

In [None]:
# Remove rows containing any missing value, then collapse exact duplicate rows.
df = df.dropna().drop_duplicates()

In [None]:
# Preview the first rows again after the cleaning step above.
df.head()

In [None]:
# Inspect the distinct currency values before discarding the column.
print(df['Currency'].unique())

# DataFrame.drop returns a new frame and leaves the original untouched, so the
# result has to be assigned back; otherwise 'Currency' silently stays in df.
df = df.drop('Currency', axis=1)

# Show a short preview instead of printing the entire frame.
df.head()

In [None]:
def convert_rating(sold_count):
    """Convert a raw rating string into a numeric star rating.

    Parameters
    ----------
    sold_count : str
        Raw rating text such as ``"4.5/5"``. The parameter name is kept for
        compatibility; the value it receives is a rating, not a sold count.

    Returns
    -------
    float
        The number preceding ``/5`` when that marker is present, otherwise
        ``5.0``. Both branches return a float so the column has one numeric type.

    Raises
    ------
    ValueError
        If the text left after removing ``/5`` is not a valid number.
    """
    if '/5' in sold_count:
        return float(sold_count.replace('/5', ''))
    # NOTE(review): any value without '/5' is treated as a full 5-star rating;
    # confirm this default is intended rather than "rating unknown".
    return 5.0
    
# Overwrite the raw 'Rating in Stars' values with what convert_rating returns for each one.
df['Rating in Stars'] = df['Rating in Stars'].apply(convert_rating)

In [None]:
def convert_delivery(delivary):
    """Convert a raw delivery label into a numeric delivery cost.

    Parameters
    ----------
    delivary : object
        Raw delivery text, e.g. ``"Free Delivery"``. (Parameter name kept
        as-is for compatibility.)

    Returns
    -------
    float
        ``0.0`` when the text contains ``"Free Delivery"``; ``nan`` for any
        other value, including non-string input. Previously such values fell
        through to an implicit ``None`` (or raised ``TypeError`` when the
        input was not a string); an explicit NaN keeps the result numeric and
        makes the "cost unknown" case visible.
    """
    if isinstance(delivary, str) and 'Free Delivery' in delivary:
        return 0.0
    # The delivery cost cannot be determined from this value.
    return float('nan')

# Overwrite the raw 'Delivery' values with what convert_delivery returns for each one.
df['Delivery'] = df['Delivery'].apply(convert_delivery)

In [None]:
def convert_sold_count(sold_count):
    """Convert a raw sold-count label into an integer number of units.

    Parameters
    ----------
    sold_count : str
        Text such as ``"45 Sold"``, ``"3K Sold"``, ``"1.2K Sold"`` or a bare
        number like ``"7"``.

    Returns
    -------
    int
        The number of units sold; a ``K`` suffix multiplies the value by 1000.

    Raises
    ------
    ValueError
        If the remaining text is not a valid number.
    """
    if 'K Sold' in sold_count:
        # Parse as float first: int("1.2") raises ValueError, so fractional
        # thousands such as "1.2K Sold" could not be converted before.
        thousands = float(sold_count.replace('K Sold', ''))
        # round() guards against float error before truncating to int.
        return int(round(thousands * 1000))
    if ' Sold' in sold_count:
        return int(sold_count.replace(' Sold', ''))
    return int(sold_count)
    
# Overwrite the raw 'Sold Count' values with what convert_sold_count returns for each one.
df['Sold Count'] = df['Sold Count'].apply(convert_sold_count)

In [None]:
# A bare expression is only displayed when it is the cell's last statement;
# print explicitly so the distinct raw voucher values are actually shown.
print(df['Voucher'].unique())

def convert_Voucher(Voucher):
    """Convert a raw voucher label into an integer voucher count.

    The first run of digits in the text is taken as the count. This replaces
    a substring test on ``"0"`` that mapped ``"10"`` to 0 and ``"1"`` to
    ``None``, and whose duplicated second branch (``return 1``) was unreachable.

    Parameters
    ----------
    Voucher : str
        Text such as ``"3 Vouchers"``, ``"1 Voucher"``, ``"0"`` or ``"1"``.

    Returns
    -------
    int or None
        The parsed count, or ``None`` when the text contains no digits.
    """
    digits = re.search(r'\d+', Voucher)
    if digits is None:
        # No number to read; keep the "unknown" result explicit.
        return None
    return int(digits.group())
    
# Overwrite the raw 'Voucher' values with what convert_Voucher returns for each one.
df['Voucher'] = df['Voucher'].apply(convert_Voucher)

In [None]:
# Fit a min-max scaler on the price and rating-count columns, then store the
# rescaled values alongside the originals under new column names.
scaler = MinMaxScaler()
scaled_values = scaler.fit_transform(df[["Current Price", "Rating Count"]])
df[["current_price_scaled", "rating_count_scaled"]] = scaled_values

In [None]:
# Regression target: units sold. The later cells treat the model's output as
# sales and compare it with "Sold Count", so that column (not the categorical
# "Category" label) is what the model has to learn.
y = df["Sold Count"]

# LinearRegression needs numeric inputs: drop the target itself so it cannot
# leak into the features, and keep only the numeric columns.
X = df.drop(columns="Sold Count").select_dtypes(include="number")

# Hold out 30% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=0)

In [None]:
# Ordinary least-squares regression fitted on the training split.
model = LinearRegression()
model.fit(X=X_train, y=y_train)

In [None]:
# Score the held-out rows and report the mean absolute error against the true targets.
y_pred = model.predict(X_test)
mae = mean_absolute_error(y_true=y_test, y_pred=y_pred)
print("Mean Absolute Error:", mae)

In [None]:
# Compare actual and predicted sales for a single product category.
category_of_interest = "Kids Watches"
category_data = df[df["Category"] == category_of_interest]

predicted_sales = model.predict(category_data[X.columns])
actual_sales = category_data["Sold Count"]

# No cell in this notebook creates a "date" column. Use it when the data has
# one; otherwise fall back to row position so the cell does not raise KeyError.
if "date" in category_data.columns:
    x_values, x_label = category_data["date"], "date"
else:
    x_values, x_label = range(len(category_data)), "row position"

fig, ax = plt.subplots()
ax.plot(x_values, actual_sales, label="Actual Sales")
ax.plot(x_values, predicted_sales, label="Predicted Sales")
ax.set(title=category_of_interest, xlabel=x_label, ylabel="Sold Count")
ax.legend()
plt.show()

In [None]:
# LinearRegression exposes its fitted weights as `coef_`; it has no
# `feature_importances_` attribute, so the previous lookup raised AttributeError.
# np.ravel flattens the coefficient array to one value per feature.
feature_importances = pd.DataFrame({"feature": X.columns, "importance": np.ravel(model.coef_)})

# Rank by magnitude: a large negative weight matters as much as a large positive one.
# Coefficients depend on each feature's scale, so compare them with that in mind.
print(feature_importances.sort_values(by="importance", key=lambda col: col.abs(), ascending=False))

In [None]:
future_data = X_test
future_sales = model.predict(future_data)

# Index-aligned copy of the predictions so they can be grouped per category.
sales_by_row = pd.Series(future_sales, index=future_data.index)

# "Category" is not a column of the feature matrix, so indexing
# future_data["Category"] raised KeyError. Look each held-out row's category
# up in df through the shared index labels instead.
future_categories = df.loc[future_data.index, "Category"]

# Visualize the forecasted trends: one line per category, drawn once
# (the plotting loop used to be duplicated verbatim).
fig, ax = plt.subplots()
for category, category_sales in sales_by_row.groupby(future_categories):
    ax.plot(category_sales.to_numpy(), label=category)

ax.set(title="Predicted sales by category", xlabel="row position within category", ylabel="Predicted Sold Count")
ax.legend()
plt.show()