In [None]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio

# Make "plotly_white" the default template for every figure created afterwards
pio.templates.default = "plotly_white"

# Load the dataset. The path is relative, so the CSV has to sit in the
# notebook's working directory.
file_path = 'retail_price.csv'
retail_price_data = pd.read_csv(file_path)

# Preview the first rows. Leaving the frame as the last expression of the cell
# lets Jupyter render it as a table instead of the plain text print() produces.
retail_price_data.head()

In [None]:
# Tally the missing entries in every column of the dataset
null_values = retail_price_data.isna().sum()

# Report the per-column counts under a heading (one print call, newline-separated)
print("\nNull values in each column:", null_values, sep="\n")

In [None]:
# Summary statistics (count, mean, std, quartiles, ...) of the numeric columns.
# The heading is still printed; the statistics frame is left as the cell's last
# expression so Jupyter renders it as a table rather than printed text.
print("\nDescriptive statistics of the dataset:")
retail_price_data.describe()

In [None]:
# Histogram of total_price (nbins=20), with the y-axis labelled as a count
fig = px.histogram(
    retail_price_data,
    x='total_price',
    nbins=20,
    title='Distribution of Total Price',
).update_layout(yaxis_title='Count')

# Render the chart
fig.show()

In [None]:
# Box plot summarising how unit_price is distributed, with a readable y-axis label
fig = px.box(
    retail_price_data,
    y='unit_price',
    title='Box plot of unit price',
).update_layout(yaxis_title='Unit Price')

# Render the chart
fig.show()

In [None]:
# Scatter of qty against total_price; trendline="ols" overlays an
# ordinary-least-squares fit on top of the points
fig = px.scatter(
    retail_price_data,
    x='qty',
    y='total_price',
    trendline="ols",
    title='Quantity vs. Total Price',
).update_layout(
    xaxis_title='Qty',
    yaxis_title='Total Price',
)

# Render the chart
fig.show()

In [None]:
# Same qty vs. total_price scatter with an OLS trendline, this time with the
# y-axis ticks pinned to every 2000 from 0 to 12000 and labelled "0k" ... "12k"
fig = px.scatter(
    retail_price_data,
    x='qty',
    y='total_price',
    trendline="ols",
    title='Quantity vs. Total Price',
)

fig.update_layout(
    xaxis_title='Qty',
    yaxis_title='Total Price',
    yaxis={
        'tickmode': 'array',
        # 0, 2000, ..., 12000
        'tickvals': list(range(0, 12001, 2000)),
        # '0k', '2k', ..., '12k'
        'ticktext': [f'{value // 1000}k' for value in range(0, 12001, 2000)],
    },
)

# Render the chart
fig.show()

In [None]:
# Mean total_price per product category, flattened back into a two-column frame
average_total_price_by_category = (
    retail_price_data
    .groupby('product_category_name')['total_price']
    .mean()
    .reset_index()
)

# One bar per category, bar height = that category's mean total_price
fig = px.bar(
    average_total_price_by_category,
    x='product_category_name',
    y='total_price',
    title='Average Total Price by Product Category',
).update_layout(xaxis_title='Product Category Name', yaxis_title='Average Total Price')

# Render the chart
fig.show()

In [None]:
# Bar chart of the per-category mean total_price.
# This cell expects average_total_price_by_category to already exist in the
# kernel namespace; it does not build the frame itself.
fig = px.bar(
    average_total_price_by_category,
    x='product_category_name',
    y='total_price',
    title='Average Total Price by Product Category',
).update_layout(xaxis_title='Product Category Name', yaxis_title='Average Total Price')

# Render the chart
fig.show()

In [None]:
# Sum total_price within each product category. The frame is built in this
# cell so the chart does not depend on a variable left over in the kernel
# (a fresh Restart & Run All would otherwise stop here with a NameError).
total_price_by_category = (
    retail_price_data
    .groupby('product_category_name')['total_price']
    .sum()
    .reset_index()
)

# One bar per category, bar height = summed total_price of that category
fig = px.bar(total_price_by_category, x='product_category_name', y='total_price', title='Total Price by Product Category')

# Update the axis labels
fig.update_layout(
    xaxis_title='Product Category Name',
    yaxis_title='Total Price'
)

# Show the figure
fig.show()

In [None]:
# One total_price box per distinct value of the weekday column
fig = px.box(
    retail_price_data,
    x='weekday',
    y='total_price',
    title='Box Plot of Total Price by Weekday',
)

# Axis titles, plus y-axis ticks fixed at 2000-wide steps and shown as "0k" ... "12k"
fig.update_layout(
    xaxis_title='Weekday',
    yaxis_title='Total Price',
    yaxis={
        'tickmode': 'array',
        'tickvals': [0, 2000, 4000, 6000, 8000, 10000, 12000],
        'ticktext': ['0k', '2k', '4k', '6k', '8k', '10k', '12k'],
    },
)

# Render the chart
fig.show()

In [None]:
# One total_price box per distinct value of the holiday column
fig = px.box(
    retail_price_data,
    x='holiday',
    y='total_price',
    title='Box Plot of Total Price by Holiday',
)

# Axis titles, plus y-axis ticks fixed at 2000-wide steps and shown as "0k" ... "12k"
fig.update_layout(
    xaxis_title='Holiday',
    yaxis_title='Total Price',
    yaxis={
        'tickmode': 'array',
        'tickvals': [0, 2000, 4000, 6000, 8000, 10000, 12000],
        'ticktext': ['0k', '2k', '4k', '6k', '8k', '10k', '12k'],
    },
)

# Render the chart
fig.show()

In [None]:
# Load the dataset into `retail_price`
file_path = 'retail_price.csv'  # Ensure this path is correct
retail_price = pd.read_csv(file_path)

# Convert every column to numeric; values that cannot be parsed become NaN.
# A column with no parseable value at all ends up entirely NaN, and would show
# as an empty row and column in the heatmap, so those columns are dropped
# before the correlations are computed.
retail_price_numeric = (
    retail_price
    .apply(pd.to_numeric, errors='coerce')
    .dropna(axis='columns', how='all')
)

# Pairwise correlation between the remaining columns (DataFrame.corr default method)
correlation_matrix = retail_price_numeric.corr()

# Heatmap of the correlation matrix; both axes list the same feature names
fig = go.Figure(data=go.Heatmap(
    x=correlation_matrix.columns,
    y=correlation_matrix.columns,
    z=correlation_matrix.values,
    colorscale='Viridis',  # Choose a colorscale, can change to 'Cividis', 'Plasma', etc.
    colorbar=dict(title='Correlation')
))

# Title and axis labels; nticks is set to the number of features on each axis,
# and the x labels are rotated 45 degrees
fig.update_layout(
    title='Correlation Heatmap of Numerical Features',
    xaxis_title='Features',
    yaxis_title='Features',
    xaxis=dict(ticks='', nticks=len(correlation_matrix.columns), tickangle=45),
    yaxis=dict(ticks='', nticks=len(correlation_matrix.columns))
)

# Show the figure
fig.show()

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error
import plotly.graph_objects as go

# This cell expects `retail_price` to already be loaded as a DataFrame
# (for example via: retail_price = pd.read_csv('path_to_file.csv'))

# Difference between our unit price and the first competitor's price.
# Note: this adds the column to `retail_price` itself.
retail_price['comp_price_diff'] = retail_price['unit_price'] - retail_price['comp_1']

# Features and target.
# NOTE(review): qty and unit_price are both features while total_price is the
# target; if total_price is derived from those two columns the reported error
# will look optimistic. Confirm this is intended.
X = retail_price[['qty', 'unit_price', 'comp_1', 'product_score', 'comp_price_diff']]
y = retail_price['total_price']

# Hold out 20% of the rows for testing; the seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the regression tree. random_state is fixed so the fitted tree, and
# therefore the MSE and the plot below, are the same on every run.
model = DecisionTreeRegressor(random_state=42)
model.fit(X_train, y_train)

# Predict on the held-out rows
y_pred = model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error: {mse}")

# Scatter plot comparing actual and predicted values
fig = go.Figure()

# One marker per test row: x = actual value, y = predicted value
fig.add_trace(go.Scatter(
    x=y_test,
    y=y_pred,
    mode='markers',
    marker=dict(color='blue'),
    name='Predicted vs. Actual Retail Price'
))

# Reference line y = x spanning the range of the actual values;
# a perfect model would place every marker on it
actual_min, actual_max = y_test.min(), y_test.max()
fig.add_trace(go.Scatter(
    x=[actual_min, actual_max],
    y=[actual_min, actual_max],
    mode='lines',
    line=dict(color='red'),
    name='Ideal Prediction'
))

# Titles, axis labels, and fixed y-axis tick positions
fig.update_layout(
    title='Predicted vs. Actual Retail Price',
    xaxis_title='Actual Retail Price',
    yaxis_title='Predicted Retail Price',
    yaxis=dict(
        tickvals=[0, 2000, 4000, 6000, 8000, 10000, 12000]
    )
)

# Show the figure
fig.show()
