# Day 5: Switch 2 Pre-sales Demand Forecasting

You are a Product Analyst working with the Nintendo Switch 2 pre-sales team to analyze regional pre-order patterns and customer segmentation. Your team needs to understand how different demographics influence pre-sale volumes across regions. You will leverage historical pre-sale transaction data to extract meaningful insights that can guide marketing strategies.

In [None]:
import pandas as pd
import numpy as np

# Raw pre-sale transactions, one tuple per record, in the column order given
# by _PRE_SALE_COLUMNS. Missing values are written as None (the JSON literal
# `null` is not a Python name and would raise NameError). Two records (C010
# and C019) appear twice on purpose: they are part of the raw data.
_PRE_SALE_COLUMNS = (
    "region",
    "customer_id",
    "pre_order_date",
    "demographic_group",
    "pre_order_quantity",
)

_PRE_SALE_ROWS = [
    ("North America", "C001", "2024-07-02", "Gamer", 1),
    ("Europe", "C002", "2024-07-03", "Casual", 2),
    ("Asia", "C003", "2024-07-04", "Tech Enthusiast", 1),
    ("Latin America", "C004", "2024-07-05", "Family", 3),
    ("Oceania", "C005", "2024-07-06", "Student", 2),
    ("North America", "C006", "2024-07-07", "Gamer", 5),
    ("Europe", "C007", "2024-07-08", None, 2),
    (None, "C008", "2024-07-09", "Casual", 1),
    ("Asia", "C009", "2024-07-10", "Family", 4),
    ("North America", "C010", "2024-07-11", "Gamer", 1),
    ("North America", "C010", "2024-07-11", "Gamer", 1),
    ("Europe", "C011", "2024-07-12", "Student", 2),
    ("Asia", "C012", "2024-07-13", "Casual", 3),
    ("Latin America", "C013", "2024-07-14", "Tech Enthusiast", 2),
    ("Oceania", "C014", "2024-07-15", "Gamer", 5),
    ("North America", "C015", "2024-07-16", "Casual", 1),
    ("Europe", "C016", "2024-07-17", "Family", 4),
    ("Asia", "C017", "2024-07-18", "Student", 3),
    ("Latin America", "C018", "2024-07-19", "Gamer", 1),
    ("Oceania", "C019", "2024-07-20", "Tech Enthusiast", 2),
    ("Oceania", "C019", "2024-07-20", "Tech Enthusiast", 2),
    ("North America", "C020", "2024-07-21", "Family", 3),
    ("Europe", "C021", "2024-07-22", "Gamer", 2),
    ("Asia", "C022", "2024-07-23", "Casual", 1),
    ("Latin America", "C023", "2024-07-24", "Student", 4),
    ("Oceania", "C024", "2024-07-25", "Family", 2),
    ("North America", "C025", "2024-07-26", "Tech Enthusiast", 1),
    ("Europe", "C026", "2024-07-27", "Student", 5),
    ("Asia", "C027", "2024-07-28", "Gamer", 2),
    ("Latin America", "C028", "2024-07-29", "Casual", 3),
    ("Oceania", "C029", "2024-07-30", "Family", 1),
    ("North America", "C030", "2024-08-01", "Gamer", 1),
    ("Asia", "C031", "2024-08-02", None, 2),
    ("Latin America", "C032", "2024-08-03", "Tech Enthusiast", 3),
    ("Oceania", "C033", "2024-08-04", "Student", 1),
    ("North America", "C034", "2024-08-05", "Family", 4),
    ("Europe", "C035", "2024-08-06", "Gamer", 2),
    ("Asia", "C036", "2024-08-07", "Casual", 5),
    ("Latin America", "C037", "2024-08-08", "Family", 1),
    ("Oceania", "C038", "2024-08-09", "Tech Enthusiast", 2),
    ("North America", "C039", "2024-08-10", "Student", 10),
    ("Europe", "C040", "2024-08-11", "Family", 3),
    ("Asia", "C041", "2024-08-12", "Gamer", 1),
    ("Latin America", "C042", "2024-08-13", "Casual", 2),
    ("Oceania", "C043", "2024-08-14", "Student", 5),
    ("North America", "C044", "2024-08-15", "Tech Enthusiast", 2),
    ("Europe", "C045", "2024-08-16", "Family", 1),
    ("Asia", "C046", "2024-08-17", "Gamer", 3),
    ("Latin America", "C047", "2024-08-18", "Casual", 2),
    ("Oceania", "C048", "2024-08-19", None, 4),
    ("North America", "C049", "2024-08-20", "Student", 1),
    ("Europe", "C050", "2024-08-21", "Gamer", 2),
    ("Asia", "C051", "2024-08-22", "Casual", 3),
    ("Latin America", "C052", "2024-08-23", "Tech Enthusiast", 2),
    ("Oceania", "C053", "2024-08-24", "Family", 1),
    ("North America", "C054", "2024-08-25", "Gamer", 1),
    ("Europe", "C055", "2024-08-26", "Casual", 2),
    ("Asia", "C056", "2024-08-27", "Student", 3),
    ("Latin America", "C057", "2024-08-28", "Family", 4),
    ("Oceania", "C058", "2024-08-29", "Tech Enthusiast", 1),
]

# Same shape as before: a list of dicts keyed by column name, in column order.
pre_sale_data_data = [dict(zip(_PRE_SALE_COLUMNS, row)) for row in _PRE_SALE_ROWS]
pre_sale_data = pd.DataFrame(pre_sale_data_data)


## Question 1

What percentage of records have missing values in at least one column? Handle the missing values so that we have a clean dataset to work with.

In [None]:
# Note: pandas and numpy are already imported as pd and np
# The following tables are loaded as pandas DataFrames with the same names: pre_sale_data
# Please print your final result or dataframe

# Work on a copy so the original pre_sale_data is never modified.
data = pre_sale_data
ns2_psd_df = data.copy()

# Column dtypes and non-null counts: a first look at where values are missing.
# (info() prints its report itself and returns None.)
ns2_psd_df.info()

# Number of missing values in each column.
missing_values = ns2_psd_df.isnull().sum()
print(missing_values)

# Missing values in each column, as a percentage of all rows.
percent_missing_values = ((missing_values * 100) / len(ns2_psd_df)).round(2)
print(percent_missing_values)

# Percentage of rows with a missing value in at least one column.
# This is the figure Question 1 asks for.
percent_rows_with_missing = (ns2_psd_df.isnull().any(axis=1).mean() * 100).round(2)
print(percent_rows_with_missing)

# Rows that are identical in every column to an earlier row.
duplicate_records = ns2_psd_df.duplicated().sum()
print(duplicate_records)

# keep=False flags every member of a duplicate group, including the first occurrence.
all_duplicate_rows = ns2_psd_df[ns2_psd_df.duplicated(keep=False)]
print(all_duplicate_rows)

# Handling missing values
# The columns being filled hold text labels, so each gap is replaced with the
# column's most frequent value (mode()[0] picks the first mode on ties).
# NOTE(review): duplicate rows are reported above but not removed here;
# confirm whether they should be dropped before further analysis.
Clean_psd_df = ns2_psd_df.fillna(
    {
        "region": ns2_psd_df["region"].mode()[0],
        "demographic_group": ns2_psd_df["demographic_group"].mode()[0],
    }
)
Clean_psd_df.info()


# Printing the answer for question 1
print("The number of missing values in the data set are:")
print(missing_values)
print()

print("The percentage of missing values per column in the data set are:")
print(percent_missing_values)
print()

# The percent sign is formatted into the printed value; previously it sat
# outside the print() call and was silently discarded.
print(
    "The percentage of rows with at least one missing value in the data set is",
    f"{percent_rows_with_missing}%",
)
print()

print("The count of duplicate records in the data set are:")
print(duplicate_records)
print()

print("Clean version of the data set with handled missing values:")
print(Clean_psd_df)

## Question 2

Using the cleaned data, calculate the total pre-sale orders per month for each region and demographic group.

In [None]:
def _print_rule():
    """Print a horizontal rule and a blank line to separate output sections."""
    print("-" * 100)
    print()


# Work on a copy so the original pre_sale_data is never modified.
data = pre_sale_data
ns2_psd_df = data.copy()

print(ns2_psd_df)
_print_rule()

# Column dtypes and non-null counts. info() prints its report itself and
# returns None, so it is called directly rather than wrapped in print().
ns2_psd_df.info()
_print_rule()

# Number of missing values in each column.
missing_values = ns2_psd_df.isnull().sum()
print(missing_values)
_print_rule()

# Missing values in each column, as a percentage of all rows.
percent_missing_values = ((missing_values * 100) / len(ns2_psd_df)).round(2)
print(percent_missing_values)
_print_rule()

# Percentage of rows with a missing value in at least one column.
percent_rows_with_missing = (ns2_psd_df.isnull().any(axis=1).mean() * 100).round(2)
print(percent_rows_with_missing)
_print_rule()

# Rows that are identical in every column to an earlier row.
duplicate_records = ns2_psd_df.duplicated().sum()
print("The number of duplicate values on the data set is:", duplicate_records)
_print_rule()

# keep=False flags every member of a duplicate group, including the first occurrence.
all_duplicate_rows = ns2_psd_df[ns2_psd_df.duplicated(keep=False)]
print(all_duplicate_rows)
_print_rule()

# Handling missing values
# The columns being filled hold text labels, so each gap is replaced with the
# column's most frequent value (mode()[0] picks the first mode on ties).
# NOTE(review): duplicate rows are reported above but not removed, so they are
# counted in the totals below; confirm whether they should be dropped.
region_mode = ns2_psd_df["region"].mode()[0]
demographic_group_mode = ns2_psd_df["demographic_group"].mode()[0]

print("Region Mode:", region_mode)
print("Demographic Group Mode:", demographic_group_mode)
_print_rule()

Clean_psd_df = ns2_psd_df.fillna(
    {
        "region": region_mode,
        "demographic_group": demographic_group_mode,
    }
)
Clean_psd_df.info()
_print_rule()

# Recap of the data-quality figures from question 1
print("The number of missing values in the data set are:")
print(missing_values)
print()

print("The percentage of missing values per column in the data set are:")
print(percent_missing_values)
print()

# The percent sign is formatted into the printed value; previously it sat
# outside the print() call and was silently discarded.
print(
    "The percentage of rows with at least one missing value in the data set is",
    f"{percent_rows_with_missing}%",
)
print()

print("The count of duplicate records in the data set are:")
print(duplicate_records)
print()

print("Clean version of the data set with handled missing values:")
print(Clean_psd_df)

Clean_psd_df.info()
_print_rule()

# "pre_order_date" holds date strings, so it is parsed into a real datetime
# column before any month-based grouping.
Clean_psd_df["pre_order_date"] = pd.to_datetime(Clean_psd_df["pre_order_date"], format="%Y-%m-%d")
Clean_psd_df.info()
_print_rule()

# Bucket the dates by calendar month. "MS" (month start) labels each bucket
# with the first day of its month and is accepted by all pandas versions; the
# older "M" alias is deprecated as an offset in recent pandas releases.
month_grouper = pd.Grouper(key="pre_order_date", freq="MS")
print(month_grouper)
_print_rule()

# Answer: total pre-order quantity per month, region and demographic group.
Grouping_df = (
    Clean_psd_df.groupby(
        [month_grouper, "region", "demographic_group"], as_index=False)
    .agg(total_orders=("pre_order_quantity", "sum"))
    .sort_values(by=["pre_order_date", "region", "demographic_group"])
)
print(Grouping_df)
_print_rule()

## Question 3

Predict the total pre-sales quantity for each region for September 2024. Assume that the growth rate from August to September is the same as the growth rate from July to August in each region.

In [None]:
def _print_rule():
    """Print a horizontal rule and a blank line to separate output sections."""
    print("-" * 100)
    print()


# Work on a copy so the original pre_sale_data is never modified.
data = pre_sale_data
ns2_psd_df = data.copy()

print(ns2_psd_df)
_print_rule()

# Column dtypes and non-null counts. info() prints its report itself and
# returns None, so it is called directly rather than wrapped in print().
ns2_psd_df.info()
_print_rule()

# Number of missing values in each column.
missing_values = ns2_psd_df.isnull().sum()
print(missing_values)
_print_rule()

# Missing values in each column, as a percentage of all rows.
percent_missing_values = ((missing_values * 100) / len(ns2_psd_df)).round(2)
print(percent_missing_values)
_print_rule()

# Percentage of rows with a missing value in at least one column.
percent_rows_with_missing = (ns2_psd_df.isnull().any(axis=1).mean() * 100).round(2)
print(percent_rows_with_missing)
_print_rule()

# Rows that are identical in every column to an earlier row.
duplicate_records = ns2_psd_df.duplicated().sum()
print("The number of duplicate values on the data set is:", duplicate_records)
_print_rule()

# keep=False flags every member of a duplicate group, including the first occurrence.
all_duplicate_rows = ns2_psd_df[ns2_psd_df.duplicated(keep=False)]
print(all_duplicate_rows)
_print_rule()

# Handling missing values
# The columns being filled hold text labels, so each gap is replaced with the
# column's most frequent value (mode()[0] picks the first mode on ties).
# NOTE(review): duplicate rows are reported above but not removed, so they are
# counted in the monthly totals and the forecast; confirm whether they should
# be dropped.
region_mode = ns2_psd_df["region"].mode()[0]
demographic_group_mode = ns2_psd_df["demographic_group"].mode()[0]

print("Region Mode:", region_mode)
print("Demographic Group Mode:", demographic_group_mode)
_print_rule()

Clean_psd_df = ns2_psd_df.fillna(
    {
        "region": region_mode,
        "demographic_group": demographic_group_mode,
    }
)
Clean_psd_df.info()
_print_rule()

# Recap of the data-quality figures from question 1
print("The number of missing values in the data set are:")
print(missing_values)
print()

print("The percentage of missing values per column in the data set are:")
print(percent_missing_values)
print()

# The percent sign is formatted into the printed value; previously it sat
# outside the print() call and was silently discarded.
print(
    "The percentage of rows with at least one missing value in the data set is",
    f"{percent_rows_with_missing}%",
)
print()

print("The count of duplicate records in the data set are:")
print(duplicate_records)
print()

print("Clean version of the data set with handled missing values:")
print(Clean_psd_df)

#####################################################################
# Recap of question 2: monthly totals per region and demographic group

Clean_psd_df.info()
_print_rule()

# "pre_order_date" holds date strings, so it is parsed into a real datetime
# column before any month-based work.
Clean_psd_df["pre_order_date"] = pd.to_datetime(Clean_psd_df["pre_order_date"], format="%Y-%m-%d")
Clean_psd_df.info()
_print_rule()

# Bucket the dates by calendar month. "MS" (month start) labels each bucket
# with the first day of its month and is accepted by all pandas versions; the
# older "M" alias is deprecated as an offset in recent pandas releases.
month_grouper = pd.Grouper(key="pre_order_date", freq="MS")
print(month_grouper)
_print_rule()

Grouping_df = (
    Clean_psd_df.groupby(
        [month_grouper, "region", "demographic_group"], as_index=False)
    .agg(total_orders=("pre_order_quantity", "sum"))
    .sort_values(by=["pre_order_date", "region", "demographic_group"])
)
print(Grouping_df)
_print_rule()

################################################################
# Question 3: September 2024 forecast per region

Clean_psd_df.info()
print()

# Explicit year and month columns make the monthly totals easy to filter.
# NOTE(review): the grouping below uses the month number only, which is safe
# while every record falls in the same year (all dates here are in 2024).
Clean_psd_df["pre_order_year"] = Clean_psd_df["pre_order_date"].dt.year
Clean_psd_df["pre_order_month"] = Clean_psd_df["pre_order_date"].dt.month
print(Clean_psd_df)
_print_rule()

Grouping_df = (
    Clean_psd_df.groupby(
        ["pre_order_month", "region", "demographic_group"], as_index=False)
    .agg(total_orders=("pre_order_quantity", "sum"))
    .sort_values(by=["pre_order_month", "region", "demographic_group"])
)
print(Grouping_df)
_print_rule()

# Collapse the demographic groups: one total per month and region.
new_grouping = (
    Grouping_df.groupby(["pre_order_month", "region"])["total_orders"]
    .sum()
    .reset_index()
)
print(new_grouping)
_print_rule()

# One row per region with the July (7) and August (8) totals side by side.
# Passing `values` as a plain string keeps the columns flat (no MultiIndex),
# and reindex guarantees both month columns exist, holding NaN for a region
# with no orders in that month instead of raising KeyError later.
Pivot_df = (
    new_grouping.pivot(index="region", columns="pre_order_month", values="total_orders")
    .reindex(columns=[7, 8])
    .rename(columns={7: "July 2024", 8: "August 2024"})
    .rename_axis(columns=None)
    .reset_index()
)
print(Pivot_df)
_print_rule()

# Growth rate from July to August as a relative change: (Aug - Jul) / Jul.
# A zero July total would give an infinite rate; report it as NaN instead.
JulAug_Growth_rate = (
    (Pivot_df["August 2024"] - Pivot_df["July 2024"]) / Pivot_df["July 2024"]
).replace([np.inf, -np.inf], np.nan)
print("Growth rate from July to August:")
print(JulAug_Growth_rate)
print()

Pivot_df["growth_rate"] = JulAug_Growth_rate

# Apply the same growth rate to August to project September:
# Sep = Aug * (1 + rate), which equals Aug * (Aug / Jul).
Predicted_September = Pivot_df["August 2024"] * (1 + Pivot_df["growth_rate"])
print("Predicted pre-sales quantity for September 2024:")
print(Predicted_September)

# Quantities are whole units, so the forecast is rounded to the nearest unit.
Pivot_df["September 2024 (predicted)"] = Predicted_September.round(0)
print("Predicted pre-sales quantity for September 2024 by region:")
print(Pivot_df)
_print_rule()

# Answering the final question by selecting the relevant columns
final_df = Pivot_df[["region", "July 2024", "August 2024", "growth_rate", "September 2024 (predicted)"]]
print(final_df)

Made with ❤️ by [Interview Master](https://www.interviewmaster.ai)