Python Party Day 4: Google Search Results Page: User Interaction Patterns

You are a Product Analyst on the Google Search team investigating user engagement with search result pages. The team wants to understand how different numbers of search results impact user interaction time. Your analysis will help optimize the current search results presentation strategy.

Question 1 of 3

Identify and remove any duplicate entries in the dataset to ensure data quality. How many duplicates were found and removed?

In [None]:
import numpy as np
import pandas as pd


In [None]:
# Load the raw engagement export. pandas exposes no bare `read` function
# (calling it raises AttributeError); CSV files are loaded with `read_csv`.
user_engagement_data = pd.read_csv("user_engagement_data.csv")


In [None]:
# Question 1: explore the data, then count and drop fully duplicated rows.

# Work on a copy so the originally loaded frame is never modified.
ued_df = user_engagement_data.copy()

# Exploring data
print("Exploring data")
print("-" * 100)
print()

print("Checking data information")
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() emits a stray "None" line.
ued_df.info()
print("-" * 100)
print()

print("Dataframe")
print(ued_df)
print("-" * 100)
print()

print("Descriptive statistics for each column on the DataFrame")
print(ued_df.describe())
print("-" * 100)
print()

print("Shape of Original Data")
print("There are", ued_df.shape[0], "rows and", ued_df.shape[1], "columns.")
print("-" * 100)
print()

print("Checking for Duplicate values")
# duplicated() flags every repeat after the first occurrence, so the sum is
# exactly the number of rows drop_duplicates() removes below.
duplicate_values = ued_df.duplicated().sum()
print('The number of duplicate values on the dataset is:')
print(duplicate_values)
print("-" * 100)
print()

# Identify all duplicate rows, including the first occurrence
all_duplicate_rows = ued_df[ued_df.duplicated(keep=False)]
print("Display all duplicate rows")
print(all_duplicate_rows)
print("-" * 100)
print()

# drop_duplicates() returns a new frame; ued_df itself keeps its duplicates.
drop_dupes = ued_df.drop_duplicates()
print("Droping duplicates")
print(drop_dupes.describe())
print("-" * 100)
print()

print("Shape of Dropped data")
print("There are", drop_dupes.shape[0], "rows and", drop_dupes.shape[1], "columns.")
print("-" * 100)
print()

print("The number of duplicate values on the dataset that were found and removed were", duplicate_values, "duplicate values.")


Question 2 of 3

After dropping duplicates, aggregate the data to find the average user interaction time for each number of search results displayed per page. What are the average interaction times?

In [None]:
 #Exploring data
# Question 2: drop duplicate rows, then average interaction_time for each
# value of search_results_displayed. Relies on `ued_df` from the Question 1 cell.
print("Exploring data")
print("-" * 100)
print()

print("Checking data information")
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() emits a stray "None" line.
ued_df.info()
print("-" * 100)
print()

print("Dataframe")
print(ued_df)
print("-" * 100)
print()

print("Descriptive statistics for each column on the DataFrame")
print(ued_df.describe())
print("-" * 100)
print()

print("Shape of Original Data")
print("There are", ued_df.shape[0], "rows and", ued_df.shape[1], "columns.")
print("-" * 100)
print()

print("Checking for Duplicate values")
# duplicated() flags every repeat after the first occurrence, so the sum is
# exactly the number of rows drop_duplicates() removes below.
duplicate_values = ued_df.duplicated().sum()
print('The number of duplicate values on the dataset is:')
print(duplicate_values)
print("-" * 100)
print()

# Identify all duplicate rows, including the first occurrence
all_duplicate_rows = ued_df[ued_df.duplicated(keep=False)]
print("Display all duplicate rows")
print(all_duplicate_rows)
print("-" * 100)
print()

# drop_duplicates() returns a new frame; ued_df itself keeps its duplicates.
dropped_df = ued_df.drop_duplicates()
print("Droping duplicates")
print(dropped_df.describe())
print("-" * 100)
print()

print("Shape of Dropped data")
print("There are", dropped_df.shape[0], "rows and", dropped_df.shape[1], "columns.")
print("-" * 100)
print()

print("The number of duplicate values on the dataset that were found and removed were", duplicate_values, "duplicate values.")
print("-" * 100)
print()
print("-" * 100)
print()

print("Average user interaction time for each number of search results displayed per page:")
# Group the de-duplicated rows by page size; agg(['mean']) names the result
# column "mean", and reset_index() turns the group key back into a column.
clean_df = dropped_df.groupby("search_results_displayed")
agg_UI = clean_df["interaction_time"].agg(['mean']).reset_index()
print(agg_UI)
print("-" * 100)
print()



Question 3 of 3
Sort the aggregated results from Q2 to determine which number of search results per page has the highest average user interaction time. What is the optimal number of search results per page?

In [None]:
# Note: pandas and numpy are already imported as pd and np
# The following tables are loaded as pandas DataFrames with the same names: user_engagement_data
# Please print your final result or dataframe

# Question 3: rebuild the de-duplicated averages, rank them from highest to
# lowest and report the top-ranked search_results_displayed value.
# Relies on `ued_df` from the Question 1 cell.

# Exploring data
print("Exploring data")
print("-" * 100)
print()

print("Checking data information")
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() emits a stray "None" line.
ued_df.info()
print("-" * 100)
print()

print("Dataframe")
print(ued_df)
print("-" * 100)
print()

print("Descriptive statistics for each column on the DataFrame")
print(ued_df.describe())
print("-" * 100)
print()

print("Shape of Original Data")
print("There are", ued_df.shape[0], "rows and", ued_df.shape[1], "columns.")
print("-" * 100)
print()

print("Checking for Duplicate values")
# duplicated() flags every repeat after the first occurrence, so the sum is
# exactly the number of rows drop_duplicates() removes below.
duplicate_values = ued_df.duplicated().sum()
print('The number of duplicate values on the dataset is:')
print(duplicate_values)
print("-" * 100)
print()

# Identify all duplicate rows, including the first occurrence
all_duplicate_rows = ued_df[ued_df.duplicated(keep=False)]
print("Display all duplicate rows")
print(all_duplicate_rows)
print("-" * 100)
print()

# drop_duplicates() returns a new frame; ued_df itself keeps its duplicates.
dropped_df = ued_df.drop_duplicates()
print("Droping duplicates")
print(dropped_df.describe())
print("-" * 100)
print()

print("Shape of Dropped data")
print("There are", dropped_df.shape[0], "rows and", dropped_df.shape[1], "columns.")
print("-" * 100)
print()

print("The number of duplicate values on the dataset that were found and removed were", duplicate_values, "duplicate values.")
print("-" * 100)
print()
print("-" * 100)
print()

print("Average user interaction time for each number of search results displayed per page:")
# Group the de-duplicated rows by page size; agg(['mean']) names the result
# column "mean", and reset_index() turns the group key back into a column.
clean_df = dropped_df.groupby("search_results_displayed")
agg_UI = clean_df["interaction_time"].agg(['mean']).reset_index()
print(agg_UI)
print("-" * 100)
print()

print("Average user interaction time for each number of search results displayed per page sorted from High to Low:")
# drop=True discards the pre-sort positional index; without it reset_index()
# keeps that index as a meaningless extra "index" column in the printed ranking.
# Row label 0 is then the highest average, which the .at[0, ...] lookups need.
Sorted_df = agg_UI.sort_values('mean', ascending=False).reset_index(drop=True)
print(Sorted_df)
print("-" * 100)
print()

print("Number", Sorted_df.at[0, 'search_results_displayed'], "of search_results_displayed has the highest average interaction time with a value of", Sorted_df.at[0, 'mean'])
