In [None]:
# Import required libraries
import pandas as pd

# Load the customer purchase dataset into a DataFrame
file_path = "/mnt/data/Cust_Purch_FakeData.csv"
data = pd.read_csv(file_path)

# Show a preview of the first rows followed by the column/dtype summary
print("Dataset Overview:")
display(data.head())  # display() renders a formatted table in Jupyter
print("\nDataset Info:")
# DataFrame.info() writes its summary to stdout itself and returns None,
# so wrapping it in print() would emit a stray "None" line after the summary.
data.info()

# 1. Dataset dimensions: DataFrame.shape is a (row count, column count) pair
entries, columns = data.shape
print(f"\n1. Number of Entries: {entries}, Number of Columns: {columns}")

# 2. Age statistics: oldest, youngest, and average customer age
_ages = data['Age']
max_age, min_age, mean_age = _ages.max(), _ages.min(), _ages.mean()
print(f"\n2. Max Age: {max_age}, Min Age: {min_age}, Mean Age: {mean_age:.2f}")

# 3. Most frequent customer names
# value_counts() orders names by descending frequency, so the first three
# rows of the tally are the three most common names.
_name_counts = data['Name'].value_counts()
common_names = _name_counts.iloc[:3]
print(f"\n3. Three Most Common Names:\n{common_names}")

# 4. Customers with the same phone number
# keep=False flags every row of a repeated phone number, not only the later
# occurrences. duplicated() also treats missing values as equal to each other,
# so rows without a phone number are excluded explicitly; otherwise every
# customer with no phone on file would be reported as sharing a number.
_has_phone = data['Phone'].notna()
_phone_is_shared = data.duplicated(subset='Phone', keep=False)
duplicate_phones = data[_has_phone & _phone_is_shared]
print("\n4. Customers with Duplicate Phone Numbers:")
display(duplicate_phones)

# 5. Structural engineers: rows whose Profession is exactly "Structural Engineer"
_is_structural_engineer = data['Profession'] == 'Structural Engineer'
structural_engineers = data.loc[_is_structural_engineer]
print(f"\n5. Number of Structural Engineers: {structural_engineers.shape[0]}")

# 6. Structural engineers whose Gender is "Male"
_is_male = structural_engineers['Gender'] == 'Male'
male_structural_engineers = structural_engineers.loc[_is_male]
print(f"\n6. Number of Male Structural Engineers: {male_structural_engineers.shape[0]}")

# 7. Structural engineers who are female AND located in Alberta (Province "AB")
_is_female = structural_engineers['Gender'] == 'Female'
_in_alberta = structural_engineers['Province'] == 'AB'
female_ab_structural_engineers = structural_engineers.loc[_is_female & _in_alberta]
print("\n7. Female Structural Engineers from Alberta (AB):")
display(female_ab_structural_engineers)

# 8. Spending statistics: largest, smallest, and mean amount spent
_spending = data['Spending']
max_spending, min_spending, average_spending = (
    _spending.max(),
    _spending.min(),
    _spending.mean(),
)
print(f"\n8. Max Spending: {max_spending}, Min Spending: {min_spending}, Average Spending: {average_spending:.2f}")

# 9. Customers whose recorded Spending is exactly zero
_spent_nothing = data['Spending'] == 0
no_spending_customers = data.loc[_spent_nothing]
print("\n9. Customers Who Did Not Spend Anything:")
display(no_spending_customers)

# 10. Customers whose Spending is at least 100 (the threshold is inclusive)
_spent_100_or_more = data['Spending'] >= 100
loyal_customers = data.loc[_spent_100_or_more]
print("\n10. Customers Spending 100 CAD or More:")
display(loyal_customers)

# 11. Emails associated with a specific credit card number
# read_csv infers an all-digit column as int64, and an integer never compares
# equal to a str, so matching the raw column against a string literal would
# silently select nothing. Comparing on the string form of the column works
# whether the numbers were loaded as integers or as text.
# NOTE(review): if the column contains missing values it is loaded as float64
# and this still will not match -- confirm the column has no gaps.
_card_number = '5020000000000230'
_card_as_text = data['CreditCardNumber'].astype(str)
emails_with_card = data[_card_as_text == _card_number]['Email']
print(f"\n11. Emails Associated with Credit Card {_card_number}:")
display(emails_with_card)

# 12. Cards expiring in 2019
# na=False treats a missing expiry date as "no match"; without it the mask
# contains NaN and boolean indexing raises ValueError. regex=False makes
# '2019' a plain substring search rather than a regular expression.
# NOTE(review): assumes ExpiryDate is text containing a four-digit year -- confirm.
_expires_in_2019 = data['ExpiryDate'].str.contains('2019', na=False, regex=False)
expiring_2019 = data[_expires_in_2019]
print(f"\n12. Number of Cards Expiring in 2019: {len(expiring_2019)}")

# 13. Customers whose CreditCardProvider is exactly "Visa"
_pays_with_visa = data['CreditCardProvider'] == 'Visa'
visa_customers = data.loc[_pays_with_visa]
print(f"\n13. Number of Visa Users: {visa_customers.shape[0]}")

# 14. Visa customers whose Spending is exactly 100
_spent_exactly_100 = visa_customers['Spending'] == 100
visa_100_spenders = visa_customers.loc[_spent_exactly_100]
print("\n14. Customers Who Spent 100 CAD Using Visa:")
display(visa_100_spenders)

# 15. Most frequent professions
# value_counts() orders professions by descending frequency, so the first
# two rows of the tally are the two most common ones.
_profession_counts = data['Profession'].value_counts()
common_professions = _profession_counts.iloc[:2]
print("\n15. Two Most Common Professions:")
print(common_professions)

# 16. Top 5 most popular email providers
# The provider is the part of the address after the '@'. Domain names are
# case-insensitive, so the domain is stripped of surrounding whitespace and
# lower-cased; otherwise "Gmail.com" and "gmail.com" would be tallied as two
# different providers. Addresses without an '@' yield a missing domain.
# Adds an 'EmailDomain' column to `data`.
data['EmailDomain'] = data['Email'].str.split('@').str[1].str.strip().str.lower()
top_email_providers = data['EmailDomain'].value_counts().head(5)
print("\n16. Top 5 Most Popular Email Providers:")
print(top_email_providers)

# 17. Customers whose EmailDomain column equals "am.edu"
_uses_am_edu = data['EmailDomain'] == 'am.edu'
am_edu_emails = data.loc[_uses_am_edu]
print("\n17. Customers Using am.edu Email:")
display(am_edu_emails)

# 18. Busiest weekday
# Parse PurchaseDate into datetimes (overwriting the column), store each
# purchase's weekday name in a new 'DayOfWeek' column, then pick the weekday
# with the highest row count.
_purchase_dates = pd.to_datetime(data['PurchaseDate'])
data['PurchaseDate'] = _purchase_dates
data['DayOfWeek'] = _purchase_dates.dt.day_name()
_purchases_per_day = data['DayOfWeek'].value_counts()
popular_day = _purchases_per_day.idxmax()
print(f"\n18. Day with the Most Customers: {popular_day}")
