In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime

# Load the raw event log. The columns read below are 'event_date',
# 'user_id' and 'event_name'.
df = pd.read_csv('data_reliability.csv')

# Parse event_date so that grouping and plotting use real datetimes
# rather than strings.
df['event_date'] = pd.to_datetime(df['event_date'])

# Daily active users: distinct user_ids with at least one event that day.
daily_active_users = df.groupby('event_date')['user_id'].nunique()

# Daily purchase events (raw event count). Kept for reference; it is NOT the
# numerator of the rate because one user can fire several purchase events.
purchase_events = df[df['event_name'] == 'purchase']
daily_purchases = purchase_events.groupby('event_date').size()

# Daily purchasers: distinct user_ids with at least one purchase that day.
# Reindex onto the active-user dates so that days without any purchase count
# as 0 instead of turning into NaN when the two series are aligned.
daily_purchasers = (
    purchase_events.groupby('event_date')['user_id']
    .nunique()
    .reindex(daily_active_users.index, fill_value=0)
)

# Purchaser User Rate: share of the day's active users who purchased, in
# percent. Using distinct purchasers keeps the value within 0-100 and matches
# the unique-user logic used for the spike-date breakdown.
purchaser_user_rate = (
    (daily_purchasers / daily_active_users) * 100
).rename('purchaser_user_rate')

# Plot the Purchaser User Rate over time.
plt.figure(figsize=(12, 6))
purchaser_user_rate.plot()
plt.title('Calculated Purchaser User Rate')
plt.xlabel('Date')
plt.ylabel('Purchaser User Rate (%)')
plt.grid(True)
plt.show()

# Investigate the spike on 2024-07-09: restrict the event log to that day.
spike_date = '2024-07-09'
spike_data = df[df['event_date'] == spike_date]

# Users with at least one purchase event on the spike date. Computed once and
# reused for both the headline percentage and the per-user breakdown.
purchaser_ids = spike_data.loc[spike_data['event_name'] == 'purchase', 'user_id']

# Share of that day's distinct users who have a purchase event.
total_users = spike_data['user_id'].nunique()
affected_users = purchaser_ids.nunique()
# Guard against a date with no rows at all, which would otherwise raise
# ZeroDivisionError before anything is reported.
affected_percentage = (affected_users / total_users) * 100 if total_users else 0.0

print(f"Percentage of affected users on {spike_date}: {affected_percentage:.2f}%")

# All spike-date events (not only purchases) belonging to affected users.
# .copy() makes this an independent frame so the column added below is a
# plain assignment rather than a write to a slice of spike_data
# (avoids pandas' SettingWithCopyWarning).
affected_user_data = spike_data[spike_data['user_id'].isin(purchaser_ids)].copy()

# Operating system distribution, as a percentage of affected users' events.
os_distribution = affected_user_data['operating_system'].value_counts(normalize=True) * 100
print("\nOperating System Distribution of Affected Users:")
print(os_distribution)

# Event type distribution, as a percentage of affected users' events.
event_distribution = affected_user_data['event_name'].value_counts(normalize=True) * 100
print("\nEvent Type Distribution of Affected Users:")
print(event_distribution)

# Time pattern analysis: bucket events by hour of day.
# NOTE(review): event_time is parsed as epoch milliseconds (unit='ms');
# confirm this matches the column's actual unit and intended timezone.
affected_user_data['event_hour'] = pd.to_datetime(affected_user_data['event_time'], unit='ms').dt.hour
hourly_distribution = affected_user_data['event_hour'].value_counts().sort_index()
plt.figure(figsize=(12, 6))
hourly_distribution.plot(kind='bar')
plt.title('Hourly Distribution of Events for Affected Users')
plt.xlabel('Hour of Day')
plt.ylabel('Number of Events')
plt.show()