In [None]:
# Colab-specific cell: pulls in the `files` helper from the `google.colab` package
# (presumably only importable inside a Google Colab runtime — confirm before running elsewhere).
from google.colab import files
# Invoke the upload helper and keep whatever it returns in `uploaded`.
# NOTE(review): none of the cells visible below read `uploaded`; the load cell opens a
# hard-coded CSV filename instead, so the file chosen here presumably has to carry
# exactly that name — confirm.
uploaded = files.upload()

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Read the CSV export into the working DataFrame used by every later cell
csv_path = "py_demo_client_extension_30_20250221075805.csv"
df = pd.read_csv(csv_path)

# Print the frame summary, then end the cell on a preview of the first rows
df.info()
df.head(n=5)

In [None]:
# Parse 'eventtime' into datetimes and derive hour / weekday helper columns.
# The whole step is skipped when the column is absent.
if 'eventtime' in df.columns:
    # errors='coerce': values that cannot be parsed become NaT instead of raising
    event_ts = pd.to_datetime(df['eventtime'], errors='coerce')
    df['eventtime'] = event_ts
    df['hour'] = event_ts.dt.hour
    df['day_of_week'] = event_ts.dt.day_name()

In [None]:
# Discard incomplete rows: a row is removed as soon as any one of its columns is missing.
# Done in place so the `df` object used by the other cells is the one being cleaned.
df.dropna(axis=0, how='any', inplace=True)

# Collapse fully identical rows, keeping the first occurrence of each
df.drop_duplicates(keep='first', inplace=True)

In [None]:
# Visualization 1: Top 10 Most Visited Websites
# Bar chart of the ten most frequent values in the 'url' column; skipped when the
# column is absent.
if 'url' in df.columns:
    top_websites = df['url'].value_counts().head(10)
    plt.figure(figsize=(10,5))
    # seaborn 0.13 deprecates `palette` without `hue` (FutureWarning, removal planned
    # for 0.14). Mapping hue to the same variable as x keeps one colour per bar, and
    # legend=False drops the legend that would merely repeat the x tick labels.
    # NOTE: `legend=` on barplot needs seaborn >= 0.13.
    sns.barplot(
        x=top_websites.index,
        y=top_websites.values,
        hue=top_websites.index,
        palette='coolwarm',
        legend=False,
    )
    # Right-align the rotated labels so each long URL ends under its own bar
    plt.xticks(rotation=45, ha='right')
    plt.title("Top 10 Most Visited Websites")
    plt.xlabel("Website")
    plt.ylabel("Visit Count")
    plt.show()

In [None]:
# Visualization 2: Browsing Activity by Hour
# Line chart of the number of rows per hour of day; skipped when 'hour' is absent.
if 'hour' in df.columns:
    # Count once (the original computed value_counts() twice), then reindex over
    # all 24 hours so an hour with no visits is drawn as 0 instead of being left
    # out and silently interpolated across by the line.
    visits_per_hour = (
        df['hour']
        .dropna()       # value_counts() ignores NaN anyway; needed for the int cast
        .astype(int)    # 'hour' is float when the column held NaN at creation time
        .value_counts()
        .reindex(range(24), fill_value=0)
    )
    plt.figure(figsize=(10,5))
    sns.lineplot(x=visits_per_hour.index, y=visits_per_hour.values)
    # One tick per hour so the axis reads as a clock rather than arbitrary floats
    plt.xticks(range(24))
    plt.title("Browsing Activity by Hour")
    plt.xlabel("Hour of the Day")
    plt.ylabel("Number of Visits")
    plt.grid()
    plt.show()

In [None]:
# Visualization 3: Browsing Activity by Day of the Week
# Count plot of rows per weekday name, Monday first; skipped when 'day_of_week'
# is absent.
if 'day_of_week' in df.columns:
    weekday_order = ['Monday','Tuesday','Wednesday','Thursday','Friday','Saturday','Sunday']
    plt.figure(figsize=(10,5))
    # seaborn 0.13 deprecates `palette` without `hue` (FutureWarning, removal planned
    # for 0.14). hue repeats the x variable so each bar keeps its own colour;
    # hue_order matches `order` so the palette runs Monday -> Sunday, and
    # legend=False drops the redundant legend.
    # NOTE: `legend=` on countplot needs seaborn >= 0.13.
    sns.countplot(
        x=df['day_of_week'],
        hue=df['day_of_week'],
        order=weekday_order,
        hue_order=weekday_order,
        palette='viridis',
        legend=False,
    )
    plt.title("Browsing Activity by Day of the Week")
    plt.xlabel("Day")
    plt.ylabel("Visit Count")
    plt.show()

In [None]:
# Visualization 4: User Flow Analysis (Visit vs. Referring Visit)
# Scatter of each row's visitId against its referringVisitId; drawn only when both
# columns are present.
if {'visitId', 'referringVisitId'}.issubset(df.columns):
    visit_ids = df['visitId']
    referring_ids = df['referringVisitId']
    plt.figure(figsize=(8,5))
    # alpha=0.5 makes the markers semi-transparent
    sns.scatterplot(x=visit_ids, y=referring_ids, alpha=0.5)
    plt.title("User Flow Analysis: Visit vs. Referring Visit")
    plt.xlabel("visitId")
    plt.ylabel("referringVisitId")
    plt.show()

In [None]:
# Visualization 5: Website Transition Type Distribution
# Pie chart of how often each value of 'transition' occurs; skipped when the
# column is absent.
if 'transition' in df.columns:
    transition_counts = df['transition'].value_counts()
    plt.figure(figsize=(10,5))
    # autopct prints each slice's share as a percentage with one decimal place
    transition_counts.plot.pie(autopct='%1.1f%%', cmap='Set2')
    plt.title("Website Transition Type Distribution")
    # Blank out the y-axis label
    plt.ylabel("")
    plt.show()