<a href="https://colab.research.google.com/github/ROhitasware/Website-Traffic-Analysis/blob/main/Traffic_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# Step 1: Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Configure visualization styles
plt.style.use('ggplot')
sns.set_theme()

# Load the dataset
file_path = "/content/traffic.csv"  # Adjust this path if necessary
data = pd.read_csv(file_path)

# Display the first few rows of the dataset
print("Dataset Preview:")
data.head()


Dataset Preview:


Unnamed: 0,event,date,country,city,artist,album,track,isrc,linkid
0,click,2021-08-21,Saudi Arabia,Jeddah,Tesher,Jalebi Baby,Jalebi Baby,QZNWQ2070741,2d896d31-97b6-4869-967b-1c5fb9cd4bb8
1,click,2021-08-21,Saudi Arabia,Jeddah,Tesher,Jalebi Baby,Jalebi Baby,QZNWQ2070741,2d896d31-97b6-4869-967b-1c5fb9cd4bb8
2,click,2021-08-21,India,Ludhiana,Reyanna Maria,So Pretty,So Pretty,USUM72100871,23199824-9cf5-4b98-942a-34965c3b0cc2
3,click,2021-08-21,France,Unknown,"Simone & Simaria, Sebastian Yatra",No Llores Más,No Llores Más,BRUM72003904,35573248-4e49-47c7-af80-08a960fa74cd
4,click,2021-08-21,Maldives,Malé,Tesher,Jalebi Baby,Jalebi Baby,QZNWQ2070741,2d896d31-97b6-4869-967b-1c5fb9cd4bb8


In [3]:
# Step 2.1: Inspect the dataset
print("Dataset Information:")
data.info()

print("\nSummary Statistics:")
data.describe()

# Step 2.2: Check for missing values
print("\nMissing Values:")
print(data.isnull().sum())

# Step 2.3: Handle missing or anomalous data
# Example: Fill missing numeric values with the median
numeric_columns = data.select_dtypes(include=['float64', 'int64']).columns
data[numeric_columns] = data[numeric_columns].fillna(data[numeric_columns].median())

# Example: Drop rows with missing categorical values
categorical_columns = data.select_dtypes(include=['object']).columns
data = data.dropna(subset=categorical_columns)

# Recheck for missing values
print("\nMissing Values After Cleaning:")
print(data.isnull().sum())


Dataset Information:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 226278 entries, 0 to 226277
Data columns (total 9 columns):
 #   Column   Non-Null Count   Dtype 
---  ------   --------------   ----- 
 0   event    226278 non-null  object
 1   date     226278 non-null  object
 2   country  226267 non-null  object
 3   city     226267 non-null  object
 4   artist   226241 non-null  object
 5   album    226273 non-null  object
 6   track    226273 non-null  object
 7   isrc     219157 non-null  object
 8   linkid   226278 non-null  object
dtypes: object(9)
memory usage: 15.5+ MB

Summary Statistics:

Missing Values:
event         0
date          0
country      11
city         11
artist       37
album         5
track         5
isrc       7121
linkid        0
dtype: int64

Missing Values After Cleaning:
event      0
date       0
country    0
city       0
artist     0
album      0
track      0
isrc       0
linkid     0
dtype: int64


In [4]:
# Convert date column to datetime (if applicable)
if 'date' in data.columns:
    data['date'] = pd.to_datetime(data['date'])

# Group by date to analyze traffic trends
if 'date' in data.columns and 'page_views' in data.columns:
    daily_traffic = data.groupby('date')['page_views'].sum().reset_index()

    # Plot daily traffic trends
    plt.figure(figsize=(12, 6))
    plt.plot(daily_traffic['date'], daily_traffic['page_views'], color='blue')
    plt.title("Daily Traffic Trends", fontsize=16)
    plt.xlabel("Date", fontsize=14)
    plt.ylabel("Page Views", fontsize=14)
    plt.grid(True)
    plt.show()
