In [1]:
import requests
import csv
import pandas as pd
import plotly.graph_objects as go
import plotly.io as pio
import plotly.express as px
from plotly.subplots import make_subplots

In [6]:
API_KEY = "AIzaSyDX_kJ-HB9cs0Z8KYDvzhENTDWw3oKTTdQ"
API_URL = "https://pagespeedonline.googleapis.com/pagespeedonline/v5/runPagespeed"
INPUT_FILE = "../../Warehouse/Links/fam_links.csv"
OUTPUT_FILE = "website_metrics.csv"

In [7]:
def read_urls_from_csv(file):
    with open(file, "r", newline="", encoding="utf-8") as csvfile:
        reader = csv.reader(csvfile)
        urls = set()
        for row in reader:
            urls.add(row[0])
        return list(urls)

def evaluate_website(url):
    params = {"url": url, "key": API_KEY}
    response = requests.get(API_URL, params=params)
    response_json = response.json()

    if "lighthouseResult" not in response_json:
        return None

    overall_score = response_json["lighthouseResult"]["categories"]["performance"]["score"]

    metrics = {
        "url": url,
        "overall_score": format(overall_score, ".8f"),
        "first_contentful_paint": response_json["lighthouseResult"]["audits"]["first-contentful-paint"]["numericValue"],
        "speed_index": response_json["lighthouseResult"]["audits"]["speed-index"]["numericValue"],
        "largest_contentful_paint": response_json["lighthouseResult"]["audits"]["largest-contentful-paint"]["numericValue"],
        "interactive": response_json["lighthouseResult"]["audits"]["interactive"]["numericValue"],
    }
    return metrics

def save_results_to_csv(result, file):
    with open(file, "a", newline="", encoding="utf-8") as csvfile:
        fieldnames = ["url", "overall_score", "first_contentful_paint", "speed_index", "largest_contentful_paint", "interactive"]
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

        if csvfile.tell() == 0:
            writer.writeheader()

        writer.writerow(result)

def save_failed_urls_to_csv(failed_urls, file):
    with open(file, "a", newline="", encoding="utf-8") as csvfile:
        fieldnames = ["url", "error"]
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        for failed_url in failed_urls:
            writer.writerow(failed_url)

In [9]:
urls = read_urls_from_csv(INPUT_FILE)
total_urls = len(urls)
failed_urls = []

for index, url in enumerate(urls):
    try:
        result = evaluate_website(url)
        if result:
            save_results_to_csv(result, OUTPUT_FILE)
            print(f"Progress: {index + 1}/{total_urls} - URL: {url} - Status: Success")
        else:
            error_message = "'lighthouseResult' not found"
            failed_urls.append({"url": url, "error": error_message})
            print(f"Progress: {index + 1}/{total_urls} - URL: {url} - Status: Failed - Error: {error_message}")
    except Exception as e:
        error_message = str(e)
        failed_urls.append({"url": url, "error": error_message})
        print(f"Progress: {index + 1}/{total_urls} - URL: {url} - Status: Failed - Error: {error_message}")
        save_failed_urls_to_csv(failed_urls, "failed_urls.csv")

Progress: 1/166 - URL: https://fampay.in/blog/healthy-wealth-how-can-parents-have-a-healthy-discussion-about-money-with-their-children/ - Status: Failed - Error: unsupported format string passed to NoneType.__format__
Progress: 2/166 - URL: https://fampay.in/partner - Status: Success
Progress: 3/166 - URL: https://fampay.in/blog/author/shifa/ - Status: Success
Progress: 4/166 - URL: https://fampay.in/blog/zetas-fusion-to-power-fampays-cashless-payments-for-gen-z/ - Status: Failed - Error: unsupported format string passed to NoneType.__format__
Progress: 5/166 - URL: https://fampay.in/blog/pocket-money-for-teenagers-why-it-matters/ - Status: Failed - Error: unsupported format string passed to NoneType.__format__
Progress: 6/166 - URL: https://fampay.in/blog/this-fintech-startup-by-iit-roorkee-alum-is-giving-teenagers-financial-freedom-responsibly-yourstory/ - Status: Failed - Error: 'lighthouseResult' not found
Progress: 7/166 - URL: https://fampay.in/blog/author/priyanka/ - Status: Suc

In [2]:
data = pd.read_csv('website_metrics.csv')

In [3]:
fig1 = go.Figure()

fig1.add_trace(
    go.Scatter(
        x=data["url"], 
        y=data["overall_score"],
        mode='markers',
        marker=dict(
            size=15,
            color=data["overall_score"], 
            colorscale='Viridis', 
            showscale=True
        ),
        text=data["url"],
    )
)

fig1.update_layout(title='Overall Score', xaxis_title='URL', yaxis_title='Score', hovermode='closest')

pio.write_html(fig1, "../../fam-report-site/public/Web-Analytics/overall_score.html")
fig1.show()

In [4]:
data['size'] = data['largest_contentful_paint'] / 100

fig = go.Figure(data=go.Scatter(
    x=data['overall_score'],
    y=data['first_contentful_paint'],
    mode='markers',
    marker=dict(
        size=data['size'],
        sizemode='area',
        sizeref=0.01,
        sizemin=4
    ),
    text=data['url'],
))

fig.update_layout(
    title='Overall Score vs First Contentful Paint (Size: Largest Contentful Paint)',
    xaxis=dict(
        title='Overall Score',
        gridcolor='white',
        type='log',
        gridwidth=2,
    ),
    yaxis=dict(
        title='First Contentful Paint',
        gridcolor='white',
        gridwidth=2,
    ),
    paper_bgcolor='rgb(243, 243, 243)',
    plot_bgcolor='rgb(243, 243, 243)',
)

# Save plot
plot_filename = "../../fam-report-site/public/Web-Analytics/bubble_chart.html"
pio.write_html(fig, plot_filename)
fig.show()

In [5]:
fig = go.Figure(data=[go.Scatter3d(
    x=data['overall_score'],
    y=data['first_contentful_paint'],
    z=data['interactive'],
    mode='markers',
    marker=dict(
        size=data['largest_contentful_paint'] / 100,
        sizemode='area',
        sizeref=0.01,
        sizemin=4,
        color=data['speed_index'], 
        colorscale='Viridis', 
        colorbar=dict(title="Speed Index"),
        opacity=0.8
    ),
    text=data['url'],
)])

fig.update_layout(
    title='3D Scatter Plot: Overall Score, First Contentful Paint, Interactive, Largest Contentful Paint and Speed Index',
    scene=dict(
        xaxis=dict(title='Overall Score'),
        yaxis=dict(title='First Contentful Paint'),
        zaxis=dict(title='Interactive'),
    ),
    autosize=False,
    width=800,
    height=800,
)

# Save plot
plot_filename = "../../fam-report-site/public/Web-Analytics/3d_scatter_plot.html"
pio.write_html(fig, plot_filename)
fig.show()

In [6]:
fig = px.treemap(data, path=['url'], values='overall_score',
                 color='overall_score', hover_data=['overall_score'],
                 color_continuous_scale='RdBu', title='Overall Score Treemap')

fig.write_html("../../fam-report-site/public/Web-Analytics/overall_score_treemap.html")
fig.show()

In [7]:
# Convert overall_score to numeric as it was saved in the csv as a string with decimal precision
data['overall_score'] = pd.to_numeric(data['overall_score'])

fig = px.scatter_matrix(data,
    dimensions=["overall_score", "first_contentful_paint", "speed_index", "largest_contentful_paint", "interactive"],
    title="Website Metrics Scatterplot Matrix")
fig.update_traces(diagonal_visible=False)  # Hide diagonal distribution plots
fig.write_html("../../fam-report-site/public/Web-Analytics/scatterplot_matrix.html")
fig.show()


iteritems is deprecated and will be removed in a future version. Use .items instead.



In [8]:
color_scale = px.colors.diverging.Tealrose  # choose any available color scale

# Specify only numeric columns
numeric_cols = ["overall_score", "first_contentful_paint", "speed_index", "largest_contentful_paint", "interactive"]

# Normalize your data for better visualization, especially if the metrics are on different scales
data_normalized = (data[numeric_cols] - data[numeric_cols].min()) / (data[numeric_cols].max() - data[numeric_cols].min())

# Append the 'url' column from the original dataframe
data_normalized['url'] = data['url']

# Append the 'url' column from the original dataframe
data_normalized['url'] = data['url']

fig = px.parallel_coordinates(
    data_normalized, 
    color=data_normalized["overall_score"] * 100,  # or any other variable of interest
    labels={
        "url": "URL",
        "overall_score": "Overall Score",
        "first_contentful_paint": "First Contentful Paint",
        "speed_index": "Speed Index",
        "largest_contentful_paint": "Largest Contentful Paint",
        "interactive": "Interactive"
    },
    color_continuous_scale=color_scale,
    color_continuous_midpoint=0.5  # This can be changed to adjust the color scale midpoint
)

# Modify the layout
fig.update_layout(
    title='Parallel Coordinates Plot of Website Metrics',
    autosize=False,
    width=800,
    height=500
)

# Save the plot
pio.write_html(fig, "../../fam-report-site/public/Web-Analytics/parallel_coordinates.html")
fig.show()


iteritems is deprecated and will be removed in a future version. Use .items instead.

