# 3D Visualizations for GitHub Language Analysis

Explore interactive 3D visualizations built from the enriched repository dataset. These charts highlight how popularity, activity, and enterprise readiness intersect across languages.

## Workflow Overview

1. Load the consolidated repository dataset.
2. Aggregate language-level metrics for comparative analysis.
3. Generate interactive 3D figures and export them to the `public/visualizations` directory for dashboard integration.

In [1]:
from pathlib import Path

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio

pio.templates.default = "plotly_dark"

data_path = Path("../data/raw/repositories_enriched.csv")
if not data_path.exists():
    raise FileNotFoundError(f"Expected dataset at {data_path.resolve()}")

df = pd.read_csv(data_path)
print(f"Loaded {len(df):,} repositories spanning {df['language'].nunique()} languages.")
df.head(3)

Loaded 1,200 repositories spanning 12 languages.


Unnamed: 0,id,name,full_name,owner,description,language,created_at,updated_at,pushed_at,stars,...,quarter_commit_share,issue_to_commit_ratio,freshness_index,support_load,compliance_score,enterprise_ready,maturity_score,growth_signal,growth_segment,compliance_tier
0,184456251,PowerToys,microsoft/PowerToys,microsoft,Microsoft PowerToys is a collection of utiliti...,C#,2019-05-01 17:44:02+00:00,2025-10-18 22:06:09+00:00,2025-10-18 21:26:55+00:00,124775,...,1.0,73.405941,1.0,17.908213,1.0,True,94.030947,1.0,High,Excellent
1,17620347,aspnetcore,dotnet/aspnetcore,dotnet,ASP.NET Core is a cross-platform .NET framewor...,C#,2014-03-11 06:09:42+00:00,2025-10-18 22:08:05+00:00,2025-10-18 18:20:00+00:00,37271,...,1.0,34.415842,1.0,10.223529,1.0,True,80.771861,0.995,High,Excellent
2,199570071,v2rayN,2dust/v2rayN,2dust,"A GUI client for Windows, Linux and macOS, sup...",C#,2019-07-30 03:47:24+00:00,2025-10-18 22:08:26+00:00,2025-10-17 13:01:36+00:00,88248,...,1.0,0.059406,0.5,0.067416,0.5,False,62.079101,0.824452,High,Basic


In [2]:
# Aggregate language-level metrics for 3D plots
numeric_metrics = [
    "stars",
    "forks",
    "watchers",
    "commits_30d",
    "contributors_count",
    "activity_score",
    "health_score",
    "overall_score",
    "compliance_score",
    "recency_score"
]

language_summary = (
    df.groupby("language")[numeric_metrics]
    .mean()
    .reset_index()
)

# Attach categorical descriptors via statistical mode
categorical_metrics = ["growth_segment", "growth_signal", "compliance_tier"]
for column in categorical_metrics:
    language_summary[column] = (
        df.groupby("language")[column]
        .agg(lambda s: s.mode().iloc[0] if not s.mode().empty else np.nan)
        .values
    )

# Calculate raw popularity index
raw_popularity = language_summary[["stars", "forks", "watchers"]].mean(axis=1)

# Normalize popularity to 0-100 scale for better visualization
popularity_min = raw_popularity.min()
popularity_max = raw_popularity.max()
language_summary["popularity_index"] = ((raw_popularity - popularity_min) / (popularity_max - popularity_min)) * 100

language_summary["enterprise_readiness_pct"] = language_summary["compliance_score"] * 100
language_summary["contributors_count"] = language_summary["contributors_count"].clip(lower=1)

language_summary.sort_values("overall_score", ascending=False).head()

Unnamed: 0,language,stars,forks,watchers,commits_30d,contributors_count,activity_score,health_score,overall_score,compliance_score,recency_score,growth_segment,growth_signal,compliance_tier,popularity_index,enterprise_readiness_pct
9,Rust,34618.33,2337.57,34618.33,44.95,258.37,61.621793,68.0,49.395352,0.7475,92.090411,High,1.0,Good,29.683694,74.75
11,TypeScript,68601.51,9956.77,68601.51,52.07,311.21,69.439082,70.4,48.328033,0.7875,96.380822,High,1.0,Excellent,84.031243,78.75
2,Go,43019.2,4937.63,43019.2,38.76,249.6,58.924183,68.4,46.916172,0.7425,90.824658,High,1.0,Good,43.633982,74.25
1,C++,37213.89,6835.09,37213.89,41.6,222.23,56.153807,71.8,44.123312,0.7425,83.610959,High,1.0,Good,36.650023,74.25
7,Python,78107.21,13154.42,78107.21,34.93,227.56,53.522019,65.8,43.488295,0.67,83.926027,High,0.998904,Basic,100.0,67.0


In [3]:
output_dir = Path("public/visualizations")
output_dir.mkdir(parents=True, exist_ok=True)

scatter_fig = px.scatter_3d(
    language_summary,
    x="activity_score",
    y="health_score",
    z="popularity_index",
    color="enterprise_readiness_pct",
    color_continuous_scale="Sunsetdark",
    size="contributors_count",
    size_max=28,
    hover_name="language",
    hover_data={
        "overall_score":":.1f",
        "enterprise_readiness_pct":":.1f",
        "contributors_count":":.0f",
        "growth_segment": True,
        "growth_signal": True
    },
    labels={
        "activity_score": "Activity Score",
        "health_score": "Health Score",
        "popularity_index": "Popularity Metric",
        "enterprise_readiness_pct": "Enterprise Readiness (%)"
    },
    title="Language Landscape: Activity vs Health vs Popularity"
)

scatter_fig.update_layout(
    legend_title_text="Enterprise Readiness (%)",
    margin=dict(l=20, r=20, t=70, b=20)
)

scatter_path = output_dir / "enhanced_3d_language_analysis.html"
scatter_fig.write_html(scatter_path)
print(f"Saved 3D scatter to {scatter_path}")
scatter_fig.show()

Saved 3D scatter to public\visualizations\enhanced_3d_language_analysis.html


In [4]:
output_dir = Path("../public/visualizations")
output_dir.mkdir(parents=True, exist_ok=True)

scatter_fig = px.scatter_3d(
    language_summary,
    x="activity_score",
    y="health_score",
    z="popularity_index",
    color="enterprise_readiness_pct",
    color_continuous_scale="Sunsetdark",
    size="contributors_count",
    size_max=28,
    hover_name="language",
    hover_data={
        "overall_score":":.1f",
        "enterprise_readiness_pct":":.1f",
        "contributors_count":":.0f",
        "growth_segment": True,
        "growth_signal": True
    },
    labels={
        "activity_score": "Activity Score",
        "health_score": "Health Score",
        "popularity_index": "Popularity Metric",
        "enterprise_readiness_pct": "Enterprise Readiness (%)"
    },
    title="Language Landscape: Activity vs Health vs Popularity"
 )

scatter_fig.update_layout(
    legend_title_text="Enterprise Readiness (%)",
    margin=dict(l=20, r=20, t=70, b=20)
 )

scatter_path = output_dir / "enhanced_3d_language_analysis.html"
scatter_fig.write_html(scatter_path)
print(f"Saved 3D scatter to {scatter_path}")
scatter_fig.show()

Saved 3D scatter to ..\public\visualizations\enhanced_3d_language_analysis.html


In [5]:
focus_languages = ordered.head(6)["language"].tolist()
repo_subset = df[df["language"].isin(focus_languages)].copy()
repo_subset["enterprise_ready_pct"] = repo_subset["compliance_score"] * 100

repo_fig = px.scatter_3d(
    repo_subset,
    x="stars",
    y="forks",
    z="contributors_count",
    color="language",
    size="commits_30d",
    hover_data={
        "full_name": True,
        "overall_score":":.1f",
        "activity_score":":.1f",
        "health_score":":.1f",
        "enterprise_ready_pct":":.1f",
        "growth_segment": True
    },
    labels={
        "contributors_count": "Contributors",
        "commits_30d": "Commits (30d)",
        "enterprise_ready_pct": "Enterprise Readiness (%)"
    },
    title="Repository Distribution for Top Languages",
    opacity=0.75
 )

repo_fig.update_layout(margin=dict(l=20, r=20, t=60, b=20))

repo_path = output_dir / "language_mesh_network.html"
repo_fig.write_html(repo_path)
print(f"Saved repository-level 3D view to {repo_path}")
repo_fig.show()

NameError: name 'ordered' is not defined

In [None]:
fig = px.scatter(
    timeline_df,
    x='avg_activity',
    y='avg_overall_score',
    size='cumulative_repos',
    color='language',
    animation_frame='year',
    animation_group='language',
    size_max=45,
    range_x=[0, 100],
    range_y=[0, 60],
    hover_name='language',
    hover_data={
        'cumulative_repos': ':,',
        'avg_stars': ':,.0f',
        'total_stars': ':,.0f',
        'avg_activity': ':.1f',
        'avg_overall_score': ':.1f'
    },
    labels={
        'avg_activity': 'Average Activity Score',
        'avg_overall_score': 'Average Overall Score',
        'cumulative_repos': 'Total Repositories',
        'avg_stars': 'Avg Stars',
        'total_stars': 'Total Stars'
    },
    title='Language Evolution Timeline (2013-2025)<br><sub>Bubble size = cumulative repository count</sub>'
 )

fig.layout.updatemenus[0].buttons[0].args[1]["frame"]["duration"] = 800
fig.layout.updatemenus[0].buttons[0].args[1]["transition"]["duration"] = 400

fig.update_xaxes(
    showgrid=True,
    gridwidth=1,
    gridcolor='rgba(128,128,128,0.2)'
 )
fig.update_yaxes(
    showgrid=True,
    gridwidth=1,
    gridcolor='rgba(128,128,128,0.2)'
 )

fig.update_layout(
    paper_bgcolor='#0d1117',
    plot_bgcolor='#0d1117',
    font=dict(color='white', size=12),
    height=700,
    margin=dict(l=80, r=40, t=100, b=80),
    legend=dict(
        title=dict(text='Language', font=dict(size=14)),
        font=dict(size=11),
        bgcolor='rgba(13,17,23,0.8)',
        bordercolor='#21262d',
        borderwidth=1
    )
 )

animated_path = output_dir / "animated_language_evolution.html"
fig.write_html(animated_path)
print(f"✅ Saved animated timeline to {animated_path}")
fig.show()

Timeline data spans 2013 to 2025


Unnamed: 0,year,language,cumulative_repos,avg_stars,avg_overall_score,total_stars,avg_activity
0,2013,C#,9,22550.444444,33.743064,202954,34.297372
1,2013,C++,5,50142.0,39.122515,250710,37.962782
2,2013,Go,13,49306.230769,48.366889,640981,63.783275
3,2013,Java,11,32161.545455,44.43675,353777,53.399518
4,2013,JavaScript,11,64016.818182,43.532586,704185,52.783296


## Animated Timeline: Language Evolution Over Time

This section creates a clear, easy-to-follow animated visualization showing how languages evolve across years (2013–2025). Bubble size reflects cumulative repository count; position encodes average activity (x) and overall score (y); color distinguishes languages.

In [None]:
# Create a clean animated bubble chart showing language growth
fig = px.scatter(
    timeline_df,
    x='avg_activity',
    y='avg_overall_score',
    size='cumulative_repos',
    color='language',
    animation_frame='year',
    animation_group='language',
    size_max=45,
    range_x=[0, 100],
    range_y=[0, 60],
    hover_name='language',
    hover_data={
        'cumulative_repos': ':,',
        'avg_stars': ':,.0f',
        'total_stars': ':,.0f',
        'avg_activity': ':.1f',
        'avg_overall_score': ':.1f'
    },
    labels={
        'avg_activity': 'Average Activity Score',
        'avg_overall_score': 'Average Overall Score',
        'cumulative_repos': 'Total Repositories',
        'avg_stars': 'Avg Stars',
        'total_stars': 'Total Stars'
    },
    title='Language Evolution Timeline (2013-2025)<br><sub>Bubble size = cumulative repository count</sub>'
)

# Customize the animation
fig.layout.updatemenus[0].buttons[0].args[1]["frame"]["duration"] = 800
fig.layout.updatemenus[0].buttons[0].args[1]["transition"]["duration"] = 400

# Add grid for easier reading
fig.update_xaxes(
    showgrid=True,
    gridwidth=1,
    gridcolor='rgba(128,128,128,0.2)'
)
fig.update_yaxes(
    showgrid=True,
    gridwidth=1,
    gridcolor='rgba(128,128,128,0.2)'
)

fig.update_layout(
    paper_bgcolor='#0d1117',
    plot_bgcolor='#0d1117',
    font=dict(color='white', size=12),
    height=700,
    margin=dict(l=80, r=40, t=100, b=80),
    legend=dict(
        title=dict(text='Language', font=dict(size=14)),
        font=dict(size=11),
        bgcolor='rgba(13,17,23,0.8)',
        bordercolor='#21262d',
        borderwidth=1
    )
)

# Save the animated visualization
animated_path = output_dir / "animated_language_evolution.html"
fig.write_html(animated_path)
print(f"✅ Saved animated timeline to {animated_path}")
fig.show()

✅ Saved animated timeline to public\visualizations\animated_language_evolution.html
