# LF AI & Data Foundation - Tool Exploratory Data Analysis

This notebook explores tools that are part of, or associated with, the LF AI & Data Foundation.

In [None]:
from datetime import datetime

import pandas as pd
import pytz
import requests
import yaml

# UTC tzinfo object; passed to datetime.now(tz=...) further down to build a
# timezone-aware "now"
utc = pytz.UTC

In [None]:
# gather data: download the processed LF AI & Data landscape YAML file
link = (
    "https://raw.githubusercontent.com/lfai/lfai-landscape/main/processed_landscape.yml"
)
# a timeout keeps the notebook from hanging indefinitely on a stalled connection
raw_content = requests.get(link, timeout=30)
# fail fast on an HTTP error instead of handing an error page to the YAML parser
raw_content.raise_for_status()
# show the size of the downloaded text as a quick sanity check
len(raw_content.text)

In [None]:
# parse the downloaded YAML text and show the top-level keys of the result
dict_content = yaml.safe_load(raw_content.text)
dict_content.keys()

In [None]:
# flatten the nested landscape into one row per item: walk
# landscape -> subcategories -> items and carry the "category.name" meta
# field along on every row
df = pd.json_normalize(
    dict_content["landscape"],
    record_path=["subcategories", "items"],
    meta=[["category", "name"]],
)
# remove the "item" column before any further analysis
df = df.drop(columns="item")
df.head()

In [None]:
# gather days since initial commit
# Parse with utc=True so every timestamp is timezone-aware UTC; the subtraction
# against the timezone-aware "now" below is then always valid, even if the
# source strings carry mixed offsets or none at all.
df["github_start_commit_data.start_date"] = pd.to_datetime(
    df["github_start_commit_data.start_date"], utc=True
)
now_utc = pd.to_datetime(datetime.now(tz=utc))
# `.dt.days` extracts whole days from each timedelta. Casting with
# `.astype("timedelta64[D]")` is rejected by pandas >= 2.0, which only supports
# s/ms/us/ns timedelta resolutions. Rows without a start date stay missing (NaN).
df["days_since_first_commit"] = (
    (df["github_start_commit_data.start_date"] - now_utc).abs().dt.days
)
df["days_since_first_commit"].head()

In [None]:
# inspect the first-commit timestamp column after the datetime conversion
df["github_start_commit_data.start_date"]

In [None]:
# count how many rows fall under each category name
df["category.name"].value_counts()

In [None]:
# Workflow tools, ranked by GitHub stars, then contributor count, then days
# since first commit (all descending); only the summary columns are kept
df.loc[
    df["category.name"] == "Workflow",
    [
        "name",
        "homepage_url",
        "crunchbase_data.name",
        "github_data.stars",
        "github_data.contributors_count",
        "days_since_first_commit",
    ],
].sort_values(
    by=[
        "github_data.stars",
        "github_data.contributors_count",
        "days_since_first_commit",
    ],
    ascending=False,
)

In [None]:
# Visualization tools, ranked by GitHub stars, then contributor count, then
# days since first commit (all descending); only the summary columns are kept
df.loc[
    df["category.name"] == "Visualization",
    [
        "name",
        "homepage_url",
        "crunchbase_data.name",
        "github_data.stars",
        "github_data.contributors_count",
        "days_since_first_commit",
    ],
].sort_values(
    by=[
        "github_data.stars",
        "github_data.contributors_count",
        "days_since_first_commit",
    ],
    ascending=False,
)