In [None]:
# default_exp stats

# Stats

> This module provides some wrapping around [PyGithub](https://github.com/PyGithub/PyGithub) to grab some stats from GitHub for a particular organization.

In [None]:
#hide
from nbdev import show_doc

In [None]:
#export
import github
import pandas
from typing import Union, List, Optional, Generator, Dict, Iterable
import pathlib

In [None]:
#export
from dotenv import load_dotenv
from github import Github
import pandas as pd
import os
from fastcore.all import *
from pathlib import Path
from functools import lru_cache
from toolz import itertoolz

### Authentication 

To access the GitHub api you need an access token. You can create one here: https://github.com/settings/tokens. 

The access token will require `repo` scope. When working with this module locally it's probably easiest to put this token in a `.env` file and use `load_dotenv` to load it. See the [python-dotenv](https://github.com/theskumar/python-dotenv) documentation for further details. Alternatively you may want to save the token in a GitHub Secret, especially if you are planning to use this code as part of a GitHub Action. 

In [None]:
#hide
load_dotenv()

True

In [None]:
#export
def create_github_session(GH_TOKEN):
    """Create a PyGithub client for `GH_TOKEN`, store it in the module-level `g`, and return it.

    The client is built first and only then bound to the global, so `g` is left
    untouched if constructing the client raises.
    """
    global g
    session = Github(GH_TOKEN)
    g = session
    return session

In [None]:
GH_TOKEN = os.getenv("GH_TOKEN")

In [None]:
create_github_session(GH_TOKEN)

<github.MainClass.Github at 0x115cd1a00>

In [None]:
#hide
assert type(g) == github.MainClass.Github

# OrgStats
`OrgStats` is a class that contains functionality for getting statistics for a GitHub organization. 


In [None]:
#export
class OrgStats:
    """Class for collecting GitHub statistics for an Organization"""
    def __init__(self, GH_TOKEN: str, org: str):
        """
        Parameters
        ----------
        GH_TOKEN : str
            `GH_TOKEN` is a GitHub access token with at least public repo scope.
            See https://github.com/settings/tokens
        org : str
            a Github Organization
        """
        self.__GH_TOKEN = GH_TOKEN
        self.gh_session = create_github_session(self.__GH_TOKEN)
        self.org = self._get_org(org)

    def __str__(self):
        """Short label built from the organization's `name`."""
        return f"OrgStats: {self.org.name} "

    def __repr__(self):
        """Same text as `__str__`, so instances display readably in a notebook."""
        return self.__str__()

    def _get_org(self, org: str):
        """Look up organization `org` through this instance's own session.

        Uses `self.gh_session` rather than the module-level `g`, so the lookup
        always runs with the token this instance was created with, even if
        `create_github_session` is later called again with another token.
        """
        return self.gh_session.get_organization(org)

In [None]:
show_doc(OrgStats)

<h2 id="OrgStats" class="doc_header"><code>class</code> <code>OrgStats</code><a href="" class="source_link" style="float:right">[source]</a></h2>

> <code>OrgStats</code>(**`GH_TOKEN`**:`str`, **`org`**:`str`)

Class for collecting GitHub statistics for an Organization

In [None]:
load_dotenv()
GH_TOKEN = os.getenv("GH_TOKEN")

To use `OrgStats` you need to pass in a token to authenticate with the GitHub API, and the name of a GitHub organization. We use [ghorgstatstestorg](https://github.com/ghorgstatstestorg) for these examples.

In [None]:
test_org = OrgStats(GH_TOKEN, "ghorgstatstestorg")
test_org

OrgStats: ghorgstatstestorg 

In [None]:
#export
@patch_to(OrgStats)
@lru_cache(maxsize=512)
def get_repos(self, pub_status: Union[None, str] = None) -> List[github.Repository.Repository]:
    """
    Returns the repositories of the organisation as a list.
    Pass `pub_status='public'` or `pub_status='private'` to keep only repositories
    with that visibility; any other value returns every repository.
    """
    every_repo = list(self.org.get_repos())
    # Map the filter keyword onto the value `repo.private` must equal.
    wanted_private = {'private': True, 'public': False}.get(pub_status)
    if wanted_private is None:
        return every_repo
    return [repo for repo in every_repo if repo.private == wanted_private]

In [None]:
show_doc(OrgStats.get_repos)

<h4 id="OrgStats.get_repos" class="doc_header"><code>OrgStats.get_repos</code><a href="__main__.py#L2" class="source_link" style="float:right">[source]</a></h4>

> <code>OrgStats.get_repos</code>(**`pub_status`**:`Union`\[`NoneType`, `str`\]=*`None`*)

Returns repositories for organisaton
optional `pub_status` filter for `public` or `private` repositories

`get_repos` returns all repositories associated with an organization. We can optionally filter by public status. 

In [None]:
test_org.get_repos()

[Repository(full_name="ghorgstatstestorg/repo1"),
 Repository(full_name="ghorgstatstestorg/repo2"),
 Repository(full_name="ghorgstatstestorg/private_repo_1")]

These can also be accessed via the `repos`, `public_repos` and `private_repos` attributes of `OrgStats`.

In [None]:
#export
@patch_to(OrgStats, as_prop=True)
@lru_cache(maxsize=128)
def repos(self):
    """every repository of `org`, i.e. `get_repos()` without a visibility filter"""
    every_repo = self.get_repos()
    return every_repo

In [None]:
#hide
test_org = OrgStats(GH_TOKEN, "ghorgstatstestorg")
assert len(L(test_org.repos).map(type).unique()) == 1
assert L(test_org.repos).map(type).unique()[0] == github.Repository.Repository

In [None]:
#export
@patch_to(OrgStats, as_prop=True)
def repo_count(self):
    """number of repositories returned by `get_repos()` for `org`"""
    every_repo = self.get_repos()
    return len(every_repo)

In [None]:
#hide
type(test_org.repo_count) == int

True

In [None]:
#export
@patch_to(OrgStats, as_prop=True)
@lru_cache(maxsize=128)
def public_repos(self):
    """repositories of `org` selected by `get_repos('public')`"""
    visible = self.get_repos('public')
    return visible

In [None]:
test_org = OrgStats(GH_TOKEN, "ghorgstatstestorg")
assert L(test_org.public_repos).map(lambda x: x.private).unique()[0] == False

In [None]:
#export
@patch_to(OrgStats, as_prop=True)
def public_repo_count(self):
    """number of repositories returned by `get_repos('public')`"""
    visible = self.get_repos('public')
    return len(visible)

In [None]:
#export
@patch_to(OrgStats, as_prop=True)
@lru_cache(maxsize=128)
def private_repos(self):
    """repositories of `org` selected by `get_repos('private')`"""
    hidden = self.get_repos('private')
    return hidden

In [None]:
test_org = OrgStats(GH_TOKEN, "ghorgstatstestorg")
assert L(test_org.private_repos).map(lambda x: x.private).unique()[0] == True

In [None]:
#export
@patch_to(OrgStats, as_prop=True)
def private_repo_count(self):
    """number of repositories returned by `get_repos('private')`"""
    hidden = self.get_repos('private')
    return len(hidden)

The repo attributes can be used to access repositories by type, for example accessing only public repos via `public_repos`.

In [None]:
test_org.public_repos

[Repository(full_name="ghorgstatstestorg/repo1"),
 Repository(full_name="ghorgstatstestorg/repo2")]

# Files 
These methods retrieve information about the files in the repositories.

The files in a repository and the extension of those files can give some information about the kind of content repositories hold. For example if you promised a funder lots of tutorials you may expect more `.ipynb` files.

In [None]:
#export
@patch_to(OrgStats)
def get_repo_files(self, repo: Union[str, github.Repository.Repository]) -> Generator[github.ContentFile.ContentFile, None, None]:
    """Lazily yield every non-directory entry of `repo`.

    Parameters
    ----------
    repo : Union[str, github.Repository.Repository]
        a repository object, or a repository name that is resolved through
        `self.org.get_repo`

    Yields
    ------
    github.ContentFile.ContentFile
        each entry whose `type` is not `"dir"`; directories are expanded in
        first-in, first-out order, so entries are yielded level by level
    """
    from collections import deque  # local import keeps this block self-contained

    if isinstance(repo, str):
        repo = self.org.get_repo(repo)
    # A deque gives O(1) removal from the front; `list.pop(0)` shifts every element.
    pending = deque(repo.get_contents(""))
    while pending:
        entry = pending.popleft()
        if entry.type == "dir":
            # Queue the directory's children behind the entries already waiting.
            pending.extend(repo.get_contents(entry.path))
        else:
            yield entry

In [None]:
show_doc(OrgStats.get_repo_files)

<h4 id="OrgStats.get_repo_files" class="doc_header"><code>OrgStats.get_repo_files</code><a href="__main__.py#L2" class="source_link" style="float:right">[source]</a></h4>

> <code>OrgStats.get_repo_files</code>(**`repo`**:`Union`\[`str`, `Repository`\])

return files for `repo`

In [None]:
#export
@patch_to(OrgStats)
@lru_cache(maxsize=512)
def get_org_files(self, pub_status: Union[None, str]=None) -> Dict[str, List[github.ContentFile.ContentFile]]:
    """Map each repository name of `org` to the list of its files.

    With no `pub_status` every repository is included; `'private'` and `'public'`
    restrict the mapping to `private_repos` / `public_repos`. Any other non-empty
    value selects no repositories and yields an empty dict.
    """
    if not pub_status:
        selected = self.repos
    elif pub_status == 'private':
        selected = self.private_repos
    elif pub_status == 'public':
        selected = self.public_repos
    else:
        selected = ()
    return {repo.name: list(self.get_repo_files(repo)) for repo in selected}

In [None]:
show_doc(OrgStats.get_org_files)

<h4 id="OrgStats.get_org_files" class="doc_header"><code>OrgStats.get_org_files</code><a href="__main__.py#L2" class="source_link" style="float:right">[source]</a></h4>

> <code>OrgStats.get_org_files</code>(**`pub_status`**:`Union`\[`NoneType`, `str`\]=*`None`*)

returns repo files for `org`

Files can also be accessed via the `files`, `files_public` and `files_private` attributes. 

In [None]:
#export
@patch_to(OrgStats, as_prop=True)
@lru_cache(maxsize=256)
def files(self):
    """files for all repos

    Calls `get_org_files()` without a `pub_status` filter, so the mapping covers
    every repository rather than only the private ones.
    """
    return self.get_org_files()

In [None]:
show_doc(OrgStats.files)

<h4 id="OrgStats.files" class="doc_header"><code>OrgStats.files</code><a href="" class="source_link" style="float:right">[source]</a></h4>

files for all repos

In [None]:
#export
@patch_to(OrgStats, as_prop=True)
@lru_cache()
def files_public(self):
    """files of the public repos, as returned by `get_org_files(pub_status='public')`"""
    public_files = self.get_org_files(pub_status='public')
    return public_files

In [None]:
show_doc(OrgStats.files_public)

<h4 id="OrgStats.files_public" class="doc_header"><code>OrgStats.files_public</code><a href="" class="source_link" style="float:right">[source]</a></h4>

files for public repos

In [None]:
#export
@patch_to(OrgStats, as_prop=True)
@lru_cache()
def files_private(self):
    """files of the private repos, as returned by `get_org_files(pub_status='private')`"""
    private_files = self.get_org_files(pub_status='private')
    return private_files

In [None]:
show_doc(OrgStats.files_private)

<h4 id="OrgStats.files_private" class="doc_header"><code>OrgStats.files_private</code><a href="" class="source_link" style="float:right">[source]</a></h4>

files for private repos

### Helper function

In [None]:
#export
def get_ext(x):
    """Return the final suffix of path `x` (including the dot), or `''` if it has none."""
    as_path = Path(x)
    return as_path.suffix

In [None]:
#export
def _get_ext_freqs(files:Generator[github.ContentFile.ContentFile,None,None]) -> Dict[str,int]:
    """Count how often each file extension occurs among the `name`s of `files`.

    Names without an extension (for which `get_ext` returns `''`) are not counted.
    """
    extensions = (get_ext(entry.name) for entry in files)
    return itertoolz.frequencies(ext for ext in extensions if ext != '')

In [None]:
#export
@patch_to(OrgStats)
def get_repo_file_ext_frequency(self, repo: Union[str, github.Repository.Repository]) -> Dict[str,int]:
    """count the file extensions found among the files of `repo`"""
    return _get_ext_freqs(self.get_repo_files(repo))

In [None]:
show_doc(OrgStats.get_repo_file_ext_frequency)

<h4 id="OrgStats.get_repo_file_ext_frequency" class="doc_header"><code>OrgStats.get_repo_file_ext_frequency</code><a href="__main__.py#L2" class="source_link" style="float:right">[source]</a></h4>

> <code>OrgStats.get_repo_file_ext_frequency</code>(**`repo`**:`Union`\[`str`, `Repository`\])

returns frequencies of file extensions for `repo` 

In [None]:
test_org.get_repo_file_ext_frequency('repo2')

{'.md': 1, '.py': 1}

In [None]:
#export
@patch_to(OrgStats)
def get_org_file_ext_frequency(self, pub_status: Union[None, str] = None) ->Dict[str, Dict[str,int]]:
    """count file extensions per repository, for the repos selected by `pub_status`

    `pub_status` is passed straight to `get_org_files`; the result maps each
    repository name to its extension-frequency dict.
    """
    files_by_repo = self.get_org_files(pub_status)
    frequencies = {}
    for repo_name, repo_files in files_by_repo.items():
        frequencies[repo_name] = _get_ext_freqs(repo_files)
    return frequencies

In [None]:
show_doc(OrgStats.get_org_file_ext_frequency)

<h4 id="OrgStats.get_org_file_ext_frequency" class="doc_header"><code>OrgStats.get_org_file_ext_frequency</code><a href="__main__.py#L2" class="source_link" style="float:right">[source]</a></h4>

> <code>OrgStats.get_org_file_ext_frequency</code>(**`pub_status`**:`Union`\[`NoneType`, `str`\]=*`None`*)

returns frequencies of file extensions for repos in [`OrgStats`](/gh_orgstats/stats.html#OrgStats) `org` 

### Snapshot stats
There are two flavours of stats accessible via GitHub: ones which are 'snapshots' in time and ones which are cumulative over time. 'Snapshot' stats include 'forks' and 'stars'. Although these can go up and down over time, we mainly care about their current numbers. 

In [None]:
#export 
@patch_to(OrgStats)
def get_org_snapshot_stats(self, repos: Iterable[github.Repository.Repository]) -> Dict[str,Dict[str,int]]:
    """Collect the stargazer and fork totals of each repository in `repos`.

    The result maps each repository name to a dict with the keys `'stars'` and `'forks'`.
    """
    snapshot = {}
    for repo in repos:
        star_total = repo.get_stargazers().totalCount
        fork_total = repo.get_forks().totalCount
        snapshot[repo.name] = {'stars': star_total, 'forks': fork_total}
    return snapshot

In [None]:
show_doc(OrgStats.get_org_snapshot_stats)

<h4 id="OrgStats.get_org_snapshot_stats" class="doc_header"><code>OrgStats.get_org_snapshot_stats</code><a href="__main__.py#L2" class="source_link" style="float:right">[source]</a></h4>

> <code>OrgStats.get_org_snapshot_stats</code>(**`repos`**:`Iterable`\[`Repository`\])

Returns dictionary of star and fork counts for `repos`

You can also access `get_org_snapshot_stats` via `OrgStats` `snapshot_stats` property. 

In [None]:
#export
@patch_to(OrgStats, as_prop=True)
@lru_cache(maxsize=256)
def snapshot_stats(self) -> pd.DataFrame:
    """Star and fork counts of the public repos as a DataFrame, one row per repository"""
    counts_by_repo = self.get_org_snapshot_stats(self.public_repos)
    return pd.DataFrame.from_dict(counts_by_repo, orient='index')

In [None]:
show_doc(OrgStats.snapshot_stats)

<h4 id="OrgStats.snapshot_stats" class="doc_header"><code>OrgStats.snapshot_stats</code><a href="" class="source_link" style="float:right">[source]</a></h4>

Returns a Pandas DataFrame of star and fork counts for public repos

In [None]:
test_org = OrgStats(GH_TOKEN, "ghorgstatstestorg")
test_org.snapshot_stats

Unnamed: 0,stars,forks
repo1,1,0
repo2,0,0


### Long view stats
These are the other flavour of GitHub stats: traffic stats, which include visits to a repository on GitHub and clones of organization repositories. By default GitHub only provides access to recent information for these stats. This means that if we want to be able to access longer-term information for these stats, we need to store and update this information on a regular basis ourselves. This is what the functions below do in combination with GitHub Actions. 

### Traffic stats

In [None]:
#export 
@patch_to(OrgStats)
def get_repo_views_traffic(self, repo: Union[str,github.Repository.Repository], save_dir:Union[str, pathlib.Path]='view_data', load=False) -> Optional[pd.DataFrame]:
    """gets views traffic for `repo` and saves as csv in `save_dir`

    The traffic returned by `repo.get_views_traffic()` is merged with any rows
    already stored in `<save_dir>/<repo.name>_views_traffic.csv`; for a date
    present in both, the freshly fetched row wins. The merged table is then
    written back to that same file.

    Parameters
    ----------
    repo : Union[str,github.Repository.Repository]
        repository from `org`; a name is resolved through `self.org.get_repo`
    save_dir : Union[str, pathlib.Path], optional
        directory where output CSV should be saved, by default 'view_data'.
        Created (including missing parents) if it does not exist.
    load : bool, optional
        load data into a Pandas DataFrame, by default False

    Returns
    -------
    Optional[pd.DataFrame]
        contains unique and total views for `repo` with dates when `load` is
        true, otherwise None
    """
    if isinstance(repo, str):
        repo = self.org.get_repo(repo)
    columns = ["total_views", "unique_views"]
    latest = {
        view.timestamp: {
            "total_views": view.count,
            "unique_views": view.uniques,
        }
        for view in repo.get_views_traffic()['views']
    }

    out_dir = Path(save_dir)
    csv_path = out_dir / f'{repo.name}_views_traffic.csv'
    try:
        history = pd.read_csv(csv_path, index_col="_date", parse_dates=["_date"]).to_dict(orient="index")
    except (FileNotFoundError, pd.errors.EmptyDataError):
        # No usable history yet: start from the freshly fetched rows only.
        # Any other failure propagates, so an unreadable history file is not
        # silently replaced by the most recent rows.
        history = {}
    merged = {**history, **latest}

    traffic_frame = pd.DataFrame.from_dict(data=merged, orient="index", columns=columns)
    traffic_frame.index.name = "_date"
    out_dir.mkdir(parents=True, exist_ok=True)
    traffic_frame.to_csv(csv_path)
    if load:
        return traffic_frame
    return None

In [None]:
#hide
test_org = OrgStats(GH_TOKEN, "ghorgstatstestorg")
test_org.get_repo_views_traffic(test_org.repos[0], 'test_dir')
assert Path('test_dir').exists() == True
assert type(pd.read_csv('test_dir/repo1_views_traffic.csv')) == pd.core.frame.DataFrame
assert len(pd.read_csv('test_dir/repo1_views_traffic.csv',index_col="_date", parse_dates=["_date"]).columns) ==2 
Path('test_dir/repo1_views_traffic.csv').unlink()
Path('test_dir').rmdir()

In [None]:
show_doc(OrgStats.get_repo_views_traffic, doc_string=False)

<h4 id="OrgStats.get_repo_views_traffic" class="doc_header"><code>OrgStats.get_repo_views_traffic</code><a href="__main__.py#L2" class="source_link" style="float:right">[source]</a></h4>

> <code>OrgStats.get_repo_views_traffic</code>(**`repo`**:`Union`\[`str`, `Repository`\], **`save_dir`**:`Union`\[`str`, `Path`\]=*`'view_data'`*, **`load`**=*`False`*)



Gets views traffic for `repo` and saves as csv in `save_dir`. 
`repo` is a repository under the GitHub Organization.
`save_dir` is the directory where output CSV should be saved, by default `view_data`
load is an optional flag which loads data into a Pandas DataFrame, by default `False`

In [None]:
test_org = OrgStats(GH_TOKEN, "ghorgstatstestorg")
test_org.get_repo_views_traffic(test_org.repos[0], 'test_dir',load=True).head(3)

Unnamed: 0_level_0,total_views,unique_views
_date,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-11-30,2,1
2020-12-01,1,1


In [None]:
#hide
csvs = list(Path('test_dir').glob('*.csv'));list(map(Path.unlink,csvs));Path('test_dir').rmdir()

In [None]:
#export 
@patch_to(OrgStats)
def get_org_views_traffic(self, public_only:bool=True, save_dir:Union[str,pathlib.Path]='view_data',
                          repos:Optional[Iterable[github.Repository.Repository]]=None, load=False) -> Union[None, pd.DataFrame]:
    """Get view traffic for multiple repos from `Org`

    Calls `get_repo_views_traffic` for every selected repository, which stores
    one CSV per repository in `save_dir`.

    Parameters
    ----------
    public_only : bool, optional
        only get stats for public repos, by default True. Ignored when a
        non-empty `repos` is given.
    save_dir : Union[str,pathlib.Path], optional
        directory where csvs of stats should be saved, by default 'view_data'
    repos : Optional[Iterable[github.Repository.Repository]], optional
        to access stats for a specific set of repos, by default None. With
        `repos=None` and `public_only=False`, all repositories of the
        organization are used.
    load : bool, optional
        whether to load views data into a DataFrame, by default False

    Returns
    -------
    Union[None, pd.DataFrame]
        when `load` is true, a DataFrame whose columns are
        `(repo name, column name)` pairs; otherwise None
    """
    if not repos:
        if public_only:
            repos = self.public_repos
        elif repos is None:
            # Previously this case tried to iterate over None and crashed.
            repos = self.repos
    # Materialize once: `repos` is walked twice below, which would leave a
    # one-shot iterable (e.g. a generator) empty on the second pass.
    repos = list(repos)
    dfs = [self.get_repo_views_traffic(repo, save_dir, load) for repo in repos]
    if not load:
        return None
    return pd.DataFrame.from_dict(
        {
            (repo.name, column): values
            for repo, df in zip(repos, dfs)
            for column, values in df.to_dict().items()
        }
    )

In [None]:
show_doc(OrgStats.get_org_views_traffic)

<h4 id="OrgStats.get_org_views_traffic" class="doc_header"><code>OrgStats.get_org_views_traffic</code><a href="__main__.py#L2" class="source_link" style="float:right">[source]</a></h4>

> <code>OrgStats.get_org_views_traffic</code>(**`public_only`**:`bool`=*`True`*, **`save_dir`**:`Union`\[`str`, `Path`\]=*`'view_data'`*, **`repos`**:`Optional`\[`Iterable`\[`Repository`\]\]=*`None`*, **`load`**=*`False`*)

Get view traffic for multiple repos from `Org`

Parameters
----------
public_only : bool, optional
    only get stats for public repos, by default True
save_dir : Union[str,pathlib.Path], optional
    directory where csvs of stats should be saved, by default 'view_data'
repos : Optional[Iterable[github.Repository.Repository]], optional
    to access stats for a specific set of repos, by default None
load : bool, optional
    whether to load views data into a DataFrame, by default False

Returns
-------
Union[None, pd.DataFrame]

In [None]:
test_org = OrgStats(GH_TOKEN, "ghorgstatstestorg")
test_org.get_org_views_traffic(load=True).head(3)

Unnamed: 0_level_0,repo1,repo1,repo2,repo2
Unnamed: 0_level_1,total_views,unique_views,total_views,unique_views
2020-11-30,2,1,8.0,1.0
2020-12-01,1,1,,


In [None]:
assert len(test_org.get_org_views_traffic(load=True).columns)/2 == len(test_org.public_repos)
assert len(test_org.get_org_views_traffic(repos=test_org.repos, load=True).columns)/2 == len(test_org.repos)

### Clones

In [None]:
#export 
@patch_to(OrgStats)
def get_repo_clones_traffic(self, repo: Union[str, github.Repository.Repository],
                            save_dir:Union[str, pathlib.Path]='clones_data', load=False) -> Optional[pd.DataFrame]:
    """gets clones traffic for `repo` and saves as csv in `save_dir`

    The traffic returned by `repo.get_clones_traffic()` is merged with any rows
    already stored in `<save_dir>/<repo.name>_clones_traffic.csv`; for a date
    present in both, the freshly fetched row wins. The merged table is then
    written back to that same file. History is read from `save_dir` itself, so
    a custom directory keeps accumulating its own data.

    Parameters
    ----------
    repo : Union[str,github.Repository.Repository]
        repository from `org`; a name is resolved through `self.org.get_repo`
    save_dir : Union[str, pathlib.Path], optional
        directory where output CSV should be saved, by default 'clones_data'.
        Created (including missing parents) if it does not exist.
    load : bool, optional
        load data into a Pandas DataFrame, by default False

    Returns
    -------
    Optional[pd.DataFrame]
        contains unique and total clones for `repo` with dates when `load` is
        true, otherwise None
    """
    if isinstance(repo, str):
        repo = self.org.get_repo(repo)
    columns = ["total_clones", "unique_clones"]
    latest = {
        clone.timestamp: {
            "total_clones": clone.count,
            "unique_clones": clone.uniques,
        }
        for clone in repo.get_clones_traffic()['clones']
    }

    out_dir = Path(save_dir)
    csv_path = out_dir / f'{repo.name}_clones_traffic.csv'
    try:
        history = pd.read_csv(csv_path, index_col="_date", parse_dates=["_date"]).to_dict(orient="index")
    except (FileNotFoundError, pd.errors.EmptyDataError):
        # No usable history yet: start from the freshly fetched rows only.
        # Any other failure propagates, so an unreadable history file is not
        # silently replaced by the most recent rows.
        history = {}
    merged = {**history, **latest}

    clones_frame = pd.DataFrame.from_dict(data=merged, orient="index", columns=columns)
    clones_frame.index.name = "_date"
    out_dir.mkdir(parents=True, exist_ok=True)
    clones_frame.to_csv(csv_path)
    if load:
        return clones_frame
    return None

In [None]:
show_doc(OrgStats.get_repo_clones_traffic)

<h4 id="OrgStats.get_repo_clones_traffic" class="doc_header"><code>OrgStats.get_repo_clones_traffic</code><a href="__main__.py#L2" class="source_link" style="float:right">[source]</a></h4>

> <code>OrgStats.get_repo_clones_traffic</code>(**`repo`**:`Repository`, **`save_dir`**:`Union`\[`str`, `Path`\]=*`'clones_data'`*, **`load`**=*`False`*)

gets clones traffic for `repo` and saves as csv in `save_dir`

Parameters
----------
repo : Union[str,github.Repository.Repository]
    repository from `org`
save_dir : Union[str, pathlib.Path], optional
    directory where output CSV should be saved, by default 'view_data'
load : bool, optional
    load data into a Pandas DataFrame, by default False

Returns
-------
pd.DataFrame
    contains unique and total clones for `repo` with dates

In [None]:
test_org = OrgStats(GH_TOKEN, "ghorgstatstestorg")
test_org.get_repo_clones_traffic('repo1',save_dir='test_dir', load=True)

Unnamed: 0_level_0,total_clones,unique_clones
_date,Unnamed: 1_level_1,Unnamed: 2_level_1


In [None]:
assert len(test_org.get_repo_clones_traffic(test_org.public_repos[1], load=True).columns) == 2

In [None]:
#hide
csvs = list(Path('test_dir').glob('*.csv'));list(map(Path.unlink,csvs));Path('test_dir').rmdir()

In [None]:
#export 
@patch_to(OrgStats)
def get_org_clones_traffic(self, public_only:bool = True, repos: Optional[Iterable[github.Repository.Repository]] = None,
                           save_dir:Union[str,pathlib.Path]='clones_data', load=False) -> Union[None,pd.DataFrame]:
    """get clone traffic for multiple repos from `Org`

    Calls `get_repo_clones_traffic` for every selected repository, which stores
    one CSV per repository in `save_dir`.

    Parameters
    ----------
    public_only : bool, optional
        only get stats for public repos, by default True. Ignored when a
        non-empty `repos` is given.
    repos : Optional[Iterable[github.Repository.Repository]], optional
        to access stats for a specific set of repos, by default None. With
        `repos=None` and `public_only=False`, all repositories of the
        organization are used.
    save_dir : Union[str,pathlib.Path], optional
        directory where csvs of stats should be saved, by default 'clones_data'
    load : bool, optional
        whether to load clones data into a DataFrame, by default False

    Returns
    -------
    Union[None, pd.DataFrame]
        when `load` is true, a DataFrame whose columns are
        `(repo name, column name)` pairs; otherwise None
    """
    if not repos:
        if public_only:
            repos = self.public_repos
        elif repos is None:
            # Previously this case tried to iterate over None and crashed.
            repos = self.repos
    # Materialize once: `repos` is walked twice below, which would leave a
    # one-shot iterable (e.g. a generator) empty on the second pass.
    repos = list(repos)
    dfs = [self.get_repo_clones_traffic(repo, save_dir, load) for repo in repos]
    if not load:
        return None
    return pd.DataFrame.from_dict(
        {
            (repo.name, column): values
            for repo, df in zip(repos, dfs)
            for column, values in df.to_dict().items()
        }
    )

In [None]:
show_doc(OrgStats.get_org_clones_traffic)

<h4 id="OrgStats.get_org_clones_traffic" class="doc_header"><code>OrgStats.get_org_clones_traffic</code><a href="__main__.py#L2" class="source_link" style="float:right">[source]</a></h4>

> <code>OrgStats.get_org_clones_traffic</code>(**`public_only`**:`bool`=*`True`*, **`repos`**:`Optional`\[`Iterable`\[`Repository`\]\]=*`None`*, **`save_dir`**:`Union`\[`str`, `Path`\]=*`'clones_data'`*, **`load`**=*`False`*)

get clone traffic for multiple repos from `Org`

Parameters
----------
public_only : bool, optional
    only get stats for public repos, by default True
save_dir : Union[str,pathlib.Path], optional
    directory where csvs of stats should be saved, by default 'view_data'
repos : Optional[Iterable[github.Repository.Repository]], optional
    to access stats for a specific set of repos, by default None
load : bool, optional
    whether to load views data into a DataFrame, by default False

Returns
-------
Union[None, pd.DataFrame]

In [None]:
test_org = OrgStats(GH_TOKEN, "ghorgstatstestorg")
assert type(test_org.get_org_clones_traffic(repos=test_org.repos, save_dir='test_dir',load=True)) == pd.core.frame.DataFrame
assert (len(test_org.get_org_clones_traffic(save_dir='test_dir',load=True).columns) /2)  == test_org.public_repo_count 

In [None]:
#hide
test_org = OrgStats(GH_TOKEN, "ghorgstatstestorg")
test_org.get_org_clones_traffic(save_dir='test_dir')
assert Path('test_dir').exists() == True
assert type(pd.read_csv('test_dir/repo1_clones_traffic.csv')) == pd.core.frame.DataFrame
assert len(pd.read_csv('test_dir/repo1_clones_traffic.csv',index_col="_date", parse_dates=["_date"]).columns) ==2 
csvs = list(Path('test_dir').glob('*.csv'));list(map(Path.unlink,csvs));Path('test_dir').rmdir()

In [None]:
from nbdev.export import notebook2script; notebook2script()

Converted 01_stats.ipynb.
Converted index.ipynb.
