In [1]:
# Import the Bokeh plotting API and the notebook I/O helpers
from bokeh.plotting import figure, show
import bokeh.io as nbo

# Reset Bokeh's output state, then send plots to the notebook
nbo.reset_output()
nbo.output_notebook()

# Body mass per animal. The values are kilograms (an elephant at 3000 is
# 3 tonnes), so both the variable name and the axis label carry the unit.
animals = ['lion', 'leopard', 'elephant', 'rhino', 'buffalo']
weight_kg = [190, 90, 3000, 2300, 590]

# Categorical x-axis with one bar per animal; the toolbar is disabled
p = figure(x_range=animals, height=350, title="Big Five weight",
           x_axis_label="Animal", y_axis_label="Weight (kg)",
           toolbar_location=None, tools="")

p.vbar(x=animals, top=weight_kg, width=0.9)

# Hide the vertical grid lines and start the y-axis at zero
p.xgrid.grid_line_color = None
p.y_range.start = 0

show(p)


In [2]:
import pandas as pd

# Read the issue-count CSV into a dataframe
issues_data = pd.read_csv(filepath_or_buffer='../datasets/augur/1D-issues.csv')

# Summary statistics of the numeric columns; as the last expression of the
# cell this is what gets rendered as the cell output
issues_data.describe()

Unnamed: 0,repo_id,issue_total
count,64.0,64.0
mean,25114.5625,3974.78125
std,3189.085747,8650.317044
min,1.0,1.0
25%,25495.75,142.25
50%,25512.5,976.5
75%,25529.25,2594.0
max,25557.0,44040.0


In [3]:
import pandas as pd
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
from bokeh.layouts import row
from bokeh.models import ColumnDataSource
import numpy as np

# To display plots inline in a notebook.
# The first cell already calls nbo.output_notebook(); the call is repeated
# here using the name imported directly from bokeh.io in this cell.
output_notebook()

In [4]:

# Sanity check: print the first rows of the issues dataframe
print(issues_data.head())

# Columns backing the chart: repository URL and its issue total
repos = issues_data['repo_git'].tolist()
issue_totals = issues_data['issue_total'].tolist()
source = ColumnDataSource(data={'repos': repos, 'issue_totals': issue_totals})

# Horizontal bar chart using the repository URLs as categorical y-axis factors
bar_chart = figure(
    y_range=repos,
    height=1000,
    width=800,
    title="Total Issues per Repository",
    toolbar_location=None,
    tools="",
)
bar_chart.hbar(
    y='repos',
    right='issue_totals',
    height=0.8,
    source=source,
    line_color='white',
    fill_color="#718dbf",
)

# Axis cosmetics: label both axes, hide the y grid lines, start the x-axis at zero
bar_chart.xaxis.axis_label = "Total Issues"
bar_chart.yaxis.axis_label = "Repository"
bar_chart.ygrid.grid_line_color = None
bar_chart.x_range.start = 0

# Render the bar chart
show(bar_chart)

   repo_id                                    repo_git  issue_total
0        1             https://github.com/chaoss/augur          663
1    25480     https://github.com/instructlab/taxonomy          143
2    25481  https://github.com/instructlab/instructlab          745
3    25482      https://github.com/instructlab/website           14
4    25483      https://github.com/instructlab/.github            2


In [5]:
# Bin issue_total into 8 equal-width buckets spanning its min..max
hist, edges = np.histogram(
    issues_data['issue_total'],
    bins=8,
    range=(issues_data['issue_total'].min(), issues_data['issue_total'].max()),
)

histogram = figure(
    height=300,
    width=800,
    title="Distribution of Total Issues",
    toolbar_location=None,
    tools="",
)

# One rectangle per bin: consecutive edges give the left/right sides,
# the bin count gives the height
histogram.quad(
    left=edges[:-1],
    right=edges[1:],
    bottom=0,
    top=hist,
    fill_color="#036564",
    line_color="#033649",
)

histogram.yaxis.axis_label = "Frequency"
histogram.xaxis.axis_label = "Total Issues"

# Render the histogram
show(histogram)

In [6]:
import pandas as pd
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
from bokeh.transform import cumsum
from bokeh.models import ColumnDataSource
from math import pi

# To display plots inline in a notebook.
# Earlier cells already call output_notebook(); it is repeated here
# alongside this cell's imports.
output_notebook()

In [7]:
# Load the programming-language dataset from a CSV file
data = pd.read_csv('../datasets/augur/1D-Programming-language.csv')

# Print the first 10 rows of the dataframe to ensure it's loaded correctly.
# Note: a bare `data.describe()` in the middle of a cell is evaluated and then
# discarded (only a cell's last expression is rendered), so it is not called
# here; put it at the end of its own cell to see the summary statistics.
print(data.head(10))

   repo_id programming_language                         repo_git   lines
0        1               Python  https://github.com/chaoss/augur  518623
1        1                  SQL  https://github.com/chaoss/augur  368823
2        1                Jinja  https://github.com/chaoss/augur   28063
3        1                  CSS  https://github.com/chaoss/augur    8091
4        1                 HTML  https://github.com/chaoss/augur    2210
5        1             Makefile  https://github.com/chaoss/augur    1581
6        1                  INI  https://github.com/chaoss/augur     676
7        1                 Mako  https://github.com/chaoss/augur     221
8        1           JavaScript  https://github.com/chaoss/augur     195
9        1             Autoconf  https://github.com/chaoss/augur      52


In [8]:
import pandas as pd
import numpy as np
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
from bokeh.models import ColumnDataSource, LabelSet, Segment, Title
from bokeh.palettes import Category20
from math import pi

# To display plots inline in a notebook
output_notebook()

# --- Aggregate -------------------------------------------------------------
# Group the data by programming_language and sum the lines of code
language_data = data.groupby('programming_language')['lines'].sum().reset_index()

# Calculate the percentage for each language
language_data['percentage'] = language_data['lines'] / language_data['lines'].sum() * 100

# Relabel languages with less than 2% of the total lines as "Other"
language_data.loc[language_data['percentage'] < 2, 'programming_language'] = 'Other'

# Collapse the relabelled rows into a single "Other" row and recompute shares
language_data = language_data.groupby('programming_language').agg({'lines': 'sum'}).reset_index()
language_data['percentage'] = language_data['lines'] / language_data['lines'].sum() * 100

# --- Geometry --------------------------------------------------------------
# Angle (radians) swept by each slice; the slices together cover 2*pi
language_data['angle'] = language_data['lines'] / language_data['lines'].sum() * 2 * pi

# Mid-angle of each slice: halfway between the running total before the slice
# (shifted cumsum, starting at 0) and the running total after it
language_data['angle_cumsum'] = language_data['angle'].cumsum()
language_data['angle_cumsum_shifted'] = language_data['angle_cumsum'].shift(fill_value=0)
language_data['angle_mid'] = (language_data['angle_cumsum_shifted'] + language_data['angle_cumsum']) / 2

# Set the radius for labels and lines (the wedges below use radius 0.7)
label_radius = 1.2  # Further from the pie chart for labels
line_radius = 0.6  # Inside the edge of the pie chart

# Label anchor and leader-line start for each slice; the pie is centred at (0, 1)
language_data['x_label'] = label_radius * np.cos(language_data['angle_mid'])
language_data['y_label'] = 1 + label_radius * np.sin(language_data['angle_mid'])

language_data['x_line'] = line_radius * np.cos(language_data['angle_mid'])
language_data['y_line'] = 1 + line_radius * np.sin(language_data['angle_mid'])

# Nudge labels horizontally away from the pie: slices whose mid-angle points
# left (cos < 0) move left, all others move right. The side must be decided
# from the cosine; comparing angle_mid with pi would pick the bottom half of
# the circle rather than the left half.
language_data['x_label'] += np.where(np.cos(language_data['angle_mid']) < 0, -0.15, 0.15)

# Nudge labels vertically: the bottom half (mid-angle past pi) moves down,
# the top half moves up
language_data['y_label'] += np.where(language_data['angle_mid'] > pi, -0.15, 0.15)

# --- Colours ---------------------------------------------------------------
# Use a Bokeh palette; Category20 gives 20 distinct colors. The palette is
# repeated so there are always at least as many entries as rows.
palette = Category20[20] * (len(language_data) // 20 + 1)
language_data['color'] = palette[:len(language_data)]

# Convert the data to a ColumnDataSource
source = ColumnDataSource(language_data)

# --- Plot ------------------------------------------------------------------
# Create a pie chart
pie_chart = figure(height=800, width=800, title="Share of Programming Languages by Lines of Code",
                   toolbar_location=None, tools="hover", tooltips="@programming_language: @percentage{0.2f}%", x_range=(-1.5, 1.5))

# `cumsum` comes from bokeh.transform, imported in an earlier cell
pie_chart.wedge(x=0, y=1, radius=0.7,
                start_angle=cumsum('angle', include_zero=True), end_angle=cumsum('angle'),
                line_color="white", fill_color='color', legend_field='programming_language', source=source)

# Draw the lines connecting the labels to the pie chart slices
pie_chart.segment(x0='x_line', y0='y_line', x1='x_label', y1='y_label', line_width=1.5, line_color='black', source=source)

# Add labels to the slices, slightly outside the pie chart
labels = LabelSet(x='x_label', y='y_label', text='programming_language', source=source, text_font_size="8pt",
                  text_align='center', text_baseline='middle')
pie_chart.add_layout(labels)

# Configure the legend
pie_chart.legend.title = "Programming Language"
pie_chart.legend.label_text_font_size = "8pt"
pie_chart.legend.location = "bottom_right"
pie_chart.legend.orientation = "vertical"

# Add a caption
caption = Title(text="Languages, yo", align="center")
pie_chart.add_layout(caption, 'below')

# Display the pie chart
show(pie_chart)

In [9]:
import pandas as pd
import numpy as np
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
from bokeh.models import ColumnDataSource, LabelSet, Segment, Title
from bokeh.palettes import Colorblind
from math import pi

# To display plots inline in a notebook
output_notebook()

# --- Aggregate -------------------------------------------------------------
# Group the data by programming_language and sum the lines of code
language_data = data.groupby('programming_language')['lines'].sum().reset_index()

# Calculate the percentage for each language
language_data['percentage'] = language_data['lines'] / language_data['lines'].sum() * 100

# Relabel languages with less than 2% of the total lines as "Other"
language_data.loc[language_data['percentage'] < 2, 'programming_language'] = 'Other'

# Collapse the relabelled rows into a single "Other" row and recompute shares
language_data = language_data.groupby('programming_language').agg({'lines': 'sum'}).reset_index()
language_data['percentage'] = language_data['lines'] / language_data['lines'].sum() * 100

# --- Geometry --------------------------------------------------------------
# Angle (radians) swept by each slice; the slices together cover 2*pi
language_data['angle'] = language_data['lines'] / language_data['lines'].sum() * 2 * pi

# Mid-angle of each slice: halfway between the running total before the slice
# (shifted cumsum, starting at 0) and the running total after it
language_data['angle_cumsum'] = language_data['angle'].cumsum()
language_data['angle_cumsum_shifted'] = language_data['angle_cumsum'].shift(fill_value=0)
language_data['angle_mid'] = (language_data['angle_cumsum_shifted'] + language_data['angle_cumsum']) / 2

# Set the radius for labels and lines (the wedges below use radius 0.7)
label_radius = 1.2  # Further from the pie chart for labels
line_radius = 0.6  # Inside the edge of the pie chart

# Label anchor and leader-line start for each slice; the pie is centred at (0, 1)
language_data['x_label'] = label_radius * np.cos(language_data['angle_mid'])
language_data['y_label'] = 1 + label_radius * np.sin(language_data['angle_mid'])

language_data['x_line'] = line_radius * np.cos(language_data['angle_mid'])
language_data['y_line'] = 1 + line_radius * np.sin(language_data['angle_mid'])

# Nudge labels horizontally away from the pie: slices whose mid-angle points
# left (cos < 0) move left, all others move right. The side must be decided
# from the cosine; comparing angle_mid with pi would pick the bottom half of
# the circle rather than the left half.
language_data['x_label'] += np.where(np.cos(language_data['angle_mid']) < 0, -0.15, 0.15)

# Nudge labels vertically: the bottom half (mid-angle past pi) moves down,
# the top half moves up
language_data['y_label'] += np.where(language_data['angle_mid'] > pi, -0.15, 0.15)

# --- Colours ---------------------------------------------------------------
# Use a colorblind-safe palette, cycling if there are more categories than colors.
# Colorblind is keyed by palette size and only offers a limited range of sizes,
# so the requested size is clamped to the available keys; otherwise a chart
# with fewer categories than the smallest palette would raise KeyError.
color_count = max(min(Colorblind.keys()), min(len(language_data), max(Colorblind.keys())))
palette = Colorblind[color_count] * (len(language_data) // color_count + 1)

# Assign colors to the language data (one palette entry per row)
language_data['color'] = palette[:len(language_data)]

# Convert the data to a ColumnDataSource
source = ColumnDataSource(language_data)

# --- Plot ------------------------------------------------------------------
# Create a pie chart
pie_chart = figure(height=800, width=800, title="Share of Programming Languages by Lines of Code",
                   toolbar_location=None, tools="hover", tooltips="@programming_language: @percentage{0.2f}%", x_range=(-1.5, 1.5))

# `cumsum` comes from bokeh.transform, imported in an earlier cell
pie_chart.wedge(x=0, y=1, radius=0.7,
                start_angle=cumsum('angle', include_zero=True), end_angle=cumsum('angle'),
                line_color="white", fill_color='color', legend_field='programming_language', source=source)

# Draw the lines connecting the labels to the pie chart slices
pie_chart.segment(x0='x_line', y0='y_line', x1='x_label', y1='y_label', line_width=1.5, line_color='black', source=source)

# Add labels to the slices, slightly outside the pie chart
labels = LabelSet(x='x_label', y='y_label', text='programming_language', source=source, text_font_size="8pt",
                  text_align='center', text_baseline='middle')
pie_chart.add_layout(labels)

# Configure the legend
pie_chart.legend.title = "Programming Language"
pie_chart.legend.label_text_font_size = "8pt"
pie_chart.legend.location = "center_right"
pie_chart.legend.orientation = "vertical"

# Add a caption
caption = Title(text="Languages, yo", align="center")
pie_chart.add_layout(caption, 'below')

# Display the pie chart
show(pie_chart)