In [0]:
pip install azure-storage-blob

Python interpreter will be restarted.
Collecting azure-storage-blob
  Using cached azure_storage_blob-12.17.0-py3-none-any.whl (388 kB)
Collecting typing-extensions>=4.3.0
  Using cached typing_extensions-4.7.1-py3-none-any.whl (33 kB)
Collecting isodate>=0.6.1
  Using cached isodate-0.6.1-py2.py3-none-any.whl (41 kB)
Collecting azure-core<2.0.0,>=1.28.0
  Using cached azure_core-1.28.0-py3-none-any.whl (185 kB)
Installing collected packages: typing-extensions, isodate, azure-core, azure-storage-blob
  Attempting uninstall: typing-extensions
    Found existing installation: typing-extensions 4.1.1
    Not uninstalling typing-extensions at /databricks/python3/lib/python3.9/site-packages, outside environment /local_disk0/.ephemeral_nfs/envs/pythonEnv-7c050549-77ee-4d91-8cb0-f7242ab19297
    Can't uninstall 'typing-extensions'. No files were found to uninstall.
Successfully installed azure-core-1.28.0 azure-storage-blob-12.17.0 isodate-0.6.1 typing-extensions-4.7.1
Python interpreter will

In [0]:
import requests
import pandas as pd
import json
from azure.storage.blob import BlobServiceClient, BlobClient, ContainerClient
from azure.core.pipeline.transport import HttpResponse
from pyspark import SparkContext, SparkConf
from pyspark.sql import SparkSession
from pyspark.sql.functions import from_json, col, explode,to_date, date_format, count, desc, countDistinct, regexp_extract,regexp_replace, udf
from pyspark.sql.types import StructType, StructField, StringType, IntegerType, BooleanType, ArrayType
import gzip
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots



In [0]:
# Read the storage account key from the Databricks secret scope "azureStorage"
# (secret name "accessKey") instead of hardcoding the credential in the notebook.
storage_account_access_key = dbutils.secrets.get(scope="azureStorage", key="accessKey")

In [0]:
# Reuse the active Spark session, or create one if none exists yet.
spark = SparkSession.builder.getOrCreate()

# Azure Blob Storage account and container that hold the issues dataset.
storage_account_name = "pod4projectstorage"
container_name = "issues"

# Register the account key in the Spark configuration for this storage account.
blob_host = f"{storage_account_name}.blob.core.windows.net"
spark.conf.set(f"fs.azure.account.key.{blob_host}", storage_account_access_key)

# Read the parquet data stored at the root of the container.
df = spark.read.parquet(f"wasbs://{container_name}@{blob_host}/")

In [0]:
# Quick look at the loaded data: first 20 rows, long cell values truncated
# (these are the defaults of DataFrame.show, spelled out here).
df.show(n=20, truncate=True)

+--------------------+--------------------+--------------------+--------------------+--------------------+-----------+
|           repo_name|            html_url|               title|                body|              labels|     target|
+--------------------+--------------------+--------------------+--------------------+--------------------+-----------+
|CachaSan10/reposi...|https://github.co...|Crear el paquete ...|Se creara el paqu...|{45AAA1, false, S...|      other|
|tuxedoware/rammer...|https://github.co...|🛑 rammerhead51 i...|In [`f69969f`](ht...|{ededed, false, n...|      other|
|tuxedoware/rammer...|https://github.co...|🛑 rammerhead51 i...|In [`f69969f`](ht...|{ededed, false, n...|      other|
|reinhart1010/binu...|https://github.co...|🛑 Library and Kn...|In [`3c37150`](ht...|{ededed, false, n...|      other|
|reinhart1010/binu...|https://github.co...|🛑 Library and Kn...|In [`3c37150`](ht...|{ededed, false, n...|      other|
|digitalmaster/roa...|https://github.co...|How about

In [0]:
# How many different repositories appear in the dataset?
df.select("repo_name").dropDuplicates().count()

Out[4]: 10266

In [0]:
# Number of issues in each target class, collected to pandas for plotting.
target_class_count_pd = (
    df.groupBy("target")
    .count()
    .toPandas()
)

In [0]:
# Display the per-target issue counts as the cell output.
target_class_count_pd

Unnamed: 0,target,count
0,enhancement,7738
1,bug,10581
2,other,36178
3,request,1925


In [0]:
# Donut chart of the issue count per target class; every slice is annotated
# with its percentage, label and raw value.
target_pie = go.Pie(
    labels=target_class_count_pd['target'],
    values=target_class_count_pd['count'],
    hole=.3,
    textinfo='percent+label+value',
)
target_fig = go.Figure(data=[target_pie])
target_fig.show()

In [0]:
# Issue counts for every (labels.default, target) combination, ordered by the
# `default` flag and collected to pandas for plotting.
Target_by_default = (
    df.groupBy('labels.default', 'target')
    .count()
    .orderBy('labels.default')
    .toPandas()
)

In [0]:
# Display the (default, target) count table as the cell output.
Target_by_default

Unnamed: 0,default,target,count
0,False,bug,4641
1,False,request,1303
2,False,other,35640
3,False,enhancement,2818
4,True,other,538
5,True,bug,5940
6,True,enhancement,4920
7,True,request,622


In [0]:
# Grouped bar chart: issue count per target class, one colour per value of
# the `default` flag.
Target_by_default_fig = px.bar(
    Target_by_default,
    x='target',
    y='count',
    color='default',
    barmode='group',
    title='Target by Default',
)
Target_by_default_fig.show()

In [0]:
# Ten most frequent (title, target) pairs, excluding issues whose target is 'other'.
title_counts = (
    df.filter(col('target') != 'other')
    .groupBy('title', 'target')
    .count()
    .sort(desc('count'))
    .limit(10)
    .toPandas()
)

In [0]:
# Display the top-10 title table as the cell output.
title_counts

Unnamed: 0,title,target,count
0,Test Issue,bug,57
1,Bump Microting.eForm from 7.0.41 to 7.0.42,enhancement,46
2,Bump Microting.eFormApi.BasePn from 7.0.34 to ...,enhancement,36
3,Bot IssueEx,bug,34
4,New Bot Issue,bug,30
5,Issue closed.,bug,24
6,[IMPROVEMENT] Consolidate volume attach/detach...,enhancement,16
7,Need a service that has a counter,enhancement,15
8,Bump Magick.NET-Q16-x64 from to,enhancement,13
9,🛑 History of art is down,enhancement,13


In [0]:
# Bar chart of the ten most frequent titles (count on x, title on y),
# coloured by target class.
title_counts_fig = px.bar(
    title_counts,
    x='count',
    y='title',
    color='target',
    title='Top 10 title by target',
)
title_counts_fig.show()

In [0]:
# Ten largest (repo_name, target) groups, excluding issues whose target is 'other'.
repo_counts = (
    df.filter(col('target') != 'other')
    .groupBy('repo_name', 'target')
    .count()
    .sort(desc('count'))
    .limit(10)
    .toPandas()
)

In [0]:
# Display the top-10 repository table as the cell output.
repo_counts

Unnamed: 0,repo_name,target,count
0,appsmithorg/appsmith,bug,128
1,googleapis/google-cloud-ruby,bug,98
2,MicrosoftDocs/azure-docs,enhancement,96
3,department-of-veterans-affairs/va.gov-team,request,92
4,dotnet/perf-autofiling-issues,enhancement,87
5,Expensify/App,bug,75
6,jerryjliu/llama_index,request,73
7,webcompat/web-bugs,bug,73
8,wannacfuture/Battleship,bug,68
9,googleapis/google-cloud-python,bug,67


In [0]:
# Bar chart of the ten largest (repo_name, target) groups, coloured by target class.
repo_counts_fig = px.bar(
    repo_counts,
    x='repo_name',
    y='count',
    color='target',
    title='Top 10 repo_name by target',
)
repo_counts_fig.show()

In [0]:
# Dashboard cell: arrange the three bar charts in one 2x2 Plotly subplot grid.
# The `sp` alias for plotly.subplots is introduced by the import below.
import plotly.graph_objs as go
import plotly.subplots as sp
import plotly.express as px


# One entry per panel: (source figure, grid row, grid column, panel title).
# Keeping the title next to its figure guarantees each panel is labelled with
# the chart it actually contains (subplot titles are assigned in row-major order,
# which is the order of this list).
dashboard_panels = [
    (Target_by_default_fig, 1, 1, 'Target by Default'),
    (title_counts_fig, 1, 2, 'Title Counts'),
    (repo_counts_fig, 2, 1, 'Repo Counts'),
]

# Arrange the bar charts into a subplot grid
fig_subplots = sp.make_subplots(
    rows=2,
    cols=2,
    subplot_titles=[panel_title for _, _, _, panel_title in dashboard_panels],
)

# A Plotly Express bar chart built with `color=` holds one trace per colour
# group, so copy every trace of each figure; taking only data[0] would
# silently drop all groups but the first.
for panel_fig, panel_row, panel_col, _ in dashboard_panels:
    for trace in panel_fig.data:
        fig_subplots.add_trace(trace, row=panel_row, col=panel_col)

# Update the layout
fig_subplots.update_layout(title='Plotly Dashboard')

# Show the dashboard
fig_subplots.show()

# Show the pie chart separately
target_fig.show()

[0;31m---------------------------------------------------------------------------[0m
[0;31mAttributeError[0m                            Traceback (most recent call last)
File [0;32m<command-1583817449964748>:11[0m
[1;32m      8[0m title_counts_fig [38;5;241m=[39m px[38;5;241m.[39mbar(title_counts, y[38;5;241m=[39m[38;5;124m'[39m[38;5;124mtitle[39m[38;5;124m'[39m, x[38;5;241m=[39m[38;5;124m'[39m[38;5;124mcount[39m[38;5;124m'[39m, color[38;5;241m=[39m[38;5;124m'[39m[38;5;124mtarget[39m[38;5;124m'[39m, title[38;5;241m=[39m[38;5;124m'[39m[38;5;124mTop 10 title by target[39m[38;5;124m'[39m)
[1;32m     10[0m [38;5;66;03m# Convert the Plotly Express figures to Bokeh plots[39;00m
[0;32m---> 11[0m repo_counts_bokeh [38;5;241m=[39m pio[38;5;241m.[39mto_bokeh(repo_counts_fig)
[1;32m     12[0m title_counts_bokeh [38;5;241m=[39m pio[38;5;241m.[39mto_bokeh(title_counts_fig)
[1;32m     14[0m [38;5;66;03m# Create the Bokeh figure for 'Targ

In [0]:
# Install pandas_bokeh through a shell call, then import it along with
# ipywidgets' `interact`.
# NOTE(review): the package version is not pinned and `!pip` runs in a subshell;
# a pinned `%pip install` in a cell at the top of the notebook is the usual
# recommendation — confirm how this runtime handles mid-notebook installs
# before moving it.
# NOTE(review): neither `pandas_bokeh` nor `interact` is referenced in the cells
# visible here — confirm they are still needed.
!pip install pandas_bokeh
import pandas_bokeh
from ipywidgets import interact

You should consider upgrading via the '/local_disk0/.ephemeral_nfs/envs/pythonEnv-f41beb4b-1142-48e2-b02c-8e8529acb6d0/bin/python -m pip install --upgrade pip' command.[0m


In [0]:
import plotly.express as px
from bokeh.layouts import gridplot
import plotly.io as pio
from bokeh.plotting import figure, show

# Create the Plotly Express figures
repo_counts_fig = px.bar(repo_counts, x='repo_name', y='count', color='target', title='Top 10 repo_name by target')
title_counts_fig = px.bar(title_counts, y='title', x='count', color='target', title='Top 10 title by target')

# Create a Bokeh figure for 'Target by Default'
# NOTE(review): this figure is never shown and a Bokeh figure cannot be placed
# in a Plotly subplot grid; the line glyph below is an empty placeholder.
# TODO: populate it with data or remove it once confirmed unused elsewhere.
Target_by_default_fig_bokeh = figure(title='Target by Default')
Target_by_default_fig_bokeh.line()

# Create the subplot layout (titles are listed in row-major grid order and
# match the panels filled below)
fig_subplots = make_subplots(rows=2, cols=2, subplot_titles=('Repo Counts', 'Title Counts', 'Target by Default'))

# Add every trace of each Plotly figure to its grid position. The third panel
# uses the existing Plotly 'Target by Default' bar chart: the previous
# `go.Scatter(...)` placeholder passed a literal Ellipsis and raised an error.
dashboard_panels = (
    (repo_counts_fig, 1, 1),
    (title_counts_fig, 1, 2),
    (Target_by_default_fig, 2, 1),
)
for panel_fig, panel_row, panel_col in dashboard_panels:
    for trace in panel_fig.data:
        fig_subplots.add_trace(trace, row=panel_row, col=panel_col)

# Update the layout
fig_subplots.update_layout(title='Plotly Dashboard')

# Show the dashboard
fig_subplots.show()

# Show the pie chart separately (a plotly.graph_objects figure built earlier)
target_fig.show()


Unexpected exception formatting exception. Falling back to standard exception


Traceback (most recent call last):
  File "/databricks/python/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 3378, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<command-1178428486066780>", line 11, in <module>
    repo_counts_bokeh = pio.to_bokeh(repo_counts_fig)
  File "/databricks/python/lib/python3.9/site-packages/_plotly_utils/importers.py", line 39, in __getattr__
    raise AttributeError(
AttributeError: module 'plotly.io' has no attribute 'to_bokeh'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/databricks/python/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 1997, in showtraceback
    stb = self.InteractiveTB.structured_traceback(
  File "/databricks/python/lib/python3.9/site-packages/IPython/core/ultratb.py", line 1112, in structured_traceback
    return FormattedTB.structured_traceback(
  File "/databricks/python/lib/python3.9/site-packages

