Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions .env.sample
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#token to connect to the bot
TOKEN=""
#discord server id
SERVER_ID=""
INTRODUCTIONS_CHANNEL=""
NON_CONTRIBUTOR_ROLES=""

FLASK_HOST=""

SUPABASE_URL=""
SUPABASE_KEY=""

GithubPAT=""

7 changes: 7 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#all files relating to the python virtual env
/.venv
#environment variables and cache
*.env
*/__pycache__/*
*.csv
/local_only
7 changes: 7 additions & 0 deletions .gitignore copy
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#all files relating to the python virtual env
/.venv
#environment variables and cache
*.env
*/__pycache__/*
*.csv
/local_only
116 changes: 116 additions & 0 deletions cogs/discord_data_scraper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
from discord.ext import commands, tasks
from discord.channel import TextChannel
from discord import Member
import os, dateutil, json
from datetime import datetime

from utils.db import SupabaseInterface
from utils.api import GithubAPI
import csv

#CONSTANTS
RUNTIME_DATA_DIRECTORY = 'scraping-runtime-data'
RUNTIME_DATA_FILE = 'discordScraperRuntimeData.json'


#check if the directory for scraping runtime data exists and create it if it doesn't
def createRuntimeDataDirectory():
    """Ensure the scraper's runtime-data directory exists and return its path.

    The directory is named ``RUNTIME_DATA_DIRECTORY`` and is located directly
    under the current working directory.

    Returns:
        str: Path of the (possibly just created) directory.

    Raises:
        OSError: If the directory cannot be created, e.g. a regular file with
            the same name already exists or permissions are insufficient.
    """
    path = os.path.join(os.getcwd(), RUNTIME_DATA_DIRECTORY)
    # exist_ok=True replaces the isdir()-then-mkdir() pair, which could fail
    # if something else created the directory between the two calls.
    os.makedirs(path, exist_ok=True)
    return path




class DiscordDataScaper(commands.Cog):
    """Commands that export Discord server data to CSV files and Supabase."""

    def __init__(self, bot) -> None:
        self.bot = bot
        self.runtimeDataDirectory = createRuntimeDataDirectory()

    async def _resolve_guild(self, ctx):
        # Use the guild the command was invoked in; fall back to the guild
        # named by SERVER_ID when there is none (ctx.guild is falsy).
        if ctx.guild:
            return ctx.guild
        return await self.bot.fetch_guild(os.getenv("SERVER_ID"))

    def _runtime_data_path(self):
        # Location of the JSON file mapping channel id -> last scraped timestamp.
        return f'{self.runtimeDataDirectory}/{RUNTIME_DATA_FILE}'

    def _load_runtime_data(self):
        # Returns the stored {channel id (str): ISO timestamp} mapping.
        # A missing or unreadable file means "nothing scraped yet", so every
        # channel is read from the beginning instead of crashing the command.
        try:
            with open(self._runtime_data_path(), 'r') as file:
                return json.load(file)
        except (FileNotFoundError, json.JSONDecodeError):
            return {}

    def _record_runtime_data(self, data):
        # Overwrites the runtime-data file with the given mapping.
        with open(self._runtime_data_path(), 'w') as file:
            json.dump(data, file)

    @commands.command()
    async def introductions(self, ctx):
        """Write the IDs of everyone who posted in the introductions channel to introduced.csv."""
        guild = await self._resolve_guild(ctx)
        intro_channel = await guild.fetch_channel(os.getenv("INTRODUCTIONS_CHANNEL"))

        # A dict keeps first-seen order and de-duplicates authors in O(1).
        author_ids = {}
        async for message in intro_channel.history(limit=None):
            author_ids.setdefault(message.author.id, None)

        # The file is opened only once all data is collected, so a failed
        # fetch does not leave a truncated CSV behind. newline='' is what the
        # csv module requires to avoid blank rows on some platforms.
        with open('introduced.csv', 'w', newline='') as file:
            csv.writer(file).writerows([author_id] for author_id in author_ids)

    @commands.command()
    async def not_contributors(self, ctx):
        """Write members holding any of the NON_CONTRIBUTOR_ROLES to not_contributors.csv."""
        guild = await self._resolve_guild(ctx)

        # NON_CONTRIBUTOR_ROLES is a comma-separated list of role IDs. They
        # are converted to int because role.id is an int; comparing against
        # the raw strings would never match.
        raw_role_ids = os.getenv("NON_CONTRIBUTOR_ROLES") or ""
        orgAndMentors = {
            int(role_id) for role_id in raw_role_ids.split(',') if role_id.strip()
        }

        data = []
        async for member in guild.fetch_members(limit=None):
            if any(role.id in orgAndMentors for role in member.roles):
                data.append([member.name, member.id, member.roles])

        with open("not_contributors.csv", "w", newline='') as file:
            csv.writer(file).writerows(data)

    #Store all messages on Text Channels in the Discord Server to SupaBase
    @commands.command()
    async def add_messages(self, ctx):
        """Store text-channel messages sent since the previous run in Supabase."""
        # Imported explicitly: a bare `import dateutil` does not guarantee
        # that the `dateutil.parser` submodule is loaded.
        from dateutil import parser as date_parser

        def addMessageData(data):
            client = SupabaseInterface("unstructured discord data")
            client.insert(data)

        guild = await self.bot.fetch_guild(os.getenv("SERVER_ID")) #SERVER_ID Should be C4GT Server ID
        channels = await guild.fetch_channels()
        runtimeData = self._load_runtime_data()

        for channel in channels:
            print(channel.name)
            #See Channel Types for info on text channels https://discordpy.readthedocs.io/en/stable/api.html?highlight=guild#discord.ChannelType
            if not isinstance(channel, TextChannel):
                continue

            # JSON object keys are always strings, so the id is stringified
            # for both lookup and storage.
            channel_key = str(channel.id)
            stored = runtimeData.get(channel_key)
            # None means the channel was never scraped: all messages are read.
            last_run = date_parser.parse(stored) if stored else None
            print(last_run)

            messages = []
            newest = None
            async for message in channel.history(limit=None, after=last_run):
                # Track the newest message seen (including skipped ones) so
                # the next run resumes exactly after it.
                if newest is None or message.created_at > newest:
                    newest = message.created_at
                if message.content == '':
                    continue
                messages.append({
                    "channel": channel.id,
                    "channel_name": channel.name,
                    "text": message.content,
                    "author": message.author.id,
                    "author_name": message.author.name,
                    # NOTE(review): these are Role objects, not plain values;
                    # confirm SupabaseInterface.insert can serialize them.
                    "author_roles": message.author.roles if isinstance(message.author, Member) else [],
                    "sent_at": str(message.created_at)
                })
            print(len(messages))

            if messages:
                addMessageData(messages)
            if newest is not None:
                # Persist after every channel so an interrupted run does not
                # re-insert channels that were already stored.
                runtimeData[channel_key] = newest.isoformat()
                self._record_runtime_data(runtimeData)
        print("Complete!")

async def setup(bot):
    """Extension entry point: create the Discord scraper cog and add it to ``bot``."""
    scraper_cog = DiscordDataScaper(bot)
    await bot.add_cog(scraper_cog)
26 changes: 26 additions & 0 deletions cogs/github_data_scraper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
from discord.ext import commands

class GithubDataScraper(commands.Cog):
    # Cog exposing the GitHub update commands. Every command below is
    # currently a stub that returns immediately without doing any work.

    def __init__(self, bot) -> None:
        # Keep a reference to the bot this cog is attached to.
        self.bot = bot

    @commands.command()
    async def update_prs(self, ctx):
        # Stub: no pull-request update is performed.
        return None

    @commands.command()
    async def update_issues(self, ctx):
        # Stub: no issue update is performed.
        return None

    @commands.command()
    async def update_commits(self, ctx):
        # Stub: no commit update is performed.
        return None






async def setup(bot):
    """Extension entry point: create the GitHub scraper cog and add it to ``bot``."""
    github_cog = GithubDataScraper(bot)
    await bot.add_cog(github_cog)
159 changes: 159 additions & 0 deletions cogs/metrics_tracker.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
#Track metrics on github and discord and update the database accordingly
#Implement using: https://discordpy.readthedocs.io/en/stable/ext/tasks/index.html?highlight=tasks#
from discord.ext import commands, tasks
from discord import Member
from discord.channel import TextChannel
from datetime import time, datetime
from models.product import Product
from models.project import Project
from utils.api import GithubAPI
from utils.db import SupabaseInterface
import requests, json
import os, dateutil.parser




class MetricsTracker(commands.Cog):
    """Commands for linking Discord channels to products."""

    def __init__(self, bot: commands.Bot) -> None:
        self.bot = bot

    #Command to assign a channel to a product
    # NOTE(review): the aliases containing a space ('assign channel',
    # 'add channel') look unreachable, since the invoked command name is read
    # up to the first whitespace -- confirm and consider removing them.
    @commands.command(aliases=['product','assign', 'assign channel', 'add channel'])
    #@commands.has_any_role([])
    @commands.has_permissions(administrator=True)
    async def assign_channel_to_product(self, ctx, product_name=None):
        """Assign the current text channel to the named product."""
        #Check if product name was given
        if product_name is None:
            await ctx.channel.send("This command expects the name of the product as an argument like '!assign <product name>'")
            return

        #Check if channel is a valid type
        if str(ctx.channel.type) != 'text':
            await ctx.channel.send("Only text channels may be assigned to products")
            return

        #Check if the given name is a known product
        if not Product.is_product(product_name):
            await ctx.channel.send(f"{product_name} is not a valid product name. Please try again.")
            return

        product = Product(name=product_name)
        product.assign_channel(ctx.channel.id)
        await ctx.channel.send(f"Channel successfully assigned to product {product_name}")

    #error handling for assigning channel to product
    @assign_channel_to_product.error
    async def handle_assignment_error(self, ctx, error):
        # A local error handler replaces discord.py's default error report for
        # this command, so an empty handler hides every failure. Tell the
        # invoker what happened and keep a log record instead.
        if isinstance(error, commands.MissingPermissions):
            await ctx.channel.send("You need administrator permissions to assign a channel to a product.")
            return

        import logging
        logging.getLogger(__name__).error(
            "assign_channel_to_product failed", exc_info=error
        )
        await ctx.channel.send("Something went wrong while assigning this channel. Please try again later.")

# async def get_discord_metrics(self):
# # print(1)
# products = Product.get_all_products()

# print(products)

# discord_metrics = {
# "measured_at": datetime.now(),
# "metrics": dict()
# }

# # print(2)

# for product in products:
# # print(3)
# discord_metrics["metrics"][product['name']] = {
# "mentor_messages": 0,
# "contributor_messages": 0
# }
# channel_id = product["channel"]
# channel = await self.bot.fetch_channel(channel_id)

# async for message in channel.history(limit=None):
# # print(4)
# if not isinstance(message.author, Member):
# # print(5)
# continue
# if any(role.name.lower() == 'mentor' for role in message.author.roles):
# discord_metrics["metrics"][product["name"]]['mentor_messages'] +=1

# if any(role.name.lower() == 'contributor' for role in message.author.roles):
# discord_metrics["metrics"][product['name']]['contributor_messages'] +=1
# # print(6)

# r = requests.post(f"""{os.getenv("FLASK_HOST")}/metrics/discord""", json=json.dumps(discord_metrics, indent=4, default=str))
# # print(r.json())

# #Store metrics



# async def get_github_metrics(self):

# #Get all projects in the db
# projects = Project.get_all_projects()

# github_metrics = {
# "updated_at": datetime.now(),
# "metrics": dict()
# }

# for project in projects:
# url_components = str(project['repository']).split('/')
# url_components = [component for component in url_components if component != '']
# # print(url_components)
# [protocol, host, repo_owner, repo_name] = url_components
# api = GithubAPI(owner=repo_owner, repo=repo_name)

# (open_prs, closed_prs) = api.get_pull_request_count()
# (open_issues, closed_issues) = api.get_issue_count()


# github_metrics["metrics"][project["product"]] = {
# "project": project["name"],
# "repository": project["repository"],
# "number_of_commits": api.get_commit_count(),
# "open_prs": open_prs,
# "closed_prs": closed_prs,
# "open_issues": open_issues,
# "closed_issues": closed_issues
# }
# r = requests.post(f"""{os.getenv("FLASK_HOST")}/metrics/github""", json=json.dumps(github_metrics, indent=4, default=str))
# # print(r.json())

# # await ctx.channel.send(github_metrics)

# return

# @tasks.loop(seconds=20.0)
# async def record_metrics(self):
# # print('recording started')
# await self.get_discord_metrics()
# # print('discord done')
# # await self.get_github_metrics()
# # print('metrics recorded')

# @commands.command(aliases=['metrics'])
# # @tasks.loop(seconds=10.0)
# async def update_metrics_periodically(self, ctx, args):
# if args == 'start':
# self.record_metrics.start()
# # await self.get_github_metrics()

# elif args == 'stop':
# self.record_metrics.stop()


# return



async def setup(bot):
    """Extension entry point: create the metrics tracker cog and add it to ``bot``."""
    metrics_cog = MetricsTracker(bot)
    await bot.add_cog(metrics_cog)
Loading