In [None]:
#@title We classified meetings into 7 types: Executive, Team, Design Discussion, Bug Scrub, Code Review, 1:1, and Daily Standup. Click to see our labeling scheme.
# Reference legend only: the string below is a bare literal that is not bound
# to any name, so no later cell reads it.
# The numeric codes are presumably the values found in the data's 'Label'
# field (confirm against meeting-data.json). The recommendation cell sorts
# meetings by the predicted code in ascending order, so a lower number is
# treated as a higher in-person priority.
'''
Meeting type labeling scheme
1 - Management/Executive
2 - Team meeting
3 - Design discussion
4 - Bug scrub
5 - Code review
6 - 1:1
7 - Daily standup
'''

In [None]:
#@title Let's get started! Run this cell to download and import the required packages.
# Imports
import pandas as pd # for dataframes
import json # parse meeting data
import numpy as np # testing classifier accuracy

from sklearn.model_selection import train_test_split # split data
from sklearn.feature_extraction.text import CountVectorizer # one-hot vectors for words
from sklearn.feature_extraction.text import TfidfTransformer # calculate term frequencies
from sklearn.naive_bayes import MultinomialNB # naive-bayes classifier
from sklearn.linear_model import SGDClassifier # SVM classifier
from sklearn.pipeline import Pipeline

import nltk 
from nltk.corpus import stopwords # removing stopwords
from nltk.tokenize import word_tokenize # splitting string into individual words
nltk.download('stopwords')
nltk.download('punkt')

In [None]:
#@title Next, run this cell to answer some questions.
print('Welcome to Hybridge command-line version!')

# TARGET_DATE is later matched against the 'Date' column with ==, so a stray
# leading/trailing space typed by the user would silently match no meetings.
TARGET_DATE = input('What date would you like to look at? Please enter in \'M/D/YYYY\' form.\n(To fit our synthesized data, choose something between 8/1 and 8/28/2022!)').strip()

# Keep asking until the capacity is a non-negative whole number, instead of
# letting int() abort the cell with a ValueError on a typo.
while True:
  raw_limit = input('What\'s the maximum in-person capacity of your office?')
  try:
    OFFICE_LIMIT = int(raw_limit)
  except ValueError:
    print('Please enter a whole number (for example 25).')
    continue
  if OFFICE_LIMIT >= 0:
    break
  print('The capacity cannot be negative.')

In [None]:
#@title Great! Now run this cell to classify your meetings.
# Function to convert list of strings to single string
def listToString(l):
  """Join a list of strings into one string separated by single spaces.

  Args:
    l: list of strings (may be empty).

  Returns:
    The words joined with ' '; an empty list yields ''.

  The previous implementation decided whether to append a space by testing
  `word is not l[-1]`. That is an identity check, so any earlier word that
  was the same object as the last one (e.g. a repeated token) lost its
  trailing space and got glued to its neighbour. str.join has no such
  special case.
  """
  return ' '.join(l)

# Load JSON object from file.
# The encoding is pinned to UTF-8 so the result does not depend on the
# platform's default locale encoding (JSON text is UTF-8 by specification).
with open('meeting-data.json', encoding='utf-8') as file:
  j = json.load(file)

# Normalize meetings object (flatten nested fields into columns).
# Later cells read the 'Headline', 'Label', 'Date', 'Attendee-count' and
# 'MeetingId' columns from this frame.
data = pd.json_normalize(j['meetings'])

# Function to remove stopwords from row's description
def remove_stopwords(row):
  """Return the row's 'Headline' text with stop-words filtered out.

  The headline is tokenized with word_tokenize, tokens present in the
  module-level `stop_words` collection are skipped, and the survivors are
  re-assembled with listToString. `stop_words` is looked up when the function
  is called, so it must be defined before the first call. Matching is
  case-sensitive.
  """
  kept_tokens = []
  for token in word_tokenize(row['Headline']):
    if token in stop_words:
      continue
    kept_tokens.append(token)
  return listToString(kept_tokens)

# Lowercase descriptions.
# The original statement called .str.lower() and threw the result away, so
# capitalised stop-words ("The", "And", ...) were never filtered. Lowercasing
# happens on a temporary copy so the 'Headline' column shown in the result
# tables keeps its original casing.
headlines_lower = data.assign(Headline=data['Headline'].str.lower())

# Remove stop-words and create new column.
# remove_stopwords reads the module-level `stop_words`, so it has to be bound
# before apply() runs.
stop_words = set(stopwords.words('english'))
data['headline-clean'] = headlines_lower.apply(remove_stopwords, axis=1)

# Count-vectorize descriptions (per-headline term counts over the vocabulary)
count_vec = CountVectorizer()
terms = count_vec.fit_transform(data['headline-clean'])

# Apply TF-IDF weighting to the raw counts
tfidf_transformer = TfidfTransformer()
terms_tfidf = tfidf_transformer.fit_transform(terms)

# Add to dataframe: one dense list of weights per meeting row.
# (toarray() already densifies the sparse matrix; no format conversion needed.)
data['headline-vectorized'] = terms_tfidf.toarray().tolist()

# Split into data/labels (x/y), then train/test.
# A fixed seed makes the 50/50 split repeatable. Without it the accuracy and
# the set of meetings that reach the recommendation step (only x_test is used
# there) changed on every run.
SPLIT_SEED = 42
x = data.drop('Label', axis=1)
y = pd.DataFrame({'label': data['Label']})
x_train, x_test, y_train, y_test = train_test_split(
    x, y, train_size=0.5, random_state=SPLIT_SEED)

# Fit Naive-Bayes Classifier on the TF-IDF vectors of the training half
nb = MultinomialNB()
nb.fit(x_train['headline-vectorized'].tolist(), y_train['label'].tolist())

# Predict labels for the held-out half and measure accuracy.
# `mean` is the fraction of test rows whose prediction equals the true label;
# it is computed here but not displayed.
preds = nb.predict(x_test['headline-vectorized'].tolist())
mean = np.mean(preds == y_test['label'].tolist())

# Recommendation system
# Attach the predictions via assign(), which returns a new frame. Writing a
# column straight into the frame handed back by train_test_split can trigger
# pandas' SettingWithCopyWarning on some pandas/scikit-learn versions.
x_test = x_test.assign(**{'predicted-label': preds})

# Keep only the meetings on the requested date, then order them by predicted
# meeting type (ascending label number = higher priority).
sorted_meetings = x_test.loc[x_test['Date'] == TARGET_DATE].sort_values(by='predicted-label')

# Walk the meetings in priority order and admit them while the running head
# count still fits in the office. The walk stops at the first meeting that
# would exceed the limit, so everything from that meeting onward stays remote
# even if a later, smaller meeting would have fit.
in_person = 0  # attendees admitted so far
count = 0      # number of meetings admitted so far
for attendee_count in sorted_meetings['Attendee-count']:
  if in_person + attendee_count > OFFICE_LIMIT:
    break
  in_person += attendee_count
  count += 1

# First `count` rows are suggested in-person; the remainder are remote.
in_person_meetings = sorted_meetings.iloc[:count, :]
remote_meetings = sorted_meetings.iloc[count:, :]

print("Here are the meetings you had scheduled for " + TARGET_DATE)
sorted_meetings[['MeetingId', 'Headline', 'Date', 'Attendee-count', 'predicted-label']].style

In [None]:
#@title Run this cell to see our suggestions for in-person meetings!
# Announce the capacity the suggestion was computed for, then render the
# in-person selection with the same summary columns used in the other tables.
print(f"Here's the meetings we suggest you prioritize as in-person that day, based on your max occupancy of {OFFICE_LIMIT}")
in_person_columns = ['MeetingId', 'Headline', 'Date', 'Attendee-count', 'predicted-label']
in_person_meetings[in_person_columns].style

In [None]:
#@title Run this cell to see our suggestions for remote meetings!
# Render the meetings that did not fit within the office capacity, using the
# same summary columns as the other tables.
print("Here's the meetings we suggest you leave as remote that day.")
remote_columns = ['MeetingId', 'Headline', 'Date', 'Attendee-count', 'predicted-label']
remote_meetings[remote_columns].style