# Try me out at https://shikou.glitch.me

In [55]:
import requests
import bs4
import numpy as np
import pandas as pd

In [56]:
# First, gather a list of all the project urls.
project_links_2020 = []
base_url = 'https://hackthenorth2020.devpost.com/project-gallery?page={}'

In [57]:
# Goes through the 25 pages of projects on Hack the North 2020++'s Devpost
for page in range(1,26):
    res = requests.get(base_url.format(page))
    soup = bs4.BeautifulSoup(res.text,'lxml')
    
    # Append all links to the list. 24 projects per page
    for link_index in range(24):
        try:
            project_link = soup.select('.link-to-software')[link_index]
            project_links_2020.append(project_link['href'])
        except:
            break

In [68]:
# Grab list of languages used and store in a pandas DataFrame
projects_2020_df = pd.DataFrame(data = project_links_2020, index = range(597), columns = ['URL'])

In [69]:
# Scrape the languages used for each project and add them to the DataFrame
for index,url in enumerate(project_links_2020):
    res = requests.get(url)
    soup = bs4.BeautifulSoup(res.text,'lxml')
        
    for language in soup.select(".recognized-tag"):
        projects_2020_df.at[index,language.get_text()] = 1

In [70]:
# Sanity check to see if languages have been properly added to DataFrame
projects_2020_df

Unnamed: 0,URL,flutter,flask,python,sqlalchemy,swift,azure,blockchain,css3,express.js,...,eclipse-mc,glitch,google-distance-matrix,facebook-graph,gps,google-cloud-sql,matlab,ruby-on-rails,facebook-login-api,spring
0,https://devpost.com/software/boredombuster-hnimzc,1.0,,,,,,,,,...,,,,,,,,,,
1,https://devpost.com/software/fridgespace,,1.0,1.0,1.0,1.0,,,,,...,,,,,,,,,,
2,https://devpost.com/software/helppier-k5lbru,,1.0,1.0,,,1.0,1.0,1.0,1.0,...,,,,,,,,,,
3,https://devpost.com/software/mashme,,1.0,1.0,,,,,,,...,,,,,,,,,,
4,https://devpost.com/software/mall-monitor,,1.0,1.0,,,1.0,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
592,https://devpost.com/software/docdash,,,,,,,,,,...,,,,,,,,,,
593,https://devpost.com/software/google-docs-to-html,,,,,,,,,1.0,...,,,,,,,,,,
594,https://devpost.com/software/biz-commerce,,,,,,,,,,...,,,,,,,,,,
595,https://devpost.com/software/can-sum-cam,,,1.0,,,,,,,...,,,,,,,,,,


In [163]:
# Fill in null values with 0
projects_2020_df.fillna(0, inplace=True)

In [85]:
# number of languages used in each project
projects_2020_df['total'] = projects_2020_df.sum(axis=1)

In [164]:
projects_2020_df.head()

Unnamed: 0,URL,flutter,flask,python,sqlalchemy,swift,azure,blockchain,css3,express.js,...,glitch,google-distance-matrix,facebook-graph,gps,google-cloud-sql,matlab,ruby-on-rails,facebook-login-api,spring,total
1,https://devpost.com/software/fridgespace,0.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0
2,https://devpost.com/software/helppier-k5lbru,0.0,1.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,10.0
3,https://devpost.com/software/mashme,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.0
4,https://devpost.com/software/mall-monitor,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.0
5,https://devpost.com/software/cast3d-scanner-fo...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0


In [148]:
# Remove any project that has no recognized language
projects_2020_df = projects_2020_df[projects_2020_df['total'] != 0]

In [106]:
# Don't calculate sums for URL and total columns
use_counts = projects_2020_df.sum()[1:-1]

In [109]:
# Languages sorted by most to least
# 63 languages were used only once!
sorted_use_counts = use_counts.sort_values(ascending = False)

In [121]:
# Top 10 used languages overall
# Used in stats page of Shikou
sorted_use_counts[:20]

javascript        287
python            202
css               175
html              163
react             157
node.js           124
firebase           92
flask              91
express.js         76
html5              66
google-cloud       62
css3               49
java               42
bootstrap          37
azure              34
react-native       27
flutter            27
mongodb            25
c++                24
android-studio     23
dtype: object

In [149]:
# GOAL: Create array of projects with multiple recognized tags for JavaScript
# Start by removing projects using only one tool
projects_2020_df = projects_2020_df[projects_2020_df['total'] != 1]

In [161]:
# Create array for use in JavaScript
print("let item = [")
for index,url in enumerate(projects_2020_df['URL'][:]):
    res = requests.get(url)
    soup = bs4.BeautifulSoup(res.text,'lxml')
    soups = soup.select(".recognized-tag")
    soups_length = len(soups)
    
    if soups_length == 0:
        continue
    else:
        print("\t['",end='')
        for i,language in enumerate(soups):
            print(language.get_text(),end='')
            if i != soups_length - 1:
                print(", ",end='')
        print(f"', '{url}'],")
print("];")

let item = [
	['flask, python, sqlalchemy, swift', 'https://devpost.com/software/fridgespace'],
	['azure, blockchain, css3, express.js, flask, html5, javascript, node.js, python, react', 'https://devpost.com/software/helppier-k5lbru'],
	['bootstrap, flask, google-compute-engine, google-storage, mysql, python, spotify', 'https://devpost.com/software/mashme'],
	['angular.js, azure, css, flask, html, javascript, python', 'https://devpost.com/software/mall-monitor'],
	['arduino, google-spreadsheets, javascript', 'https://devpost.com/software/cast3d-scanner-for-medicinal-casts'],
	['adobe-illustrator, docker, firebase, flask, node.js, react, typescript', 'https://devpost.com/software/legist'],
	['flask, ios, python, swift', 'https://devpost.com/software/jump-ar5890'],
	['azure, electron, javascript, opencv, python, react', 'https://devpost.com/software/ctrlairspace'],
	['express.js, flask, python, react, socket.io', 'https://devpost.com/software/karaoke-party'],
	['css, html, javascript', '

	['css, html, javascript', 'https://devpost.com/software/smoot'],
	['css, flask, html, javascript, python', 'https://devpost.com/software/fakecation-agbmei'],
	['amazon-web-services, git, javascript, mongodb, node.js, xcode', 'https://devpost.com/software/instructions'],
	['azure, flask, python, socket.io', 'https://devpost.com/software/rentogether'],
	['css3, django, google-visualization, html5, javascript, python', 'https://devpost.com/software/no-more-noodles-m2jct9'],
	['express.js, flutter, google-cloud, node.js', 'https://devpost.com/software/curbshop-online'],
	['electron, node.js', 'https://devpost.com/software/onion-notetaking'],
	['android-studio, java, xml', 'https://devpost.com/software/imedapp'],
	['electron, javascript, node.js, postgresql', 'https://devpost.com/software/driftr'],
	['ibm-cloud, javascript', 'https://devpost.com/software/internview-w9objv'],
	['google-cloud, ios, swift', 'https://devpost.com/software/dynamic-image'],
	['android, android-studio, java, machi

	['3dprinting, opencv, python', 'https://devpost.com/software/dualaxiscamera'],
	['firebase, javascript, react', 'https://devpost.com/software/ohmi'],
	['opencv, python, pytorch', 'https://devpost.com/software/face-mask-notifier'],
	['flask, html5, javascript, python, react', 'https://devpost.com/software/grocerbase'],
	['css3, flask, html5, jinja, pandas, python', 'https://devpost.com/software/premier-league-sports-predictor'],
	['javascript, react-native', 'https://devpost.com/software/hello-roomie'],
	['css3, django, google-cloud, html5, javascript, python, react', 'https://devpost.com/software/studyapp-yndt2x'],
	['css, javascript, machine-learning, node.js, python, typescript', 'https://devpost.com/software/steganographia'],
	['c++, css, glsl, javascript, makefile', 'https://devpost.com/software/hackersnest'],
	['firebase, google-maps, react, react-native', 'https://devpost.com/software/easyaccess-vk2urt'],
	['css, html, react', 'https://devpost.com/software/dojo-web-app'],
	['css

	['firebase, flutter, javascript', 'https://devpost.com/software/wish-a-dish-m9eshg'],
	['node.js, react', 'https://devpost.com/software/fugazi'],
	['express.js, javascript, node.js', 'https://devpost.com/software/outline-w0iflc'],
	['css, express.js, google-cloud, heroku, html5, javascript, node.js', 'https://devpost.com/software/highlight-truth-false-info-detector'],
	['chrome, css, html, javascript', 'https://devpost.com/software/class-reminder-extension'],
	['css, express.js, google-cloud, html, ibm-watson, javascript, node.js', 'https://devpost.com/software/eyebotics'],
	['firebase, google-cloud, pandas, python, scikit-learn', 'https://devpost.com/software/halcyon-wavqir'],
	['flask, opencv, python', 'https://devpost.com/software/mask-detector-xi4v7l'],
	['azure, css, express.js, html, javascript, node.js, postgresql, react, slack, sql', 'https://devpost.com/software/aortta'],
	['css3, express.js, html5, javascript, jquery, mongodb, node.js, python, react', 'https://devpost.com/so

	['express.js, mongodb, node.js, react, sendgrid', 'https://devpost.com/software/todo-it'],
	['firebase, javascript, node.js, react', 'https://devpost.com/software/re-meet'],
	['css, flask, html, python', 'https://devpost.com/software/when-should-i-go'],
	['flask, nltk', 'https://devpost.com/software/reviewr'],
	['css, html, javascript, node.js, shell, webrtc', 'https://devpost.com/software/hackthenorth2020-3vqe7c'],
	['firebase, flask, flutter, google-cloud, google-maps, ibm-cloud, ibm-watson, javascript, natural-language-processing, python', 'https://devpost.com/software/accima'],
	['bootstrap, express.js, google-cloud, google-storage, heroku, javascript, mongodb, node.js, postman, react', 'https://devpost.com/software/auditapp'],
	['css, flask, html5, javascript, python, sqlite', 'https://devpost.com/software/demeter-1a2lvs'],
	['amazon-web-services, azure, express.js, react', 'https://devpost.com/software/moodbot-z4f5py'],
	['css, html, javascript', 'https://devpost.com/software/co

	['angular.js, firebase, javascript, python', 'https://devpost.com/software/doctor-bot'],
	['css, html, javascript', 'https://devpost.com/software/helping-hand-fzswkj'],
	['css, dash, flask, html, pandas, plotly, python, spotify', 'https://devpost.com/software/spotinet'],
	['c, css, html, javascript, python, shell', 'https://devpost.com/software/covid19_canada'],
	['blockchain, express.js, firebase, node.js, oauth, react, redis', 'https://devpost.com/software/opencheck'],
	['c++, visual-studio', 'https://devpost.com/software/can-i-change-the-name'],
	['expo.io, react-native', 'https://devpost.com/software/tdar'],
	['express.js, javascript, node.js, react', 'https://devpost.com/software/spotify-party-75f9t3'],
	['css, ejs, html, javascript, mongodb', 'https://devpost.com/software/childproof-extension'],
	['django, python, react', 'https://devpost.com/software/spotifind-q3oytk'],
	['css, express.js, html, javascript, node.js', 'https://devpost.com/software/recipe-io'],
	['css, express.js

	['facebook-login-api, flask, javascript, python, react, sql, sqlalchemy, sqlite', 'https://devpost.com/software/workout-buddies-ikpsng'],
	['bootstrap, css3, google-web-speech-api, html5, node.js, postgresql', 'https://devpost.com/software/notesus'],
	['css, html, javascript, python', 'https://devpost.com/software/hack-the-north-2020'],
	['javascript, python', 'https://devpost.com/software/karaokebot'],
	['java, react, spring, swift', 'https://devpost.com/software/git-3d'],
	['bootstrap, css, flask, html, python', 'https://devpost.com/software/navicourse'],
	['android, javascript, react-native', 'https://devpost.com/software/treecosystem'],
	['css, html, react', 'https://devpost.com/software/cite-safe'],
	['firebase, javascript, react', 'https://devpost.com/software/docdash'],
	['express.js, javascript, react', 'https://devpost.com/software/google-docs-to-html'],
	['facebook, facebook-messenger', 'https://devpost.com/software/biz-commerce'],
	['javascript, json, python, react', 'https