In [1]:
# Bootcamps to analyse, as (SwitchUp URL slug, SwitchUp bootcamp id) pairs.
schools = dict([
    ('ironhack', 10828),
    ('app-academy', 10525),
    ('le-wagon', 10868),
    ('ubiqum-code-academy', 11111),
    ('udacity', 11118),
])

import re
import pandas as pd
from pandas.io.json import json_normalize
import requests


def get_comments_school(school):
    """Download the SwitchUp reviews of one school as a DataFrame.

    Parameters
    ----------
    school : str
        School slug as used in the SwitchUp URL path (``/bootcamps/<school>``),
        e.g. ``'ironhack'``.

    Returns
    -------
    pandas.DataFrame
        One row per review with the fields returned by the API, plus
        ``review_body`` (``body`` with HTML tags stripped) and ``school``.
        If the API returns no reviews the frame has zero rows.

    Raises
    ------
    requests.HTTPError
        If the API answers with a 4xx/5xx status.
    requests.Timeout
        If the API does not answer within the timeout.
    """
    # Matches any HTML tag, e.g. <p> or <span class="truncatable">.
    TAG_RE = re.compile(r'<[^>]+>')

    # The school slug is spliced into the `path` query parameter.
    # NOTE(review): the query hard-codes page=3 together with perPage=10000 --
    # confirm the endpoint really returns every review for that page value.
    url = "https://www.switchup.org/chimera/v1/school-review-list?mainTemplate=school-review-list&path=%2Fbootcamps%2F" + school + "&isDataTarget=false&page=3&perPage=10000&simpleHtml=true&truncationLength=250"

    # A timeout keeps the notebook from hanging forever on a stalled connection;
    # raise_for_status turns an HTTP error page into a clear exception instead
    # of an obscure JSON/KeyError further down.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    data = response.json()

    reviews = pd.DataFrame(data['content']['reviews'])

    def remove_tags(x):
        # Non-string bodies (None/NaN) are passed through untouched.
        return TAG_RE.sub('', x) if isinstance(x, str) else x

    if 'body' in reviews.columns:
        reviews['review_body'] = reviews['body'].apply(remove_tags)
    else:
        # No reviews came back, so there is no 'body' column to clean.
        reviews['review_body'] = pd.Series(dtype=object)

    # Tag every row with the school it belongs to so frames can be concatenated.
    reviews['school'] = school
    return reviews



# Download the reviews of every school and stack them into a single DataFrame.
comments = pd.concat([get_comments_school(slug) for slug in schools])

from pandas.io.json import json_normalize


def get_school_info(school, school_id):
    """Fetch SwitchUp's description data for one school and split it into tables.

    Parameters
    ----------
    school : str
        School slug as used in the SwitchUp URL path (``/bootcamps/<school>``).
    school_id : int
        Value sent as the ``bootcampId`` query parameter.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(locations_df, courses_df, badges_df, school_df)``. Every frame
        carries two extra columns, ``school`` and ``school_id``.

    Raises
    ------
    requests.HTTPError
        If the API answers with a 4xx/5xx status.
    requests.Timeout
        If the API does not answer within the timeout.
    """
    url = 'https://www.switchup.org/chimera/v1/bootcamp-data?mainTemplate=bootcamp-data%2Fdescription&path=%2Fbootcamps%2F' + str(
        school) + '&isDataTarget=false&bootcampId=' + str(
        school_id) + '&logoTag=logo&truncationLength=250&readMoreOmission=...&readMoreText=Read%20More&readLessText=Read%20Less'

    # Fail fast on a stalled connection or an HTTP error page.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    content = response.json()['content']

    courses_df = pd.DataFrame(content['courses'], columns=['courses'])

    # pd.json_normalize flattens the nested location records into dotted
    # columns. The top-level function is used because
    # pandas.io.json.json_normalize has been deprecated since pandas 1.0.
    locations_df = pd.json_normalize(content['locations'])

    badges_df = pd.DataFrame(content['meritBadges'])

    # Single-row frame with the school-level attributes.
    school_df = pd.DataFrame([{
        'website': content['webaddr'],
        'description': content['description'],
        'LogoUrl': content['logoUrl'],
    }])

    # Tag every table with the school slug and id so the per-school frames
    # can be concatenated and still be told apart.
    for frame in (locations_df, courses_df, badges_df, school_df):
        frame['school'] = school
        frame['school_id'] = school_id

    return locations_df, courses_df, badges_df, school_df


# Collect the four per-school tables, then stack each kind into one DataFrame.
locations_list = []
courses_list = []
badges_list = []
schools_list = []

# The loop variable is `school_id`, not `id`: a module-level `id` would shadow
# the builtin for every later cell of the notebook.
for school, school_id in schools.items():
    print(school)  # progress indicator, one line per school fetched
    locations_df, courses_df, badges_df, school_df = get_school_info(school, school_id)

    locations_list.append(locations_df)
    courses_list.append(courses_df)
    badges_list.append(badges_df)
    schools_list.append(school_df)

locations = pd.concat(locations_list)
courses = pd.concat(courses_list)
badges = pd.concat(badges_list)
# NOTE: this rebinds `schools` from the slug -> id dict to a DataFrame; the
# cells below (`schools.isnull()`, `schools.shape`) rely on the DataFrame.
schools = pd.concat(schools_list)

print(schools.head())

ironhack




app-academy
le-wagon
ubiqum-code-academy
udacity
                                             website  \
0                                www.ironhack.com/en   
0                                      appacademy.io   
0                                    www.lewagon.com   
0  ubiqum.com/?utm_source=switchup&utm_medium=pla...   
0  www.udacity.com/?utm_source=switchup&utm_mediu...   

                                         description  \
0  <span class="truncatable"><p>Ironhack is a glo...   
0  <span class="truncatable"><p>App Academy is a ...   
0  <span class="truncatable"><p>Le Wagon runs imm...   
0  <span class="truncatable"><p>Ubiqum Code Acade...   
0  <span class="truncatable"><p>Udacity is a glob...   

                                             LogoUrl               school  \
0  https://d92mrp7hetgfk.cloudfront.net/images/si...             ironhack   
0  https://d92mrp7hetgfk.cloudfront.net/images/si...          app-academy   
0  https://d92mrp7hetgfk.cloudfront.net/images

### Null values in every dataframe

In [2]:
courses.isnull().sum()

courses      0
school       0
school_id    0
dtype: int64

In [3]:
schools.isnull().sum()

website        0
description    0
LogoUrl        0
school         0
school_id      0
dtype: int64

In [4]:
locations.isnull().sum()

id                 0
description        0
country.id         3
country.name       3
country.abbrev     3
city.id            3
city.name          3
city.keyword       3
state.id          53
state.name        53
state.abbrev      53
state.keyword     56
school             0
school_id          0
dtype: int64

In [5]:
comments.isnull().sum()

id                    0
name                  0
anonymous             0
hostProgramName    4484
graduatingYear        8
isAlumni             12
jobTitle           3318
tagline               1
body                  0
createdAt             0
queryDate             0
program             632
user                  0
overallScore         11
comments              0
overall              13
curriculum          121
jobSupport          339
review_body           0
school                0
dtype: int64

In [6]:
badges.isnull().sum()

name           0
keyword        0
description    0
school         0
school_id      0
dtype: int64

### Shape of every dataframe

In [7]:
courses.shape

(48, 3)

In [8]:
schools.shape

(5, 5)

In [9]:
locations.shape

(59, 14)

In [10]:
comments.shape

(4768, 20)

In [11]:
badges.shape

(7, 5)

### Dataframe tables

In [12]:
courses

Unnamed: 0,courses,school,school_id
0,Data Analytics Bootcamp,ironhack,10828
1,Data Analytics Part-Time,ironhack,10828
2,UX/UI Design Bootcamp,ironhack,10828
3,UX/UI Design Part-Time,ironhack,10828
4,Web Development Bootcamp,ironhack,10828
5,Web Development Part-Time,ironhack,10828
0,Bootcamp Prep,app-academy,10525
1,Software Engineer Track: In-Person,app-academy,10525
2,Software Engineer Track: Online,app-academy,10525
0,Web Development Course - Full-Time,le-wagon,10868


In [13]:
schools

Unnamed: 0,website,description,LogoUrl,school,school_id
0,www.ironhack.com/en,"<span class=""truncatable""><p>Ironhack is a glo...",https://d92mrp7hetgfk.cloudfront.net/images/si...,ironhack,10828
0,appacademy.io,"<span class=""truncatable""><p>App Academy is a ...",https://d92mrp7hetgfk.cloudfront.net/images/si...,app-academy,10525
0,www.lewagon.com,"<span class=""truncatable""><p>Le Wagon runs imm...",https://d92mrp7hetgfk.cloudfront.net/images/si...,le-wagon,10868
0,ubiqum.com/?utm_source=switchup&utm_medium=pla...,"<span class=""truncatable""><p>Ubiqum Code Acade...",https://d92mrp7hetgfk.cloudfront.net/images/si...,ubiqum-code-academy,11111
0,www.udacity.com/?utm_source=switchup&utm_mediu...,"<span class=""truncatable""><p>Udacity is a glob...",https://d92mrp7hetgfk.cloudfront.net/images/si...,udacity,11118


In [14]:
locations

Unnamed: 0,id,description,country.id,country.name,country.abbrev,city.id,city.name,city.keyword,state.id,state.name,state.abbrev,state.keyword,school,school_id
0,15901,"Berlin, Germany",57.0,Germany,DE,31156.0,Berlin,berlin,,,,,ironhack,10828
1,16022,"Mexico City, Mexico",29.0,Mexico,MX,31175.0,Mexico City,mexico-city,,,,,ironhack,10828
2,16086,"Amsterdam, Netherlands",59.0,Netherlands,NL,31168.0,Amsterdam,amsterdam,,,,,ironhack,10828
3,16088,"Sao Paulo, Brazil",42.0,Brazil,BR,31121.0,Sao Paulo,sao-paulo,,,,,ironhack,10828
4,16109,"Paris, France",38.0,France,FR,31136.0,Paris,paris,,,,,ironhack,10828
5,16375,"Miami, FL, United States",1.0,United States,US,31.0,Miami,miami,11.0,Florida,FL,florida,ironhack,10828
6,16376,"Madrid, Spain",12.0,Spain,ES,31052.0,Madrid,madrid,,,,,ironhack,10828
7,16377,"Barcelona, Spain",12.0,Spain,ES,31170.0,Barcelona,barcelona,,,,,ironhack,10828
8,16709,"Lisbon, Portugal",28.0,Portugal,PT,31075.0,Lisbon,lisbon,,,,,ironhack,10828
9,17233,Online,,,,,,,1.0,Online,Online,,ironhack,10828


In [15]:
comments[2000:2050]

Unnamed: 0,id,name,anonymous,hostProgramName,graduatingYear,isAlumni,jobTitle,tagline,body,createdAt,queryDate,program,user,overallScore,comments,overall,curriculum,jobSupport,review_body,school
159,263208,Jahaira Castaneda,False,,2020.0,False,,Le Wagon - London,"<span class=""truncatable""><p>My choice to join...",4/2/2020,2020-04-02,FullStack program,{'image': None},4.3,[],5.0,5.0,3.0,My choice to join Le Wagon’s 9-week intensive ...,le-wagon
160,263206,Boyan Levchev,False,,2020.0,False,,Very happy Le Wagon graduate,"<span class=""truncatable""><p>I was a bit worri...",4/2/2020,2020-04-02,FullStack program,{'image': None},4.7,[],5.0,5.0,4.0,I was a bit worried before starting Le Wagon t...,le-wagon
161,263202,Nicoleta Dudau,False,,2020.0,False,,Le Wagon Lausanne - my best ever choise,"<span class=""truncatable""><p>After working a f...",4/2/2020,2020-04-02,FullStack program,{'image': None},4.7,[],5.0,5.0,4.0,After working a few years in human resourcing ...,le-wagon
162,263188,Tori,False,,2019.0,False,,"Great bootcamp, amazing experience","<span class=""truncatable""><p>Before Le Wagon I...",4/1/2020,2020-04-01,FullStack program,{'image': None},5.0,[],5.0,5.0,5.0,Before Le Wagon I was an Engineer with no real...,le-wagon
163,263178,Yago Lomondo,False,,2020.0,False,,It's not about where you are but where you are...,"<span class=""truncatable""><p>Before starting L...",4/1/2020,2020-04-01,FullStack program,{'image': None},5.0,[],5.0,5.0,5.0,Before starting Le Wagon I was studying engine...,le-wagon
164,263177,Lucas Diniz Cunha,False,,2020.0,False,,"Rio #358, the best batch ever!","<span class=""truncatable""><p>- what you did be...",4/1/2020,2020-04-01,FullStack program,{'image': None},5.0,[],5.0,5.0,5.0,- what you did before Le Wagon I was/still am ...,le-wagon
165,263176,Vitor Tavares,False,,2020.0,False,,Rio #358 batch really changed my life,"<span class=""truncatable""><p>9 hard-working-fa...",4/1/2020,2020-04-01,FullStack program,{'image': None},5.0,[],5.0,5.0,5.0,9 hard-working-fast-paced weeks. I need to say...,le-wagon
166,263169,Joaquín Panelati,False,,2020.0,False,,When you look back and can't believe how much ...,"<span class=""truncatable""><p>My experience in ...",4/1/2020,2020-04-01,FullStack program,{'image': None},5.0,[],5.0,5.0,5.0,My experience in the bootcamp was excellent. T...,le-wagon
167,263153,Mike Warren,False,,2020.0,False,,Excellent Experience,"<span class=""truncatable""><p>Le Wagon Tokyo wa...",4/1/2020,2020-04-01,FullStack program,{'image': None},5.0,[],5.0,5.0,5.0,Le Wagon Tokyo was such an amazing 9-week expe...,le-wagon
168,263115,Kaori Kohama,False,,2020.0,False,,Amazing experience at Le Wagon Amsterdam,"<span class=""truncatable""><p>Before starting b...",3/31/2020,2020-03-31,FullStack program,{'image': None},4.7,[],5.0,5.0,4.0,"Before starting bootcamp, I had no experience ...",le-wagon


In [16]:
badges

Unnamed: 0,name,keyword,description,school,school_id
0,Available Online,available_online,<p>School offers fully online courses</p>,ironhack,10828
1,Verified Outcomes,verified_outcomes,<p>School publishes a third-party verified out...,ironhack,10828
2,Flexible Classes,flexible_classes,<p>School offers part-time and evening classes...,ironhack,10828
0,Available Online,available_online,<p>School offers fully online courses</p>,app-academy,10525
1,Job Guarantee,job_guarantee,<p>School guarantees job placement</p>,app-academy,10525
0,Flexible Classes,flexible_classes,<p>School offers part-time and evening classes...,le-wagon,10868
0,Flexible Classes,flexible_classes,<p>School offers part-time and evening classes...,ubiqum-code-academy,11111


### Data insights

In [44]:
set(courses['courses'].unique())

{'AI Programming with Python',
 'Al Product Manager',
 'Android Basics',
 'Android Developer',
 'Artificial Intelligence',
 'Artificial Intelligence for Trading',
 'Blockchain Developer',
 'Bootcamp Prep',
 'Business Analytics',
 'C++',
 'Cloud Dev Ops Engineer',
 'Cloud Developer',
 'Computer Vision',
 'Data Analyst',
 'Data Analytics & Machine Learning',
 'Data Analytics Bootcamp',
 'Data Analytics Part-Time',
 'Data Engineer',
 'Data Science - Full-Time',
 'Data Structures and Algorithms',
 'Data Visualization',
 'Deep Learning',
 'Deep Reinforcement Learning',
 'Digital Marketing',
 'Front End Web Developer',
 'Full Stack Web Developer',
 'Introduction to Machine Learning',
 'Introduction to Programming',
 'Java Developer',
 'Machine Learning Engineer',
 'Marketing Analytics',
 'Natural Language Processing',
 'Predictive Analytics for Business',
 'React',
 'Robotics Software Engineer',
 'Self-Driving Car Engineer',
 'Software Engineer Track: In-Person',
 'Software Engineer Track: O

In [46]:
# Lookup table keyed by every distinct course name. The 0 values are
# placeholders that word_finder overwrites with a group label.
# unique() already yields each name once, so no membership check is needed.
mydict = dict.fromkeys(courses['courses'].unique(), 0)

def word_finder(keys, mapping=None):
    """Label each course name with a coarse subject group.

    A course name is lower-cased and split into alphanumeric words (so
    ``'UX/UI'`` yields ``ux`` and ``ui``, and ``'Track:'`` yields ``track``).
    The groups are checked in a fixed order and the first one that shares a
    word with the name wins; names matching no group get ``'other courses'``.

    Parameters
    ----------
    keys : iterable of str
        Course names to classify.
    mapping : dict, optional
        Dictionary to write the ``name -> label`` pairs into. When omitted,
        the module-level ``mydict`` is updated, as before.

    Returns
    -------
    dict
        The dictionary that was updated (``mapping`` or ``mydict``).
    """
    target = mydict if mapping is None else mapping

    # (keywords, label) pairs in priority order. Keywords must be lower-case
    # because they are compared against lower-cased words.
    groups = (
        ({'data', 'analytics', 'analyst', 'science'},
         'data analysis/data science' + ' related course'),
        ({'web', 'development', 'software', 'developer'},
         'web development' + ' related course'),
        ({'ux', 'ui', 'design', 'designer'},
         'UX/UI Design' + ' related course'),
        ({'online', 'remote'},
         'online course'),
    )

    for key in keys:
        # Turn every non-alphanumeric character into a space so punctuation
        # such as '/', ':' or '-' separates words instead of sticking to them.
        cleaned = ''.join(ch if ch.isalnum() else ' ' for ch in key.lower())
        words = set(cleaned.split())

        target[key] = next(
            (label for vocabulary, label in groups if vocabulary & words),
            'other courses',
        )

    return target
        
word_finder(list(mydict.keys()))

{'Data Analytics Bootcamp': 'data analysis/data science related course',
 'Data Analytics Part-Time': 'data analysis/data science related course',
 'UX/UI Design Bootcamp': 'UX/UI Design related course',
 'UX/UI Design Part-Time': 'UX/UI Design related course',
 'Web Development Bootcamp': 'web development related course',
 'Web Development Part-Time': 'web development related course',
 'Bootcamp Prep': 'other courses',
 'Software Engineer Track: In-Person': 'web development related course',
 'Software Engineer Track: Online': 'web development related course',
 'Web Development Course - Full-Time': 'web development related course',
 'Web Development Course - Part-Time': 'web development related course',
 'Data Science - Full-Time': 'data analysis/data science related course',
 'Data Analytics & Machine Learning': 'data analysis/data science related course',
 'Web Development with Java': 'web development related course',
 'Web Development with JavaScript': 'web development related cours

In [48]:
# The distinct group labels that word_finder assigned.
# (No `global` statement: it has no effect at module level.)
set(mydict.values())

{'UX/UI Design related course',
 'data analysis/data science related course',
 'other courses',
 'web development related course'}

### Creating the new table

In [49]:
courses

Unnamed: 0,courses,school,school_id,courses by group
0,Data Analytics Bootcamp,ironhack,10828,data analysis/data science related course
1,Data Analytics Part-Time,ironhack,10828,data analysis/data science related course
2,UX/UI Design Bootcamp,ironhack,10828,UX/UI Design related course
3,UX/UI Design Part-Time,ironhack,10828,UX/UI Design related course
4,Web Development Bootcamp,ironhack,10828,web development related course
5,Web Development Part-Time,ironhack,10828,web development related course
0,Bootcamp Prep,app-academy,10525,other courses
1,Software Engineer Track: In-Person,app-academy,10525,other courses
2,Software Engineer Track: Online,app-academy,10525,online course
0,Web Development Course - Full-Time,le-wagon,10868,web development related course


In [50]:
# Seed the 'courses by group' column with the raw course names, so it starts
# out holding the same values as the 'courses' column.
a = courses['courses']
courses['courses by group'] = a #create a new column

In [51]:
courses

Unnamed: 0,courses,school,school_id,courses by group
0,Data Analytics Bootcamp,ironhack,10828,Data Analytics Bootcamp
1,Data Analytics Part-Time,ironhack,10828,Data Analytics Part-Time
2,UX/UI Design Bootcamp,ironhack,10828,UX/UI Design Bootcamp
3,UX/UI Design Part-Time,ironhack,10828,UX/UI Design Part-Time
4,Web Development Bootcamp,ironhack,10828,Web Development Bootcamp
5,Web Development Part-Time,ironhack,10828,Web Development Part-Time
0,Bootcamp Prep,app-academy,10525,Bootcamp Prep
1,Software Engineer Track: In-Person,app-academy,10525,Software Engineer Track: In-Person
2,Software Engineer Track: Online,app-academy,10525,Software Engineer Track: Online
0,Web Development Course - Full-Time,le-wagon,10868,Web Development Course - Full-Time


In [52]:
mydict

{'Data Analytics Bootcamp': 'data analysis/data science related course',
 'Data Analytics Part-Time': 'data analysis/data science related course',
 'UX/UI Design Bootcamp': 'UX/UI Design related course',
 'UX/UI Design Part-Time': 'UX/UI Design related course',
 'Web Development Bootcamp': 'web development related course',
 'Web Development Part-Time': 'web development related course',
 'Bootcamp Prep': 'other courses',
 'Software Engineer Track: In-Person': 'web development related course',
 'Software Engineer Track: Online': 'web development related course',
 'Web Development Course - Full-Time': 'web development related course',
 'Web Development Course - Part-Time': 'web development related course',
 'Data Science - Full-Time': 'data analysis/data science related course',
 'Data Analytics & Machine Learning': 'data analysis/data science related course',
 'Web Development with Java': 'web development related course',
 'Web Development with JavaScript': 'web development related cours

### Final courses table

In [54]:
# Swap every raw course name in the grouping column for its label from mydict.
group_labels = courses['courses by group'].replace(to_replace=mydict)
courses['courses by group'] = group_labels
courses

Unnamed: 0,courses,school,school_id,courses by group
0,Data Analytics Bootcamp,ironhack,10828,data analysis/data science related course
1,Data Analytics Part-Time,ironhack,10828,data analysis/data science related course
2,UX/UI Design Bootcamp,ironhack,10828,UX/UI Design related course
3,UX/UI Design Part-Time,ironhack,10828,UX/UI Design related course
4,Web Development Bootcamp,ironhack,10828,web development related course
5,Web Development Part-Time,ironhack,10828,web development related course
0,Bootcamp Prep,app-academy,10525,other courses
1,Software Engineer Track: In-Person,app-academy,10525,web development related course
2,Software Engineer Track: Online,app-academy,10525,web development related course
0,Web Development Course - Full-Time,le-wagon,10868,web development related course
