In [1]:
import requests
import numpy as np
import pandas as pd
from bs4 import BeautifulSoup

url = 'https://www.switchup.org/chimera/v1/bootcamp-list?mainTemplate=bootcamp-list%2Frankings&path=%2Frankings%2Fbest-coding-bootcamps&isDataTarget=false&featuredSchools=0&logoTag=logo&logoSize=original&numSchools=0&perPage=0&rankType=BootcampRankings&rankYear=2020&recentReview=true&reviewLength=50&numLocations=5&numSubjects=5&numCourses=5&sortOn=name&withReviews=false'

data = requests.get(url).json()

bootcamp_dict = {}

for i, bootcamp in enumerate(data['content']['bootcamps'], 1):
    soup = BeautifulSoup(bootcamp['name'], 'html.parser')
    bootcamp_name = soup.get_text(strip=True)
    
    # Convert to lowercase and replace spaces with hyphens
    formatted_name = bootcamp_name.lower().replace(' & ', '-').replace(' ', '-')
    
    
    # Add bootcamp formatted name and ID to the dictionary
    bootcamp_dict[formatted_name] = bootcamp['id']



# Print the resulting dictionary
print(bootcamp_dict)

{'le-wagon': 10868, 'app-academy': 10525, 'ironhack': 10828, 'general-assembly': 10761, 'hackwagon-academy': 10792, 'springboard': 11035, 'udacity': 11118, 'thinkful': 11098, 'nyc-data-science-academy': 10925, 'dataquest': 10683, 'coding-dojo': 10659, 'juno-college-of-technology': 10787, 'flatiron-school': 10748, 'hack-reactor': 10788, 'the-tech-academy': 11091, 'product-school': 10960, 'codesmith': 10643, 'actualize': 10505, 'makers-academy': 10874, 'product-gym': 10959, 'brainstation': 10571, 'bloomtech': 10854, 'tech-talent-strategy': 11059, 'epicodus': 10739, 'rmotr': 10987, 'uxer-school': 11151, 'designlab': 10697, 'fullstack-academy': 10751, 'tech-elevator': 11056, 'isdi-coders': 11024, 'ubiqum-code-academy': 11111, 'digitalcrafts': 10719, 'the-software-guild': 11090, 'learningfuze': 10862, 'careerfoundry': 10581, 'lighthouse-labs': 10869, 'code-fellows': 10614, 'galvanize': 10754, 'claim-academy': 10589, 'wyncode': 11174, 'noble-desktop': 10914, 'metis': 10886, 'altcademy': 1051

In [2]:
# you must populate this dict with the schools required -> try talking to the teaching team about this


# schools = {   
# 'ironhack' : 10828,
# 'app-academy' : 10525,
# 'springboard' : 11035,
# 'udacity' : 11118,
# }
# 

schools = bootcamp_dict

import re
import pandas as pd
from pandas.io.json import json_normalize
import requests



def get_comments_school(school):
    TAG_RE = re.compile(r'<[^>]+>')
    # defines url to make api call to data -> dynamic with school if you want to scrape competition
    url = "https://www.switchup.org/chimera/v1/school-review-list?mainTemplate=school-review-list&path=%2Fbootcamps%2F" + school + "&isDataTarget=false&page=3&perPage=10000&simpleHtml=true&truncationLength=250"

    #makes get request and converts answer to json
    # url defines the page of all the information, request is made, and information is returned to data variable
    data = requests.get(url).json()
    #converts json to dataframe
    reviews =  pd.DataFrame(data['content']['reviews'])
  
    #aux function to apply regex and remove tags
    def remove_tags(x):
        return TAG_RE.sub('',x)
    reviews['review_body'] = reviews['body'].apply(remove_tags)
    reviews['school'] = school
    return reviews

In [3]:
# could you write this as a list comprehension? ;)
comments = []

for school in schools.keys():

    comments.append(get_comments_school(school))

comments = pd.concat(comments)


In [4]:


# cleaning the table
col_remove = ['body', 'rawBody', 'user', 'comments']
comments_clean = comments.drop(columns=col_remove)

# let's change int64 types for int, as there is a error later
# comments_clean = comments_clean.applymap(lambda x: int(x) if isinstance(x, np.int64) else x)

comments_clean.head(3)

Unnamed: 0,id,name,anonymous,hostProgramName,graduatingYear,isAlumni,jobTitle,tagline,createdAt,queryDate,program,overallScore,overall,curriculum,jobSupport,review_body,school
0,306495,Nuria Aires,False,,2021.0,False,,"No regrets, it was perfect, I got what I wanted",11/15/2023,2023-11-15,Data Science & AI - Full-Time,5.0,5.0,5.0,5.0,"Le Wagon Le Wagon changed my life, literally. ...",le-wagon
1,305425,Minnerva Sasu,False,,2023.0,False,,Amazing!!!,10/5/2023,2023-10-05,Data Science & AI - Full-Time,5.0,5.0,5.0,5.0,"In 9 weeks, I went from procrastinating to doi...",le-wagon
2,305313,Alexander Pegot,False,,2022.0,False,Data Analyst,Great Experience,10/2/2023,2023-10-02,Data Science & AI - Full-Time,5.0,5.0,5.0,5.0,After making the decision to pursue a career i...,le-wagon


In [6]:
import pandas as pd
import requests

def get_school_info(school, school_id):
    url = 'https://www.switchup.org/chimera/v1/bootcamp-data?mainTemplate=bootcamp-data%2Fdescription&path=%2Fbootcamps%2F'+ str(school) + '&isDataTarget=false&bootcampId='+ str(school_id) + '&logoTag=logo&truncationLength=250&readMoreOmission=...&readMoreText=Read%20More&readLessText=Read%20Less'

    data = requests.get(url).json()

    data.keys()

    courses = data['content']['courses']
    courses_df = pd.DataFrame(courses, columns= ['courses'])

    locations = data['content']['locations']
    locations_df = pd.json_normalize(locations)

    badges_df = pd.DataFrame(data['content']['meritBadges'])
    
    website = data['content']['webaddr']
    description = data['content']['description']
    logoUrl = data['content']['logoUrl']
    school_df = pd.DataFrame([website,description,logoUrl]).T
    school_df.columns =  ['website','description','LogoUrl']

    locations_df['school'] = school
    courses_df['school'] = school
    badges_df['school'] = school
    school_df['school'] = school
    

    locations_df['school_id'] = school_id
    courses_df['school_id'] = school_id
    badges_df['school_id'] = school_id
    school_df['school_id'] = school_id

    return locations_df, courses_df, badges_df, school_df

locations_list = []
courses_list = []
badges_list = []
schools_list = []

for school, id in schools.items():

    a, b, c, d = get_school_info(school, id)
    
    locations_list.append(a)
    courses_list.append(b)
    badges_list.append(c)
    schools_list.append(d)





In [22]:
locations = pd.concat(locations_list)
locations.head(5)

Unnamed: 0,id,description,country.id,country.name,country.abbrev,city.id,city.name,city.keyword,state.id,state.name,state.abbrev,state.keyword,school,school_id
0,15803,"Melbourne, Australia",20.0,Australia,AU,31174.0,Melbourne,melbourne,,,,,le-wagon,10868
1,15904,"Casablanca, Morocco",44.0,Morocco,MA,31119.0,Casablanca,casablanca,,,,,le-wagon,10868
2,15906,"Buenos Aires, Argentina",60.0,Argentina,AR,31171.0,Buenos Aires,buenos-aires,,,,,le-wagon,10868
3,15964,"Brussels, Belgium",46.0,Belgium,BE,31125.0,Brussels,brussels,,,,,le-wagon,10868
4,16039,"Mexico City, Mexico",29.0,Mexico,MX,31175.0,Mexico City,mexico-city,,,,,le-wagon,10868


In [8]:
courses = pd.concat(courses_list)
courses.head(5)

Unnamed: 0,courses,school,school_id
0,Data Analytics - Full-Time,le-wagon,10868
1,Data Analytics - Part-Time,le-wagon,10868
2,Data Engineering - Full-Time,le-wagon,10868
3,Data Engineering - Part-Time,le-wagon,10868
4,Data Science & AI - Full-Time,le-wagon,10868


In [9]:
badges = pd.concat(badges_list)
badges.head()

Unnamed: 0,name,keyword,description,school,school_id
0,Available Online,available_online,<p>School offers fully online courses</p>,le-wagon,10868
1,Flexible Classes,flexible_classes,<p>School offers part-time and evening classes...,le-wagon,10868
0,Available Online,available_online,<p>School offers fully online courses</p>,app-academy,10525
1,Flexible Classes,flexible_classes,<p>School offers part-time and evening classes...,app-academy,10525
2,Job Guarantee,job_guarantee,<p>School guarantees job placement</p>,app-academy,10525


In [10]:
import requests
from bs4 import BeautifulSoup

url = 'https://www.switchup.org/chimera/v1/bootcamp-list?mainTemplate=bootcamp-list%2Frankings&path=%2Frankings%2Fbest-coding-bootcamps&isDataTarget=false&featuredSchools=0&logoTag=logo&logoSize=original&numSchools=0&perPage=0&rankType=BootcampRankings&rankYear=2020&recentReview=true&reviewLength=50&numLocations=5&numSubjects=5&numCourses=5&sortOn=name&withReviews=false'

data = requests.get(url).json()

for i, bootcamp in enumerate(data['content']['bootcamps'], 1):
    soup = BeautifulSoup(bootcamp['description'], 'html.parser')
    print('{}. {}'.format(i, bootcamp['name']))
    print(soup.get_text(strip=True))
    print('-' * 80)
    
    

1. Le Wagon
Le Wagon is an intensive international coding bootcamp geared toward career changers and entrepreneurs who want to gain coding skills. Participants complete 450 hours of coding in 9 weeks full-time or 24 weeks part-time, which includes building their own web app. After completing the program, students join an international alumni network of 8,000+ for career support and community.
--------------------------------------------------------------------------------
2. App Academy
App Academy teaches participants everything they need to know about software engineering in just 12 weeks. Their full-time bootcamps have helped over 2,000 graduates find jobs at more than 850 companies. Their deferred tuition plan means participants pay for the program only after they’ve landed their first web development job.
--------------------------------------------------------------------------------
3. Ironhack
Ironhack offers full-time and part-time courses in 9- or 24-week formats, respectivel

In [11]:
#this creates a tuple, but we're going to try to convert to sql from df
'''# Create a list of tuples to gather the data
description_tuples = []

for bootcamp in data['content']['bootcamps']:
    name = bootcamp['name']
    description = BeautifulSoup(bootcamp['description'], 'html.parser').get_text(strip=True)

    # Agregate the tuple to the list
    description_tuples.append((name, description))'''

"# Create a list of tuples to gather the data\ndescription_tuples = []\n\nfor bootcamp in data['content']['bootcamps']:\n    name = bootcamp['name']\n    description = BeautifulSoup(bootcamp['description'], 'html.parser').get_text(strip=True)\n\n    # Agregate the tuple to the list\n    description_tuples.append((name, description))"

In [12]:
# CREATING A DF FOR THE DESCRIPTION TEXTS WE EXTRACTED
import pandas as pd
from bs4 import BeautifulSoup

#we create first an empty dataframe
description_df = pd.DataFrame(columns=['name', 'description'])

for bootcamp in data['content']['bootcamps']:
    name = bootcamp['name']
    description = BeautifulSoup(bootcamp['description'], 'html.parser').get_text(strip=True)

    # temporary df with a row
    temp_df = pd.DataFrame({'name': [name], 'description': [description]})

    # Concat the temporary df with the main df
    description_df = pd.concat([description_df, temp_df], ignore_index=True)


In [None]:
# Group cities/countries and their schools, and modalities (in badges)
# Within the 'comments' df, group by schools, then by programs, and check the avg of: 'overall', 'cv', and 'job support'
# Should we start doing this with SQL or continue with Python?

# Let's send it to SQL

In [18]:
import mysql.connector
from sqlalchemy import create_engine
import pandas as pd

# creating the conection
conn = mysql.connector.connect(
    host='localhost',
    user='root',
    password='Coperage_7',
    database='sqlproject'
)

# creating engine object
engine = create_engine('mysql+mysqlconnector://root:Coperage_7@localhost/sqlproject')
#engine = create_engine('mysql+mysqlconnector://root:Dorohedoro_0@localhost/sqlproject')
#Dorohedoro_0

In [19]:
# it gave an error, considering it as a dict, so I do the change now to be sure it works
comments_df = pd.DataFrame(comments_clean)

In [20]:
comments_df.to_sql(name='comments', con=engine, if_exists='replace', index=False)

In [23]:
locations.to_sql(name='locations', con=engine, if_exists='replace', index=False)

In [24]:
courses.to_sql(name='courses', con=engine, if_exists='replace', index=False)

In [26]:
badges.to_sql(name='badges', con=engine, if_exists='replace', index=False)

In [25]:
description_df.to_sql(name='description', con=engine, if_exists='replace', index=False)

In [22]:
# Closing conection
conn.close()
