## ADVANCED PANDAS: DATA IMPORTING & WEB SCRAPING

### Import Libraries & Methods

In [5]:
from bs4 import BeautifulSoup as bs

In [6]:
from urllib.request import urlopen

### Inputting The URL

In [9]:
url='https://wuzzuf.net/search/jobs/?a=navbg%7Cspbg&q=illustrator'

### Creating a Client-based Request to Get the url

In [10]:
client=urlopen(url)

### Getting The HTML Code Of The Full Page

In [11]:
html=client.read()

In [13]:
html

b'<!DOCTYPE html>\n<html lang="en">\n<head>\n    <meta charset="utf-8">\n    <meta http-equiv="X-UA-Compatible" content="IE=edge">\n    <meta name="viewport" content="width=device-width, initial-scale=1.0, shrink-to-fit=no">\n    <meta http-equiv="expires" content="Thu Dec 08 2022 18:30:44 GMT+0200" />\n\n    <meta http-equiv="Pragma" content="no-cache">\n    <meta http-equiv="cache-control" content="no-cache, no-store, must-revalidate">\n\n    <title data-react-helmet="true">Job Search | WUZZUF</title>\n\n<meta data-react-helmet="true" charset="utf-8"/><meta data-react-helmet="true" name="description" content="Searching for jobs in Egypt? Wuzzuf helps you in your online job search to find Jobs in Egypt and Middle East. Choose the right job using our online recruitment services."/><meta data-react-helmet="true" name="keywords" content="jobs in Egypt, job in Egypt, careers egypt, jobs in Cairo, jobs in alexandria, employment in egypt, Egypt jobs, jobs vacancies, job vacancies in egypt, 

In [14]:
## close the connection 
client.close()

### Creating an HTML parser using BeautifulSoup 

In [17]:
soup= bs(html,"html.parser")
soup

<!DOCTYPE html>

<html lang="en">
<head>
<meta charset="utf-8"/>
<meta content="IE=edge" http-equiv="X-UA-Compatible"/>
<meta content="width=device-width, initial-scale=1.0, shrink-to-fit=no" name="viewport"/>
<meta content="Thu Dec 08 2022 18:30:44 GMT+0200" http-equiv="expires">
<meta content="no-cache" http-equiv="Pragma"/>
<meta content="no-cache, no-store, must-revalidate" http-equiv="cache-control"/>
<title data-react-helmet="true">Job Search | WUZZUF</title>
<meta charset="utf-8" data-react-helmet="true"><meta content="Searching for jobs in Egypt? Wuzzuf helps you in your online job search to find Jobs in Egypt and Middle East. Choose the right job using our online recruitment services." data-react-helmet="true" name="description"><meta content="jobs in Egypt, job in Egypt, careers egypt, jobs in Cairo, jobs in alexandria, employment in egypt, Egypt jobs, jobs vacancies, job vacancies in egypt, job search egypt, job vacancies egypt, job recruitment in egypt, job opportunities in

### Creating a Container for the Needed 


In [20]:
#to find all content that are similar to these content
containers=soup.find_all("div",{"class":"css-1gatmva e1v1l3u10"})

In [22]:
len(containers)

15

In [23]:
bs.prettify(containers[0])

'<div class="css-1gatmva e1v1l3u10">\n <style data-emotion="css pkv5jc">\n  .css-pkv5jc{position:relative;min-height:60px;}\n </style>\n <div class="css-pkv5jc">\n  <a href="https://wuzzuf.net/jobs/careers/Smart-EGAT-Egypt-51158" rel="noreferrer" target="_blank">\n   <style data-emotion="css 17095x3">\n    .css-17095x3{position:absolute;right:0;top:0;width:60px;height:60px;object-fit:contain;object-position:center center;}\n   </style>\n  </a>\n  <style data-emotion="css laomuu">\n   .css-laomuu{padding-right:60px;}\n  </style>\n  <div class="css-laomuu">\n   <style data-emotion="css m604qf">\n    .css-m604qf{font-size:16px;font-weight:600;font-style:normal;letter-spacing:-0.4px;line-height:24px;color:#0055D9;margin:0;}\n   </style>\n   <h2 class="css-m604qf">\n    <style data-emotion="css o171kl">\n     .css-o171kl{-webkit-text-decoration:none;text-decoration:none;color:inherit;}\n    </style>\n    <a class="css-o171kl" href="/jobs/p/UCnVd6j8CIFl-Illustrator-Designer---3-days-a-week-S

### Accessing Page Element

In [25]:
#get title of the job for first box in the page 
containers[0].div.h2.text

'Illustrator Designer - 3 days a week'

In [26]:
jtitle=containers[0].div.h2.text
jtitle

'Illustrator Designer - 3 days a week'

In [31]:
#this way more easy and readable than the previous 
jtitle=containers[0].findAll("h2",{"class":"css-m604qf"})
jtitle[0].text

'Illustrator Designer - 3 days a week'

In [34]:
companyname=containers[0].findAll("a",{"class":"css-17s97q8"})
companyname[0].text

'Smart EGAT -'

In [35]:
jobtype=containers[0].findAll("span",{"class":"css-1ve4b75 eoyjyou0"})
jobtype[0].text

'Part Time'

### Bringing it All together 

In [62]:
with open("wuzzf-illustrator.csv", "w") as file:
    header = "Job_Title, Company_Name, Job_Type\n"
    file.write(header)

In [64]:
for container in containers:
    jtitle = container.findAll("h2", {"class": "css-m604qf"})
    Job_Title = jtitle[0].text.strip()

    companyname = container.findAll("a", {"class": "css-17s97q8"})
    Company_Name = companyname[0].text.strip()

    jobtype = container.findAll("span", {"class": "css-1ve4b75 eoyjyou0"})
    Job_Type = jobtype[0].text.strip()

    with open("wuzzf-illustrator.csv", "a") as file:  # Use "a" mode for append
        file.write(Job_Title + "," + Company_Name + "," + Job_Type + "\n")

file.close()

### Inputting the file into pandas 


In [70]:
import pandas as pd
wuzzf=pd.read_csv("wuzzf-illustrator.csv")


In [71]:
wuzzf.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 30 entries, 0 to 29
Data columns (total 3 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   Job_Title      30 non-null     object
 1    Company_Name  30 non-null     object
 2    Job_Type      30 non-null     object
dtypes: object(3)
memory usage: 848.0+ bytes
