# Import the required libraries

In [2]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np

# HTTP request

### Store website url in a variable

In [3]:
# Address of the page that is downloaded and parsed in the following cells.
url = "https://www.opencodez.com/"

### Send the GET request

In [5]:
# Download the page at `url`.
# requests applies no timeout by default, so a stalled connection would block the
# kernel indefinitely; with a timeout it raises requests.exceptions.Timeout instead.
response = requests.get(url, timeout=10)

### Check status code

In [6]:
# Display the HTTP status code of the response (200 means the request succeeded).
response.status_code

200

# BeautifulSoup Object

In [7]:
# Parse the downloaded bytes into a navigable tree using Python's built-in HTML parser.
soup = BeautifulSoup(response.content, features="html.parser")

# Locate the latest-posts section

In [8]:
# First <div> whose class attribute is "latest-section"; the cells below search inside it.
latest_post = soup.find("div", class_="latest-section")

### Extract data from one post

In [7]:
# Text of the first <h2 class="title"> in the section, with surrounding whitespace removed.
title = (
    latest_post
    .find("h2", class_="title")
    .get_text()
    .strip()
)
title

'Python Tutorial :#2 Python Vs Java'

In [8]:
# Follow the first post's "read more" link and parse the page it points to.
readMore_url = latest_post.find("div", attrs={"class": "readMore"}).find("a")["href"]

# requests applies no timeout by default, so a stalled connection would block the
# kernel indefinitely; with a timeout it raises requests.exceptions.Timeout instead.
# NOTE: this rebinds `response`, replacing the home-page response fetched earlier.
response = requests.get(readMore_url, timeout=10)
soup_readMore = BeautifulSoup(response.content, "html.parser")

In [9]:
# Text of the first <p> inside the article body container of the "read more" page.
description = (
    soup_readMore
    .find("div", class_="post-single-content box mark-links")
    .find("p")
    .get_text()
    .strip()
)
description

'As we know Python and Java, Both the languages are powerful and both the languages provide many great frameworks. But the question is when to use which language? So, in this article we are going to some see main differences between Python and Java.'

In [10]:
# Author name: the link text inside the first <span class="theauthor">.
author = (
    latest_post
    .find("span", class_="theauthor")
    .find("a")
    .get_text()
    .strip()
)
author

'Supriya'

In [11]:
# Text of the first <span class="thetime">, split at the comma into
# a month-and-day piece and a year piece.
time = (
    latest_post
    .find("span", class_="thetime")
    .get_text()
    .strip()
    .split(",")
)
time

['Mar 10', ' 2022']

In [12]:
# First piece of the split timestamp: the month-and-day part (e.g. 'Mar 10').
date = time[0]
date

'Mar 10'

In [13]:
# Second piece of the split timestamp; strip() removes the space left after the comma.
year = time[1].strip()
year

'2022'

In [14]:
# href of the link inside the first <h2 class="title">, i.e. the post's own URL.
post_link = (
    latest_post
    .find("h2", class_="title")
    .find("a")["href"]
)
post_link

'https://www.opencodez.com/python/python-tutorial-2-python-vs-java.htm'

In [15]:
# src of the <img> inside the first <div class="featured-thumbnail">.
image_link = (
    latest_post
    .find("div", class_="featured-thumbnail")
    .find("img")["src"]
)
image_link

'https://www.opencodez.com/wp-content/uploads/2022/03/python-vs-java-150x100.png'

## Extract data from all posts

In [41]:
# Collect, for every post inside the latest-posts section, the tags that hold each field.
# find_all is the current BeautifulSoup method name; findAll is a deprecated alias
# carried over from BeautifulSoup 3.
title_all = latest_post.find_all("h2", attrs={"class": "title"})
descriptions = latest_post.find_all("div", attrs={"class": "post-content image-caption-format-1"})
authors = latest_post.find_all("span", attrs={"class": "theauthor"})
years_all = latest_post.find_all("span", attrs={"class": "thetime"})
# Month/day and year are both read from the same "thetime" spans, so reuse the
# result instead of running the identical search a second time.
months_all = years_all
# The post link is the <a> inside the same <h2 class="title"> tags as the title.
all_posts_link = title_all
all_images_link = latest_post.find_all("div", attrs={"class": "featured-thumbnail"})
# NOTE: this rebinds readMore_url from a single URL string to a list of tags.
readMore_url = latest_post.find_all("div", attrs={"class": "readMore"})

In [42]:
# Count the matches of each selector; if every post has every field,
# all of these counts are equal.
each_list_lenght = [
    len(matches)
    for matches in (
        title_all,
        descriptions,
        authors,
        years_all,
        months_all,
        all_posts_link,
        all_images_link,
        readMore_url,
    )
]

each_list_lenght

[12, 12, 12, 12, 12, 12, 12, 12]

In [43]:
# One independent empty list per extracted field; the generator yields a fresh
# list for each name, so no two names share the same list object.
(
    title_list,
    descriptions_list,
    authors_list,
    years_list,
    months_list,
    posts_link_list,
    images_link_list,
    readMore_list_url,
) = ([] for _ in range(8))

In [45]:
# Start from empty lists so that re-running this cell rebuilds the columns
# instead of appending a second copy of every post.
title_list = []
descriptions_list = []
authors_list = []
years_list = []
months_list = []
posts_link_list = []
images_link_list = []
readMore_list_url = []

# Only extract when every selector matched the same number of tags; otherwise
# position i in one list would not describe the same post as position i in another.
if len(set(each_list_lenght)) == 1:

    for index in range(each_list_lenght[0]):

        title_list.append(title_all[index].find("a").get_text())
        descriptions_list.append(descriptions[index].find('p').get_text().strip())
        authors_list.append(authors[index].find("a").get_text())
        # The "thetime" text looks like 'Mar 10, 2022': the piece after the comma
        # is the year, the piece before it is the month and day.
        years_list.append(years_all[index].get_text().split(",")[1].strip())
        months_list.append(months_all[index].get_text().split(",")[0].strip())
        posts_link_list.append(all_posts_link[index].find("a")["href"])
        images_link_list.append(all_images_link[index].find('img')['src'])
        readMore_list_url.append(readMore_url[index].find("a")['href'])

else:
    print("sorry we have missing values")

# Create Pandas Dataframe

In [46]:
# Build one row per post by zipping the per-field lists together.
# readMore_list_url is not part of the table.
df = pd.DataFrame(
    data=list(
        zip(
            title_list,
            descriptions_list,
            authors_list,
            years_list,
            months_list,
            posts_link_list,
            images_link_list,
        )
    ),
    columns=[
        "titles",
        "descriptions",
        "authors",
        "years",
        "months",
        "posts_link",
        "images_link",
    ],
)
df

Unnamed: 0,titles,descriptions,authors,years,months,posts_link,images_link
0,Python Tutorial :#2 Python Vs Java,"As we know Python and Java, Both the languages...",Supriya,2022,Mar 10,https://www.opencodez.com/python/python-tutori...,https://www.opencodez.com/wp-content/uploads/2...
1,Python Tutorial : #1 Introduction to Python,Python is a most popular programming language ...,Supriya,2022,Feb 20,https://www.opencodez.com/python/python-tutori...,https://www.opencodez.com/wp-content/uploads/2...
2,Chain of Responsibility – Behavioral Design Pa...,The Chain of Responsibility Pattern comes unde...,Supriya,2020,Aug 1,https://www.opencodez.com/java/chain-of-respon...,https://www.opencodez.com/wp-content/uploads/2...
3,Flyweight Pattern – Structural Design Pattern,Flyweight pattern comes under Structural Desig...,Supriya,2020,May 20,https://www.opencodez.com/java/flyweight-patte...,https://www.opencodez.com/wp-content/uploads/2...
4,What Is Full Stack QA or Tester? 4 Steps Guide...,We have always heard the term full-stack devel...,Shilpa,2020,May 18,https://www.opencodez.com/software-testing/bec...,https://www.opencodez.com/wp-content/uploads/2...
5,Recession 2020 -7 Highly Effective Ways IT Pro...,Many say that another wave of Recession is app...,Shilpa,2020,Apr 27,https://www.opencodez.com/it-lifestyle/7-highl...,https://www.opencodez.com/wp-content/uploads/2...
6,10 Productivity Tips For Working From Home (WF...,"It’s been more than 5 years, I am working from...",Shilpa,2020,Mar 30,https://www.opencodez.com/it-lifestyle/10-prod...,https://www.opencodez.com/wp-content/uploads/2...
7,Complete Step by Step Guide of Gherkin for Beg...,"Gherkin is a plain English text language, used...",Pritam,2020,Mar 22,https://www.opencodez.com/software-testing/com...,https://www.opencodez.com/wp-content/uploads/2...
8,Top 8 Most Commonly asked HR Interview Questio...,There are some questions that are frequently a...,Shilpa,2020,Mar 21,https://www.opencodez.com/how-to-guide/the-8-m...,https://www.opencodez.com/wp-content/uploads/2...
9,7 Free Testing Artifacts / Deliverables Templa...,Testing Artifacts are deliverables or document...,Shilpa,2020,Mar 19,https://www.opencodez.com/software-testing/fre...,https://www.opencodez.com/wp-content/uploads/2...
