## Webscraping Python Versions 



In [1]:
# Import the required libraries

import requests, bs4, pandas as pd

In [2]:
# The URL to be scraped is parsed into a string object. The request module queries the website via a get method which
# then returns a response object.

url = "https://www.python.org/downloads/" 
response = requests.get(url)


In [3]:
# Verify the status of the response by calling the raise_for_status method on the response object

response.raise_for_status

<bound method Response.raise_for_status of <Response [200]>>

In [4]:
# The response is parsed to string with the text method

data = response.text
# data

In [5]:
# Parsing the text data into a BeautifulSoup object with "html.parser". This will help BeautifulSoup to recognise
# all the tags and attributes of the html document and treat them like so, accordingly

soup = bs4.BeautifulSoup(data, "html.parser")
# soup

In [6]:
# Testing the parsed tree of the soup object by calling the titie tags (This can be omitted)

soup.find_all('title')

[<title>Download Python | Python.org</title>]

In [7]:
# The released dates are texts inside the span tags which has release-date as one of the attributes of the class. 
# This property is used to collect all the strings in the span tags.
# The count object is only used to count the number of loops in the process. It can be omitted

count = -1
release_dates_list = []
tags = soup.find_all('span', {"class": "release-date"})
for tag in tags:
    release_dates_list.append(tag.text)
    
    count += 1
release_dates = release_dates_list[1:]   
# print(count)
# print(release_dates)    

In [8]:
count = 0
head_link = "https://www.python.org"
versions_list = []
release_downloads_list = []

spans = soup.find_all('span', {"class": "release-number"})
for tag in spans:
    x= tag.find_all('a')
    for release_link in x:
        y = head_link  + release_link.get('href')
        versions_list.append(release_link.text)
        release_downloads_list.append(y)

        count += 1
# print(count)
# print( versions_list, release_downloads_list)


In [9]:
count = 0
release_notes_list = []

spans = soup.find_all('span', {"class": "release-enhancements"})
for tag in spans:
    x = tag.find_all('a')
    for release_link in x:
        release_notes_list.append(release_link.get('href'))

        count += 1
# print(count)
# print(release_notes_list)

In [10]:
python_dict = {"Python Version": versions_list, 
               "Release Date": release_dates,
               "Release Download Link": release_downloads_list,
               "Release Notes Link": release_notes_list,
              }

In [11]:
df = pd.DataFrame(python_dict)

# print(df)

    Python Version    Release Date  \
0    Python 3.10.5    June 6, 2022   
1    Python 3.9.13    May 17, 2022   
2    Python 3.10.4  March 24, 2022   
3    Python 3.9.12  March 23, 2022   
4    Python 3.10.3  March 16, 2022   
..             ...             ...   
160   Python 2.2.2   Oct. 14, 2002   
161   Python 2.2.1  April 10, 2002   
162   Python 2.1.3   April 9, 2002   
163   Python 2.2.0   Dec. 21, 2001   
164   Python 2.0.1   June 22, 2001   

                                 Release Download Link  \
0    https://www.python.org/downloads/release/pytho...   
1    https://www.python.org/downloads/release/pytho...   
2    https://www.python.org/downloads/release/pytho...   
3    https://www.python.org/downloads/release/pytho...   
4    https://www.python.org/downloads/release/pytho...   
..                                                 ...   
160  https://www.python.org/downloads/release/pytho...   
161  https://www.python.org/downloads/release/pytho...   
162  https://www.pyth

In [13]:
df.to_csv("Python_Versions1.csv")