In [48]:
"""
This cell extracts an XML file from a zip archive that is downloaded via a link taken from an XML input file; the link is chosen based on a specified file_type.

The script performs the following steps:
1. Loads the XML file using ElementTree.
2. Searches for a doc element with a download_link element containing file_type attribute as DLTINS.
3. If found, the download_link is extracted.
4. The zip file associated with the download_link is downloaded using urllib.request.
5. The XML file with the specified file name is extracted from the downloaded zip file using zipfile.
6. Logging information is recorded to a log file named app.log.

Modules used:
pandas as pd
csv
zipfile
xml.etree.ElementTree as ET
urllib.request
logging
"""
import logging
import xml.etree.ElementTree as ET
import urllib.request
import zipfile
import csv
import pandas as pd

logging.basicConfig(filename='app.log', filemode='w', format='%(asctime)s - %(levelname)s - %(message)s', level=logging.INFO)

# Load the XML index file that lists the downloadable archives.
tree = ET.parse('Assingment.xml')
root = tree.getroot()

# Select the download link of a document whose file_type is DLTINS.
download_link = None

# Layout 1: every "doc" element holds "str" children identified by their name attribute.
# NOTE(review): assumes the file type is stored in a sibling str element named "file_type",
# next to the one named "download_link" - confirm against Assingment.xml.
for doc in root.findall('.//doc'):
    type_elem = doc.find('str[@name="file_type"]')
    link_elem = doc.find('str[@name="download_link"]')
    # Skip incomplete documents instead of crashing on `None.text`.
    if type_elem is None or link_elem is None or not link_elem.text:
        continue
    logging.info(f'File type found: {type_elem.text}')
    if type_elem.text == "DLTINS":
        # Deliberately no `break`: the last matching document wins, exactly as the
        # unfiltered loop behaved before, so the member name extracted below still matches.
        download_link = link_elem.text.strip()

# Layout 2: "download_link" elements carrying a file_type attribute; the first DLTINS match overrides.
for link in root.findall(".//download_link"):
    file_type = link.get("file_type")
    logging.info(f'File type found: {file_type}')
    if file_type == "DLTINS":
        download_link = link.text
        break

# Download the zip file associated with the download link
if download_link:
    logging.info(f'Downloading file from: {download_link}')
    try:
        urllib.request.urlretrieve(download_link, "file.zip")
    except OSError:
        # URLError / HTTPError / ContentTooShortError are all OSError subclasses.
        logging.exception(f'Download failed: {download_link}')
        raise
    logging.info('File downloaded successfully.')

    # Extract the XML file from the downloaded zip file
    try:
        with zipfile.ZipFile("file.zip", 'r') as zip_ref:
            xml_file = zip_ref.extract("DLTINS_20210118_01of01.xml")
    except (zipfile.BadZipFile, KeyError):
        # KeyError: the archive does not contain the expected member.
        logging.exception('Could not extract DLTINS_20210118_01of01.xml from file.zip')
        raise
    logging.info(f'XML file extracted from zip: {xml_file}')
else:
    logging.error('No download link with file_type as DLTINS found in the XML.')


2023-04-23 13:59:38 - INFO - Downloading file from: http://firds.esma.europa.eu/firds/DLTINS_20210118_01of01.zip
2023-04-23 13:59:38 - INFO - File downloaded successfully.
2023-04-23 13:59:38 - INFO - XML file extracted from zip: C:\Users\Sakshi Priya\SteeleyeAssingment\DLTINS_20210118_01of01.xml


In [47]:
"""
This module uses ElementTree to parse an XML file, remove namespace prefixes from all elements, and write the modified XML to a new file.

Functions:

remove_namespace_prefix(elem): accepts an Element object and removes the namespace prefix from its tag and from the tags of all of its descendants.
"""
import xml.etree.ElementTree as ET

# Parse the extracted XML file.
try:
    tree = ET.parse('DLTINS_20210118_01of01.xml')
    root = tree.getroot()
except Exception as e:
    logging.error(f'Error parsing XML file: {e}')
    # Re-raise after logging: if execution continued, `tree` and `root` would either be
    # undefined or still point at whatever an earlier cell left in the kernel, and the
    # following steps would silently process the wrong document.
    raise
logging.debug('XML file parsed successfully')

def remove_namespace_prefix(elem):
    """Strip the ``{namespace}`` prefix from the tag of ``elem`` and of every descendant.

    ElementTree stores a namespaced tag as ``{uri}local``; after this call each
    such tag is reduced to ``local``. Tags without a ``}`` are left untouched.
    The tree is modified in place and nothing is returned.

    Args:
        elem: the Element whose subtree (including ``elem`` itself) is rewritten.
    """
    # iter() yields `elem` first and then all of its descendants.
    for node in elem.iter():
        _, closing_brace, local_name = node.tag.partition('}')
        # An empty separator means the tag carried no namespace prefix.
        if closing_brace:
            node.tag = local_name
# Remove the namespace prefix from all elements in the document
try:
    remove_namespace_prefix(root)
except Exception as e:
    logging.error(f'Error removing namespace prefix: {e}')
    # Re-raise: writing a partially processed tree would hand later steps a file
    # in which some tags still carry their namespace prefix.
    raise
logging.debug('Namespace prefix removed from all elements')

# Write the modified XML to a new file
try:
    tree.write('modified.xml')
except Exception as e:
    logging.error(f'Error writing modified XML file: {e}')
    # Re-raise: otherwise a stale or incomplete modified.xml could be read later.
    raise
logging.debug('Modified XML file written successfully')


In [46]:
"""In this block we parse the modified XML file, create a CSV file and write its
header, using try/except for error handling. For every instrument we extract the
fields requested in the assignment:
id, full_nm, clssfctn_tp, cmmdty_derive_ind, ntnl_ccy, issr
and then display the first five rows using the df.head() function."""
# Parse the namespace-free XML file.
try:
    tree = ET.parse('modified.xml')
    root = tree.getroot()
except IOError:
    logging.error("Unable to open file: modified.xml")
    # Re-raise: continuing would reuse whatever `root` an earlier cell left behind.
    raise
except ET.ParseError as e:
    # A malformed file is not an IOError; log it as well before failing.
    logging.error(f"Unable to parse file: modified.xml ({e})")
    raise

# CSV header, one entry per extracted field.
CSV_HEADER = ['FinInstrmGnlAttrbts.Id', 'FinInstrmGnlAttrbts.FullNm', 'FinInstrmGnlAttrbts.ClssfctnTp',
              'FinInstrmGnlAttrbts.CmmdtyDerivInd', 'FinInstrmGnlAttrbts.NtnlCcy', 'Issr']

# Element paths relative to each FinInstrm element, in the same order as CSV_HEADER.
# NOTE(review): only TermntdRcrd children are read, so a FinInstrm element holding any
# other kind of record yields a row of empty strings - confirm this is intended.
FIELD_PATHS = ['TermntdRcrd/FinInstrmGnlAttrbts/Id',
               'TermntdRcrd/FinInstrmGnlAttrbts/FullNm',
               'TermntdRcrd/FinInstrmGnlAttrbts/ClssfctnTp',
               'TermntdRcrd/FinInstrmGnlAttrbts/CmmdtyDerivInd',
               'TermntdRcrd/FinInstrmGnlAttrbts/NtnlCcy',
               'TermntdRcrd/Issr']

# Create the CSV file, write the header, then one row per FinInstrm element.
try:
    # Explicit UTF-8: the platform default encoding may be unable to encode some names,
    # and pandas reads the file back as UTF-8 below.
    with open('output.csv', 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(CSV_HEADER)
        for elem in root.findall('.//FinInstrm'):
            # findtext() returns '' when the element is missing or has no text.
            writer.writerow([elem.findtext(path, default='') for path in FIELD_PATHS])
except IOError:
    logging.error("Unable to create file: output.csv")
    # Re-raise: otherwise the read below would load a stale or truncated file.
    raise

# Read CSV file and display the first five rows
df = pd.read_csv('output.csv', encoding='utf-8')
df.head()


Unnamed: 0,FinInstrmGnlAttrbts.Id,FinInstrmGnlAttrbts.FullNm,FinInstrmGnlAttrbts.ClssfctnTp,FinInstrmGnlAttrbts.CmmdtyDerivInd,FinInstrmGnlAttrbts.NtnlCcy,Issr
0,EZV1JDJ1R5Q9,Foreign_Exchange Forward JPY SEK 20210116,JFTXFP,False,SEK,2138004TYNQCB7MLTG76
1,EZV4N4MHR6R0,Foreign_Exchange Forward CAD GBP 20210116,JFTXFP,False,GBP,2138004TYNQCB7MLTG76
2,EZV51K6FBPB6,Rates Swap Fixed_Float EUR-EURIBOR-Reuters 3 M...,SRCCSP,False,EUR,3IOL70HIEQ2FWND3JI79
3,EZV8QY62F100,Foreign_Exchange Forward AUD SEK 20210116,JFTXFP,False,SEK,2138004TYNQCB7MLTG76
4,EZV9QCSPFHR0,Foreign_Exchange Forward GBP USD 20210116,JFTXFP,False,GBP,2138004TYNQCB7MLTG76


In [49]:
"""In this block we import the boto3 library to set up an S3 client, create the bucket and upload the CSV file to it.
The client authenticates with an access key and a secret key. These are obtained in the AWS Management Console
by creating an IAM user and generating an access key pair for that user."""
import boto3

# S3 client setup.
# Credentials are deliberately not written in the notebook: boto3 resolves them through its
# default credential chain (the AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY environment
# variables, the shared ~/.aws/credentials file, or an attached IAM role).
# SECURITY: the access key pair that used to be hardcoded in this cell has been exposed and
# must be deactivated and rotated in IAM.
s3 = boto3.client('s3')

# Set the S3 bucket and file path
bucket_name = 'sakshipriyas3'
file_name = 'output.csv'

# Create the bucket; tolerate re-runs where this account already owns it.
try:
    s3.create_bucket(Bucket=bucket_name)
except s3.exceptions.BucketAlreadyOwnedByYou:
    pass

# Uploading the file to S3 (the object key is the local file name)
s3.upload_file(file_name, bucket_name, file_name)

print(f"{file_name} uploaded to {bucket_name}!")

output.csv uploaded to sakshipriyas3!
