<a href="https://colab.research.google.com/github/Apekshaa2908/Enhancing-E-Commerce-Agility-With-Advanced-ETL-Pipeline/blob/main/Streamlit.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install streamlit
!pip install boto3

In [None]:
%%writefile app.py
import streamlit as st
import boto3
import pandas as pd
from io import BytesIO
import time

# Function to read AWS credentials from a text file
def read_credentials(file_path):
    """Parse a ``Key: value`` text file into a dictionary.

    Each non-blank line is split on its first ':'; the text before it becomes
    the key and the text after it the value, both stripped of surrounding
    whitespace. Values may themselves contain ':' characters.

    Args:
        file_path: Path of the text file to read.

    Returns:
        A dict mapping each key to its value. When a key appears more than
        once, the last occurrence wins.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a non-blank line contains no ':' separator.
    """
    credentials = {}
    with open(file_path, 'r') as f:
        for line_number, line in enumerate(f, start=1):
            # Blank lines (typically a trailing newline at the end of the
            # file) carry no key/value pair, so they are skipped instead of
            # failing the tuple unpacking below.
            if not line.strip():
                continue
            key, separator, value = line.partition(':')
            if not separator:
                raise ValueError(
                    f"{file_path}, line {line_number}: expected 'Key: value', "
                    f"got {line.strip()!r}"
                )
            credentials[key.strip()] = value.strip()
    return credentials

# Load AWS credentials from a file
credentials = read_credentials('path/to/your/credentials.txt')

# Pull out the three settings that every AWS client below is built with
aws_access_key_id = credentials['Access Key']
aws_secret_access_key = credentials['Secret Access Key']
region_name = credentials['Region']

# S3 bucket names and the name of the Glue job this script refers to
ORDER_BUCKET = 'order-apekshaa'
RETURN_BUCKET = 'return-apekshaa'
GLUE_JOB_NAME = 'order_return_glue_etl'

# The S3 and Glue clients take identical credentials and region, so the
# keyword arguments are assembled once and reused for both.
_client_kwargs = {
    'aws_access_key_id': aws_access_key_id,
    'aws_secret_access_key': aws_secret_access_key,
    'region_name': region_name,
}
s3 = boto3.client('s3', **_client_kwargs)
glue = boto3.client('glue', **_client_kwargs)

# Function to upload files to S3
def upload_to_s3(file, bucket_name, file_name):
    """Serialize a table to CSV and store it as an object in S3.

    The outcome is shown in the Streamlit UI (a success or an error message)
    and also returned, so callers can decide whether to continue.

    Args:
        file: Object exposing a pandas-style ``to_csv(buffer, index=False)``
            method; the callers in this script pass a DataFrame.
        bucket_name: Name of the destination S3 bucket.
        file_name: Object key to write inside the bucket.

    Returns:
        True if the object was written, False if serialization or the upload
        failed. Failures are reported via ``st.error`` rather than raised.
    """
    try:
        file_buffer = BytesIO()
        file.to_csv(file_buffer, index=False)
        # getvalue() returns the full buffer contents regardless of the
        # current stream position, so no rewind is needed first.
        s3.put_object(Bucket=bucket_name, Key=file_name, Body=file_buffer.getvalue())
    except Exception as e:
        # Deliberately broad: any failure is surfaced in the UI instead of
        # crashing the app.
        st.error(f"Error uploading {file_name}: {str(e)}")
        return False
    else:
        st.success(f'File {file_name} uploaded to {bucket_name}')
        return True

# Function to check Glue job status
def check_glue_job_status(job_run_id):
    """Return the ``JobRunState`` string of one run of ``GLUE_JOB_NAME``.

    Args:
        job_run_id: Identifier of the job run to look up.
    """
    job_run = glue.get_job_run(RunId=job_run_id, JobName=GLUE_JOB_NAME)['JobRun']
    return job_run['JobRunState']

# File uploaders
order_file = st.file_uploader("Upload order_table CSV", type=["csv"], key="order_file")
return_file = st.file_uploader("Upload return_table CSV", type=["csv"], key="return_file")

# Nothing is uploaded until both files are selected and the button is pressed.
if order_file and return_file:
    if st.button("Upload Both Files"):
        # Parse the uploads and push each one to its own bucket.
        order_df = pd.read_csv(order_file)
        return_df = pd.read_csv(return_file)

        upload_to_s3(order_df, ORDER_BUCKET, 'order_table.csv')
        upload_to_s3(return_df, RETURN_BUCKET, 'return_table.csv')

        st.success("Both files uploaded. Monitoring Glue job...")

        # The Glue job is assumed to be started outside this script, so the
        # run to watch has to be looked up.
        # NOTE(review): index 0 is treated as the most recent run, and a run
        # triggered by this upload may not exist yet when this call is made,
        # in which case an earlier run is monitored - confirm against the
        # job's trigger setup.
        with st.spinner("Fetching the latest Glue job run ID..."):
            response = glue.get_job_runs(JobName=GLUE_JOB_NAME)
            job_runs = response['JobRuns']

        if not job_runs:
            # A job that has never run returns an empty list; report it
            # instead of failing on the index lookup.
            st.warning(f'No runs found for Glue job {GLUE_JOB_NAME}.')
        else:
            job_run_id = job_runs[0]['Id']
            st.write(f'Monitoring Glue job with Run ID: {job_run_id}')

            # Every state after which a run no longer changes. Leaving out
            # TIMEOUT, ERROR or EXPIRED would make the loop below spin forever
            # on runs that end that way.
            finished_states = {'SUCCEEDED', 'FAILED', 'STOPPED', 'TIMEOUT', 'ERROR', 'EXPIRED'}
            poll_interval_seconds = 60  # Check every minute

            with st.spinner("Waiting for the Glue job to finish..."):
                while True:
                    # Check first and sleep afterwards, so a run that has
                    # already finished is reported without a needless wait.
                    status = check_glue_job_status(job_run_id)
                    st.write(f'Current Glue job status: {status}')
                    if status in finished_states:
                        break
                    time.sleep(poll_interval_seconds)

            # Only a successful run gets the success styling.
            if status == 'SUCCEEDED':
                st.success('Glue job has succeeded!')
            elif status == 'STOPPED':
                st.warning('Glue job has stopped!')
            elif status == 'FAILED':
                st.error('Glue job has failed!')
            else:
                st.error(f'Glue job ended with status {status}!')
else:
    st.warning("Please upload both order_table and return_table CSV files to proceed.")


In [None]:
! wget -q -O - ipv4.icanhazip.com
! streamlit run app.py & npx localtunnel --port 8501