<a href="https://colab.research.google.com/github/JanithRankelum/streamlit_app/blob/master/app_py.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import streamlit as st
import boto3
import pandas as pd
from io import StringIO
from datetime import datetime
import time

# AWS S3 Configuration
# Name of the S3 bucket this app uploads to and reads from.
BUCKET_NAME = 'riceprice-s3-bucket'  # Replace with your bucket name
# Local CSV file that is pushed to S3 when an upload is triggered.
# NOTE(review): this looks like a Google Drive mount path inside Colab —
# confirm it exists in the environment where the app actually runs.
FILE_PATH = '/content/drive/MyDrive/cleaned_data.csv'  # Replace with the path to your local CSV file

# Initialize S3 client
# Created once at module level and shared by the S3 helper functions in this
# file. No credentials or region are passed explicitly here.
s3 = boto3.client('s3')

def upload_to_s3(file_path, bucket_name):
    """Upload a local file to S3 under a timestamped ``rice_prices_*.csv`` key.

    The outcome is reported in the Streamlit UI: a success message naming the
    generated key, or an error message if anything in the upload fails.
    Nothing is returned and no exception propagates to the caller.

    Args:
        file_path: Path of the local file to upload.
        bucket_name: Name of the destination S3 bucket.
    """
    try:
        # The current local time (to the second) makes each upload's key unique.
        object_key = f"rice_prices_{datetime.now():%Y-%m-%d_%H-%M-%S}.csv"
        s3.upload_file(file_path, bucket_name, object_key)
        st.success(f"Uploaded {file_path} to S3 as {object_key}")
    except Exception as exc:
        st.error(f"Error uploading file: {exc}")

def read_latest_csv_from_s3(bucket_name):
    """Load the most recently modified object in an S3 bucket as a DataFrame.

    Every page of the bucket listing is scanned (a single ``list_objects_v2``
    call returns at most 1000 keys, so relying on one call can miss the
    newest object in a larger bucket). The object with the greatest
    ``LastModified`` value is downloaded, decoded as UTF-8 and parsed as CSV.

    Args:
        bucket_name: Name of the S3 bucket to read from.

    Returns:
        The parsed DataFrame, or an empty DataFrame when the bucket has no
        objects or when any step fails. Failures are reported in the
        Streamlit UI via ``st.error`` rather than raised.
    """
    try:
        # Walk all listing pages and keep the newest object seen so far.
        paginator = s3.get_paginator('list_objects_v2')
        latest_file = None
        for page in paginator.paginate(Bucket=bucket_name):
            # Pages of an empty bucket carry no 'Contents' key.
            for obj in page.get('Contents', []):
                if (latest_file is None
                        or obj['LastModified'] > latest_file['LastModified']):
                    latest_file = obj

        if latest_file is None:
            return pd.DataFrame()

        # Read the latest CSV file
        response = s3.get_object(Bucket=bucket_name, Key=latest_file['Key'])
        csv_data = response['Body'].read().decode('utf-8')
        return pd.read_csv(StringIO(csv_data))
    except Exception as e:
        st.error(f"Error reading file from S3: {e}")
        return pd.DataFrame()

# Streamlit App
st.title("Real-Time Rice Prices Dashboard")
st.write("Visualizing rice prices from S3 in real-time.")

# Upload the local file to S3 when the user clicks the button
if st.button("Upload Data to S3"):
    upload_to_s3(FILE_PATH, BUCKET_NAME)

# Initialize session state to track the number of rows displayed
if 'row_index' not in st.session_state:
    st.session_state.row_index = 0

# Number of rows shown per refresh, and the columns the charts depend on
ROWS_PER_PAGE = 10
REQUIRED_COLUMNS = ('date', 'price', 'province')


def _render_current_rows(data):
    """Render the next window of rows from *data* and advance the row cursor.

    Shows the raw rows, a price-over-date line chart, a mean-price-per-province
    bar chart and summary statistics for the current window. The window start
    is read from ``st.session_state.row_index``, which is then moved forward by
    ``ROWS_PER_PAGE`` and wrapped back to 0 once the end of the data is reached.

    Args:
        data: DataFrame to display; expected to contain the columns listed in
            ``REQUIRED_COLUMNS``.
    """
    if data.empty:
        st.warning("No data found in S3.")
        return

    # Fail with a readable message instead of a KeyError from the chart code.
    missing = [col for col in REQUIRED_COLUMNS if col not in data.columns]
    if missing:
        st.error(f"Data from S3 is missing required columns: {', '.join(missing)}")
        return

    start_index = st.session_state.row_index
    # The data may be shorter than on the previous refresh; restart rather
    # than show an empty window.
    if start_index >= len(data):
        start_index = 0
    end_index = start_index + ROWS_PER_PAGE
    current_data = data.iloc[start_index:end_index]  # Get the current 10 rows

    # Update the row index for the next refresh, wrapping at the end
    st.session_state.row_index = 0 if end_index >= len(data) else end_index

    st.write("### Raw Data")
    st.dataframe(current_data)

    # Visualizations based on the current 10 rows
    st.write("### Price Trends Over Time (Current 10 Rows)")
    st.line_chart(current_data.set_index('date')['price'])

    st.write("### Provincial Price Comparison (Current 10 Rows)")
    st.bar_chart(current_data.groupby('province')['price'].mean())

    st.write("### Summary Statistics (Current 10 Rows)")
    st.write(current_data['price'].describe())


# Refresh data periodically
refresh_interval = 60  # Refresh every 60 seconds
# Single-slot placeholders: each refresh overwrites the previous output
# instead of appending another copy of the dashboard to the page.
dashboard = st.empty()
last_refresh = st.empty()

while True:
    with dashboard.container():
        # Load inside the container so any read error is replaced on the
        # next refresh along with the rest of the dashboard.
        data = read_latest_csv_from_s3(BUCKET_NAME)
        _render_current_rows(data)

    # Display the last refresh time
    last_refresh.write(f"Last refreshed at: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

    # Wait for the refresh interval
    time.sleep(refresh_interval)