<a href="https://colab.research.google.com/github/JanithRankelum/streamlit_app/blob/master/app_py.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import streamlit as st
import boto3
import pandas as pd
import numpy as np
from io import StringIO
from datetime import datetime
import time

# AWS S3 Configuration
# Bucket that upload_to_s3 writes to and read_latest_csv_from_s3 reads from.
BUCKET_NAME = 'riceprice-s3-bucket'
# Local CSV that the "Upload Data to S3" button sends to the bucket.
# NOTE(review): the path looks like a Google Drive mount inside Colab — confirm
# it exists wherever this app is actually run.
FILE_PATH = '/content/drive/MyDrive/cleaned_data.csv'

# Initialize S3 client
# One module-level client shared by the S3 helpers below. No credentials or
# region are passed here, so boto3 must pick them up from its environment —
# TODO confirm the deployment provides them.
s3 = boto3.client('s3')

# Function to upload a file to S3
def upload_to_s3(file_path, bucket_name):
    """Upload a local file to S3 under a timestamped ``rice_prices_*.csv`` key.

    The outcome is reported in the Streamlit page: a success message naming
    the object key, or an error message if anything in the upload raised.

    Args:
        file_path: Path of the local file to send.
        bucket_name: Name of the destination bucket.
    """
    try:
        # The current local time makes every upload land under a distinct key.
        uploaded_at = datetime.now()
        object_key = f'rice_prices_{uploaded_at.strftime("%Y-%m-%d_%H-%M-%S")}.csv'
        s3.upload_file(Filename=file_path, Bucket=bucket_name, Key=object_key)
        st.success(f"Uploaded {file_path} to S3 as {object_key}")
    except Exception as exc:
        # Surface the failure in the UI instead of crashing the app.
        st.error(f"Error uploading file: {exc}")

# Function to read the latest CSV file from S3
def read_latest_csv_from_s3(bucket_name):
    """Load the most recently modified object in the bucket as a DataFrame.

    Args:
        bucket_name: Name of the bucket to scan.

    Returns:
        The parsed CSV, with its ``date`` column (when present) converted to
        datetimes. An empty DataFrame is returned when the bucket holds no
        objects or when listing, downloading or parsing fails; failures are
        also reported in the Streamlit page.
    """
    try:
        # A single list_objects_v2 call returns at most 1000 keys, in ascending
        # key order. Uploads use timestamped names, so truncating the listing
        # would hide exactly the newest files. Walk every page instead.
        pages = s3.get_paginator('list_objects_v2').paginate(Bucket=bucket_name)
        all_objects = (
            obj
            for page in pages
            for obj in page.get('Contents', [])  # an empty page has no 'Contents'
        )
        latest_file = max(
            all_objects, key=lambda obj: obj['LastModified'], default=None
        )
        if latest_file is None:
            return pd.DataFrame()

        file_key = latest_file['Key']
        response = s3.get_object(Bucket=bucket_name, Key=file_key)
        csv_data = response['Body'].read().decode('utf-8')
        df = pd.read_csv(StringIO(csv_data))

        # Ensure date is in datetime format
        if 'date' in df.columns:
            df['date'] = pd.to_datetime(df['date'])
        return df
    except Exception as e:
        # Best effort: show the problem and let the dashboard carry on with
        # whatever data it already has.
        st.error(f"Error reading file from S3: {e}")
        return pd.DataFrame()

# Function to detect anomalies using Z-score
def detect_anomalies(df, threshold=2.5):
    """Flag rows whose price is an outlier by Z-score.

    Args:
        df: DataFrame with a numeric ``price`` column. It is not modified.
        threshold: Absolute Z-score above which a row counts as an anomaly.

    Returns:
        A copy of ``df`` with two added columns: ``z_score`` (distance of the
        price from the mean, in standard deviations) and ``is_anomaly``
        (whether ``|z_score|`` exceeds ``threshold``). An input with no rows
        yields a new, empty DataFrame.
    """
    if len(df) == 0:
        return pd.DataFrame()

    result = df.copy()
    prices = result['price']
    spread = prices.std()

    if pd.isna(spread) or spread == 0:
        # A single row (std is NaN) or identical prices (std is 0) give no
        # spread to measure against; dividing anyway would fill the column
        # with NaN. Report "no deviation" instead, keeping NaN only where the
        # price itself is missing.
        result['z_score'] = np.where(prices.isna(), np.nan, 0.0)
    else:
        result['z_score'] = (prices - prices.mean()) / spread

    result['is_anomaly'] = result['z_score'].abs() > threshold
    return result

# Function to calculate moving average
def calculate_trends(df, window_size=3):
    """Sort the rows by date and attach a rolling mean of the price.

    Args:
        df: DataFrame with ``date`` and ``price`` columns. It is not modified.
        window_size: Number of consecutive rows averaged per window.

    Returns:
        A date-sorted copy of ``df`` with a ``moving_avg`` column. Rows that
        do not yet have a full window behind them hold NaN. An input with no
        rows yields a new, empty DataFrame.
    """
    if df.shape[0] == 0:
        return pd.DataFrame()

    # Sort first: the rolling window runs over rows in their stored order.
    return df.sort_values('date').assign(
        moving_avg=lambda ordered: ordered['price'].rolling(window=window_size).mean()
    )

# Streamlit App
st.title("Real-Time Rice Prices Dashboard")
st.write("Visualizing rice prices from S3 in real-time")

# Upload section: send the local CSV to the bucket when the button is clicked.
upload_requested = st.button("Upload Data to S3")
if upload_requested:
    upload_to_s3(FILE_PATH, BUCKET_NAME)

# Seed session state, touching only the keys that are not already present:
#   row_index - start of the next window of rows to display
#   full_data - every row accumulated from S3 so far
for state_key, make_default in (('row_index', int), ('full_data', pd.DataFrame)):
    if state_key not in st.session_state:
        st.session_state[state_key] = make_default()

# Placeholder that the refresh loop overwrites with the last refresh time.
last_refresh = st.empty()

# Everything the loop draws goes into this one placeholder. Writing straight to
# the page would append a fresh copy of every table and chart on each cycle,
# so the page would keep growing for as long as the loop runs.
dashboard = st.empty()

while True:
    # Load data: merge the latest S3 file into what has been seen so far.
    new_data = read_latest_csv_from_s3(BUCKET_NAME)
    if not new_data.empty:
        st.session_state.full_data = pd.concat(
            [st.session_state.full_data, new_data]
        ).drop_duplicates()

    # container() replaces whatever the placeholder showed on the previous cycle.
    with dashboard.container():
        if not st.session_state.full_data.empty:
            # Process data
            processed_data = st.session_state.full_data.copy()
            processed_data = calculate_trends(processed_data)
            processed_data = detect_anomalies(processed_data)

            # Get current window of data (up to 10 rows starting at row_index)
            start_idx = st.session_state.row_index
            end_idx = min(start_idx + 10, len(processed_data))
            current_data = processed_data.iloc[start_idx:end_idx]

            # Display raw data
            st.write("### Current Data Window")
            st.dataframe(current_data[['date', 'province', 'price']])

            # Price trends over the full data set
            st.write("### Price Trends")
            st.line_chart(
                processed_data.set_index('date')[['price', 'moving_avg']],
                use_container_width=True
            )

            # Anomalies over the full data set
            anomalies = processed_data[processed_data['is_anomaly']]
            if not anomalies.empty:
                st.write("### Detected Anomalies")
                st.dataframe(anomalies[['date', 'province', 'price', 'z_score']])

                # Highlight anomalies on chart
                st.write("### Anomalies on Price Chart")
                chart_data = processed_data.set_index('date')[['price']]
                st.line_chart(chart_data)
                for _, row in anomalies.iterrows():
                    st.write(f"🚨 Anomaly on {row['date'].date()}: {row['price']} (Z-score: {row['z_score']:.2f})")

            # Provincial comparison for the current window only
            st.write("### Provincial Price Comparison")
            st.bar_chart(
                current_data.groupby('province')['price'].mean()
            )

            # Summary statistics for the current window only
            st.write("### Summary Statistics")
            st.write(current_data['price'].describe())

            # Advance the window; wrap to the start once the end is reached.
            st.session_state.row_index = end_idx
            if st.session_state.row_index >= len(processed_data):
                st.session_state.row_index = 0
        else:
            st.warning("No data available in S3 bucket")

    # Update refresh time
    last_refresh.write(f"Last refreshed: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

    # Wait for next refresh
    time.sleep(60)  # Refresh every 60 seconds